From 41878723fd82c4c556785f06da22b9785700f76c Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Mon, 19 Aug 2024 17:34:37 -0600
Subject: [PATCH 01/80] Let "python3" be set at the commandline.

---
 Doc/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Doc/Makefile b/Doc/Makefile
index b2ee3fe7d28ed0..08d24b090d7f49 100644
--- a/Doc/Makefile
+++ b/Doc/Makefile
@@ -4,7 +4,7 @@
 #
 # You can set these variables from the command line.
-PYTHON = python3
+PYTHON ?= python3
 VENVDIR = ./venv
 UV = uv
 SPHINXBUILD = PATH=$(VENVDIR)/bin:$$PATH sphinx-build

From 424dc37daf0a0baed6f2157676a7b1dd2035be59 Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Tue, 20 Aug 2024 14:43:15 -0600
Subject: [PATCH 02/80] Add a NEWS entry.

---
 .../Documentation/2024-08-20-14-43-05.gh-issue-123152.8J0smG.rst | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 Misc/NEWS.d/next/Documentation/2024-08-20-14-43-05.gh-issue-123152.8J0smG.rst

diff --git a/Misc/NEWS.d/next/Documentation/2024-08-20-14-43-05.gh-issue-123152.8J0smG.rst b/Misc/NEWS.d/next/Documentation/2024-08-20-14-43-05.gh-issue-123152.8J0smG.rst
new file mode 100644
index 00000000000000..ccbb089a5ada77
--- /dev/null
+++ b/Misc/NEWS.d/next/Documentation/2024-08-20-14-43-05.gh-issue-123152.8J0smG.rst
@@ -0,0 +1 @@
+Add a new concurrency HOWTO page to the docs.

From 9ab68fa0ddd9fcb684d594c4714e07bec05abcb5 Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Mon, 19 Aug 2024 17:34:51 -0600
Subject: [PATCH 03/80] Add the concurrency howto doc.

---
 Doc/howto/concurrency.rst | 330 ++++++++++++++++++++++++++++++++++++
 Doc/howto/index.rst | 2 +
 Doc/includes/concurrency.py | 163 ++++++++++++++++++
 3 files changed, 495 insertions(+)
 create mode 100644 Doc/howto/concurrency.rst
 create mode 100644 Doc/includes/concurrency.py

diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst
new file mode 100644
index 00000000000000..8e05b1ee37b2f4
--- /dev/null
+++ b/Doc/howto/concurrency.rst
@@ -0,0 +1,330 @@
+.. _concurrency-howto:
+
+*****************
+Concurrency HOWTO
+*****************
+
+Python is a language that accommodates a variety of programming styles,
+from procedural to object-oriented to functional. The same applies
+to concurrency. Here we'll look at how different concurrency models
+look in Python, with an emphasis on practical workload-oriented examples.
+
+The following Python concurrency models are covered:
+
+* threads (:mod:`threading` and :mod:`concurrent.futures`)
+* multi-processing (:mod:`multiprocessing` and :mod:`concurrent.futures`)
+* async/await
+* CSP/actor model (:mod:`!interpreters`)
+* distributed (e.g. SMP)
+
+Each of these will be explained, with some simple examples. The later
+workload-oriented examples will be implemented using each,
+for comparison, when possible.
+
+.. note::
+
+   You should always make sure concurrency is the right tool for the job
+   before you reach for it. There are many
+   cases where concurrency simply isn't applicable or will only
+   complicate the solution. In-depth discussion of this point
+   is outside the scope of this document.
+
+
+All About Concurrency
+=====================
+
+What is concurrency?
+--------------------
+
+At its most fundamental, concurrency means doing multiple things at once,
+from a strictly *logical* viewpoint.
+
+When a computer program runs, it executes a sequence of code in order.
+Sometimes it makes sense to break up that sequence into smaller pieces,
+where some of them can run independently of others.
+ +For example, consider the following program with three pieces:: + + prep + do A + do B + +If both ``do A`` and ``do B`` only rely on ``prep`` having completed, +then we could rearrange the program in one of the following ways and +end up with the same result:: + + prep = prep prep = prep ----- + do B = do A do B = | | + do A = = do A do B + +In the first alternative, we swap ``do A`` and ``do B``. In the second +one we split the original program into two programs that we can run at +the same time. In the third one, we run ``do A`` and ``do B`` at the +same time. "At the same time" means concurrency. + +Concurrency often involves some degree of synchronization between +the tasks. At the most basic conceptual level: one task may wait +for another to finish. + +In addition to code running at the same time, concurrency typically +also involves some amount of resources shared between the concurrent +tasks. That may include memory, files, and sockets. + +What is parallelism? +-------------------- + +Concurrency may happen in one of two ways. The concurrent tasks may +share a single CPU, each running a little bit at a time, with the +operating system (or language runtime) taking care of the switching. +The other way is where each task runs on its own CPU, meaning they +are physically running at the same time, not just logically. + +That second way is parallelism. + +What problems can concurrency help solve? +----------------------------------------- + +Primarily, concurrency can be helpful by making your program faster +and more responsive (less latency), when possible. In other words, +you get better computational throughput. That happens by enabling +the following: + +* run on multiple CPU cores (parallelism) +* keep blocking resources from blocking the whole program +* make sure critical tasks have priority +* process results as they come, instead of waiting for them all + +Other possible benefits: + +* asynchronous events can be handled more cleanly +* better efficiency using hardware resources +* improved scalability + +What are the downsides? +----------------------- + +The main challenge when using concurrency is the extra complexity. + +.. XXX + +* races on shared resources +* error handling +* ... + +The operating system, along with some libraries and frameworks, +can help mitigate the extra complexity. So can the concurrency +model you use, which we'll talk about a little later.. + +Workloads +--------- + +We've looked at what you can do with concurrency from a high level. +Now let's look at some concrete examples. + + +... + + +Concurrency Models +------------------ + +The concept of concurrency has been a part of the study and practice +of computer software since the 1950s and 1960s, with various innovations +since then. The application of the different theoretical concurrency +models can be categorized as follows: + +* free threads - using multiple threads in the same process, + with no isolation between them +* isolated threads - threads with strict isolation between them + (e.g. CSP and actor model) +* multiprocessing - using multiple isolated processes +* distributed - multiprocessing across multiple computers +* async/await - using coroutines (AKA cooperative multitasking) + +(There are certainly others, but these are the focus here.) + +There are tradeoffs to each. Free-threading probably has the most +notoriety and the most examples, but is also the most likely to cause +you pain. +Isolated threads have few of the downsides but are less familiar. 
+Multiprocessing and distributed are less efficient at smaller scales. +Async can be straightforward, but may cascade throughout a code base, +doesn't necessarily give you parallelism. + + +Python Concurrency Models +========================= + +We've looked at concurrency and concurrency models generally. +Now let's see what they look like in Python. + +Free-threading +-------------- + +The stdlib :mod:`threading` module ... + +... + +Multi-processing +---------------- + +... + +Async/Await +----------- + +... + +Isolated Threads (CSP/Actor Model) +---------------------------------- + +... + +Distributed +----------- + +... + + +Python Concurrency Workloads +============================ + +... + +also see: + +* https://github.com/faster-cpython/ideas/wiki/Tables:-Workloads +* https://github.com/ericsnowcurrently/concurrency-benchmarks + + +Workload 1 +---------- + +# ... + +.. raw:: html + + + +.. list-table:: + :header-rows: 1 + :class: borderless workload-example + :align: left + + * - threads + - multiple interpreters + - async/await + - multiple processes + - SMP + * - .. raw:: html + +
+ (expand) + + .. literalinclude:: ../includes/concurrency.py + :name: concurrency-workload-1-threads + :start-after: [start-w1-threads] + :end-before: [end-w1-threads] + :dedent: + :linenos: + + .. raw:: html + +
+ + - .. raw:: html + +
+ (expand) + + .. literalinclude:: ../includes/concurrency.py + :name: concurrency-workload-1-subinterpreters + :start-after: [start-w1-subinterpreters] + :end-before: [end-w1-subinterpreters] + :dedent: + :linenos: + + .. raw:: html + +
+ + - .. raw:: html + +
+ (expand) + + .. literalinclude:: ../includes/concurrency.py + :name: concurrency-workload-1-async + :start-after: [start-w1-async] + :end-before: [end-w1-async] + :dedent: + :linenos: + + .. raw:: html + +
+ + - .. raw:: html + +
+ (expand) + + .. literalinclude:: ../includes/concurrency.py + :name: concurrency-workload-1-multiprocessing + :start-after: [start-w1-multiprocessing] + :end-before: [end-w1-multiprocessing] + :dedent: + :linenos: + + .. raw:: html + +
+ + - .. raw:: html + +
+ (expand) + + .. literalinclude:: ../includes/concurrency.py + :name: concurrency-workload-1-smp + :start-after: [start-w1-smp] + :end-before: [end-w1-smp] + :dedent: + :linenos: + + .. raw:: html + +
+ +Using :mod:`concurrent.futures`: + +.. raw:: html + +
+ (expand) + +.. literalinclude:: ../includes/concurrency.py + :name: concurrency-workload-1-concurrent-futures-thread + :start-after: [start-w1-concurrent-futures-thread] + :end-before: [end-w1-concurrent-futures-thread] + :dedent: + :linenos: + +.. raw:: html + +
+ +Workload 2 +---------- + +... diff --git a/Doc/howto/index.rst b/Doc/howto/index.rst index a882f1747084fe..06baa71c9b6b3e 100644 --- a/Doc/howto/index.rst +++ b/Doc/howto/index.rst @@ -11,6 +11,7 @@ Python Library Reference. :maxdepth: 1 :hidden: + concurrency.rst cporting.rst curses.rst descriptor.rst @@ -51,6 +52,7 @@ General: Advanced development: +* :ref:`concurrency-howto` * :ref:`curses-howto` * :ref:`freethreading-extensions-howto` * :ref:`isolating-extensions-howto` diff --git a/Doc/includes/concurrency.py b/Doc/includes/concurrency.py new file mode 100644 index 00000000000000..4b0d519774152b --- /dev/null +++ b/Doc/includes/concurrency.py @@ -0,0 +1,163 @@ +"""Example code for howto/concurrency.rst. + +The examples take advantage of the literalinclude directive's +:start-after: and :end-before: options. +""" + + +class Workload1: + + def run_using_threads(self): + # [start-w1-threads] + import threading + + def task(): + ... + + t = threading.Thread(target=task) + t.start() + + ... + # [end-w1-threads] + + def run_using_multiprocessing(self): + # [start-w1-multiprocessing] + import multiprocessing + + def task(): + ... + + ... + # [end-w1-multiprocessing] + + def run_using_async(self): + # [start-w1-async] + # async 1 + ... + # [end-w1-async] + + def run_using_subinterpreters(self): + # [start-w1-subinterpreters] + # subinterpreters 1 + ... + # [end-w1-subinterpreters] + + def run_using_smp(self): + # [start-w1-smp] + # smp 1 + ... + # [end-w1-smp] + + def run_using_concurrent_futures_thread(self): + # [start-w1-concurrent-futures-thread] + # concurrent.futures 1 + ... + # [end-w1-concurrent-futures-thread] + + +####################################### +# workload 2: ... +####################################### + +class Workload2: + + def run_using_threads(self): + # [start-w2-threads] + import threading + + def task(): + ... + + t = threading.Thread(target=task) + t.start() + + ... + # [end-w2-threads] + + def run_using_multiprocessing(self): + # [start-w2-multiprocessing] + import multiprocessing + + def task(): + ... + + ... + # [end-w2-multiprocessing] + + def run_using_async(self): + # [start-w2-async] + # async 2 + ... + # [end-w2-async] + + def run_using_subinterpreters(self): + # [start-w2-subinterpreters] + # subinterpreters 2 + ... + # [end-w2-subinterpreters] + + def run_using_smp(self): + # [start-w2-smp] + # smp 2 + ... + # [end-w2-smp] + + def run_using_concurrent_futures_thread(self): + # [start-w2-concurrent-futures-thread] + # concurrent.futures 2 + ... + # [end-w2-concurrent-futures-thread] + + +####################################### +# workload 3: ... +####################################### + +class Workload3: + + def run_using_threads(self): + # [start-w3-threads] + import threading + + def task(): + ... + + t = threading.Thread(target=task) + t.start() + + ... + # [end-w3-threads] + + def run_using_multiprocessing(self): + # [start-w3-multiprocessing] + import multiprocessing + + def task(): + ... + + ... + # [end-w3-multiprocessing] + + def run_using_async(self): + # [start-w3-async] + # async 3 + ... + # [end-w3-async] + + def run_using_subinterpreters(self): + # [start-w3-subinterpreters] + # subinterpreters 3 + ... + # [end-w3-subinterpreters] + + def run_using_smp(self): + # [start-w3-smp] + # smp 3 + ... + # [end-w3-smp] + + def run_using_concurrent_futures_thread(self): + # [start-w3-concurrent-futures-thread] + # concurrent.futures 3 + ... 
# [end-w3-concurrent-futures-thread]

From fce455ef23454ae4800ac865ec74d05bfaf85946 Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Tue, 20 Aug 2024 12:19:07 -0600
Subject: [PATCH 04/80] Add more explanation.

---
 Doc/howto/concurrency.rst | 176 ++++++++++++++++++++++++++++----------
 1 file changed, 129 insertions(+), 47 deletions(-)

diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst
index 8e05b1ee37b2f4..9a9946a3c87b9e 100644
--- a/Doc/howto/concurrency.rst
+++ b/Doc/howto/concurrency.rst
@@ -29,6 +29,17 @@ for comparison, when possible.
    complicate the solution. In-depth discussion of this point
    is outside the scope of this document.
 
+.. note::
+
+   Free-threading is one of the oldest concurrency models, fundamental
+   to operating systems, and widely supported in programming languages.
+   However, it is generally considered perilous and not human-friendly.
+   Other concurrency models have demonstrated better usability and
+   newer programming languages typically avoid exposing threads directly.
+   Take that into consideration before reaching for threads and look at
+   the alternatives first.
+   See `the table below`_.
+
 
 All About Concurrency
 =====================
@@ -39,11 +50,19 @@ What is concurrency?
 At its most fundamental, concurrency means doing multiple things at once,
 from a strictly *logical* viewpoint.
 
-When a computer program runs, it executes a sequence of code in order.
+When a computer program runs, it executes a sequence of code
+in a given order. If you were to trace the actual execution, you would
+still end up with a *linear* series of executed instructions that matches
+the code. We call this sequence of code (and instructions) a logical
+"thread" of execution.
+
 Sometimes it makes sense to break up that sequence into smaller pieces,
-where some of them can run independently of others.
+where some of them can run independently of others. The program then
+involves multiple logical threads. This is also called "multitasking",
+with each logical thread called a "task".
 
-For example, consider the following program with three pieces::
+As an example of splitting up the sequence, consider the following
+abstract program with three pieces::
 
     prep
     do A
@@ -60,16 +79,21 @@ end up with the same result::
 In the first alternative, we swap ``do A`` and ``do B``. In the second
 one we split the original program into two programs that we can run at
 the same time. In the third one, we run ``do A`` and ``do B`` at the
-same time. "At the same time" means concurrency.
+same time. "At the same time" means concurrency. It always involves
+multiple logical threads.
 
-Concurrency often involves some degree of synchronization between
-the tasks. At the most basic conceptual level: one task may wait
-for another to finish.
+Additionally, concurrency often involves some degree of synchronization
+between the logical threads. At the most basic conceptual level:
+one thread may wait for another to finish.
 
-In addition to code running at the same time, concurrency typically
+Aside from code running at the same time, concurrency typically
 also involves some amount of resources shared between the concurrent
 tasks. That may include memory, files, and sockets.
 
+One important observation is that most concurrent programs
+can be represented instead as a single task, with the code of the
+concurrent tasks merged into a single sequence.
+
 What is parallelism?
 --------------------
 
@@ -81,6 +105,46 @@ are physically running at the same time, not just logically.
 
 That second way is parallelism.
 
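+As an illustration, here is one way the earlier three-piece program
+might look as Python threads (a sketch only; ``prep``, ``do_a``, and
+``do_b`` are hypothetical placeholders). Whether the two tasks actually
+run in parallel is up to the interpreter and the operating system::
+
+    import threading
+
+    def prep(): ...
+    def do_a(): ...
+    def do_b(): ...
+
+    prep()
+    # Run "do A" and "do B" at the same time.
+    ta = threading.Thread(target=do_a)
+    tb = threading.Thread(target=do_b)
+    ta.start()
+    tb.start()
+    # Wait for both logical threads to finish.
+    ta.join()
+    tb.join()
+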
+Modern CPUs are designed around parallelism, with multiple cores
+and sometimes multiple execution pipelines per core. The operating
+system exposes physical CPU threads as OS threads and as processes.
+A programming language (or runtime) may add additional layers of
+abstraction on top of that.
+
+Parallelism is where concurrent logical threads are running
+on distinct physical threads across multiple cores.
+
+Concurrency Models
+------------------
+
+The concept of concurrency has been a part of the study and practice
+of computer software since very early on, in the 1950s and 1960s,
+long before the widespread adoption of multi-core CPUs. Clearly
+it's about more than just parallelism.
+
+Over the decades, research and use of concurrency have led to a variety
+of well defined abstract models, with different characteristics and
+tradeoffs. The application of the different theoretical concurrency
+models can be categorized as follows:
+
+* free threads - using multiple physical threads in the same process,
+  with no isolation between them
+* isolated threads - threads, often physical, with strict isolation
+  between them (e.g. CSP and actor model)
+* multiprocessing - using multiple isolated processes
+* distributed - multiprocessing across multiple computers
+* async/await - using coroutines (AKA "cooperative multitasking")
+
+(There are certainly others, but these are the focus here.)
+
+There are tradeoffs to each. Free-threading probably has the most
+notoriety and the most examples, but is also the most likely to cause
+you pain.
+Isolated threads have few of the downsides but are less familiar.
+Multiprocessing and distributed are less efficient at smaller scales.
+Async can be straightforward, but may cascade throughout a code base
+and doesn't necessarily give you parallelism.
+
 What problems can concurrency help solve?
 -----------------------------------------
 
@@ -92,6 +156,7 @@ the following:
 * run on multiple CPU cores (parallelism)
 * keep blocking resources from blocking the whole program
 * make sure critical tasks have priority
+* make sure other tasks have a fair share of time
 * process results as they come, instead of waiting for them all
 
 Other possible benefits:
@@ -103,17 +168,48 @@ Other possible benefits:
 What are the downsides?
 -----------------------
 
-The main challenge when using concurrency is the extra complexity.
-
-.. XXX
-
-* races on shared resources
-* error handling
-* ...
-
-The operating system, along with some libraries and frameworks,
-can help mitigate the extra complexity. So can the concurrency
-model you use, which we'll talk about a little later..
+The main challenge when using concurrency is the (potential) extra
+complexity. This complexity comes from the effect of multiple logical
+threads running at the same time and interacting with each other.
+In practice, this falls into two categories: data races and tracing
+relative execution. Both are a form of "spooky action at a distance".
+
+The first category relates to mutable data shared between threads:
+a data race is where one thread writes to memory at a time when another
+thread is expecting the value to be unchanged, invalidating its logic.
+Similarly, two threads could write to the same memory location at the
+same time, either corrupting the data there or invalidating
+the expectations of one of the threads.
+
+In each case, the non-deterministic scheduling of threads means it is
+both hard to reproduce races and to track down where a race happened.
+These qualities make these bugs especially frustrating
+and worth diligently avoiding.
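+
+Here is a minimal sketch of a data race on a shared counter, along
+with the usual lock-based fix (hypothetical example code, not part of
+the original discussion)::
+
+    import threading
+
+    counter = 0
+
+    def task():
+        global counter
+        for _ in range(100_000):
+            # Not atomic: two threads can interleave between the
+            # read and the write, losing updates.
+            counter += 1
+
+    threads = [threading.Thread(target=task) for _ in range(2)]
+    for t in threads:
+        t.start()
+    for t in threads:
+        t.join()
+    # "counter" may well be less than 200000 here.
+
+    # The fix: make the read-modify-write atomic with a lock.
+    lock = threading.Lock()
+
+    def safe_task():
+        global counter
+        for _ in range(100_000):
+            with lock:
+                counter += 1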
+
+Races are possible when the concurrency approach is subject
+to parallel execution or to non-deterministic switching.
+(This excludes "async/await", which relies on cooperative multitasking.)
+When all memory is possibly shared, as is the case with free-threading,
+then all memory is at risk.
+
+Data races are often dealt with using locks (AKA mutexes)
+at a low level, and thread-safe types and APIs at a high level.
+Depending on the programming language, the complexity is sometimes
+mitigated somewhat by the compiler and runtime. There are even
+libraries and frameworks that help abstract away the complexity
+to an extent. On top of that, there are tools that can help identify
+potential races via static analysis. Unfortunately, none of these aids
+is foolproof and the risk of hitting a race is always looming.
+
+.. XXX mention reentrancy?
+
+The second category of complexity is the problem of tracing the execution
+of one logical thread relative to another. This is especially relevant
+for error handling, when an error in one thread is exposed in the
+other. This applies equally to threads that start other threads and to
+concurrency models that use callbacks. Knowing where the failing thread
+was started is valuable when debugging, as is knowing where a callback
+was registered.
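+
+As a sketch of that problem (hypothetical code), an error raised in
+one thread is easy to lose unless it is explicitly funneled back to
+the thread that cares about it, for example via
+:class:`concurrent.futures.Future` objects::
+
+    import concurrent.futures
+
+    def task():
+        raise RuntimeError('broken')
+
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        fut = executor.submit(task)
+        try:
+            fut.result()
+        except RuntimeError as exc:
+            # The error from the worker thread surfaces here, in the
+            # thread that submitted the task.
+            print('task failed:', exc)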
 
 Workloads
 ---------
 
 We've looked at what you can do with concurrency from a high level.
 Now let's look at some concrete examples.
 
-
 ...
 
-
-Concurrency Models
-------------------
-
-The concept of concurrency has been a part of the study and practice
-of computer software since the 1950s and 1960s, with various innovations
-since then. The application of the different theoretical concurrency
-models can be categorized as follows:
-
-* free threads - using multiple threads in the same process,
-  with no isolation between them
-* isolated threads - threads with strict isolation between them
-  (e.g. CSP and actor model)
-* multiprocessing - using multiple isolated processes
-* distributed - multiprocessing across multiple computers
-* async/await - using coroutines (AKA cooperative multitasking)
-
-(There are certainly others, but these are the focus here.)
-
-There are tradeoffs to each. Free-threading probably has the most
-notoriety and the most examples, but is also the most likely to cause
-you pain.
-Isolated threads have few of the downsides but are less familiar.
-Multiprocessing and distributed are less efficient at smaller scales.
-Async can be straightforward, but may cascade throughout a code base,
-doesn't necessarily give you parallelism.
-
-
 Python Concurrency Models
 =========================

From 28d8ac089ca094fe25a26d56351696074f452bab Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Tue, 20 Aug 2024 14:38:52 -0600
Subject: [PATCH 05/80] More explanation.

---
 Doc/howto/concurrency.rst | 112 +++++++++++++++++++++++++++++++------
 1 file changed, 91 insertions(+), 21 deletions(-)

diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst
index 9a9946a3c87b9e..0420f2d7e56ea8 100644
--- a/Doc/howto/concurrency.rst
+++ b/Doc/howto/concurrency.rst
@@ -11,11 +11,11 @@ look in Python, with an emphasis on practical workload-oriented examples.
 
 The following Python concurrency models are covered:
 
-* threads (:mod:`threading` and :mod:`concurrent.futures`)
+* free-threading (:mod:`threading` and :mod:`concurrent.futures`)
+* isolated threads, AKA CSP/actor model (:mod:`!interpreters`)
 * multi-processing (:mod:`multiprocessing` and :mod:`concurrent.futures`)
-* async/await
-* CSP/actor model (:mod:`!interpreters`)
-* distributed (e.g. SMP)
+* distributed, e.g. SMP (:mod:`!dask`)
+* async/await (:mod:`asyncio`)
 
 Each of these will be explained, with some simple examples. The later
 workload-oriented examples will be implemented using each,
 for comparison, when possible.
@@ -127,13 +127,15 @@ of well defined abstract models, with different characteristics and
 tradeoffs. The application of the different theoretical concurrency
 models can be categorized as follows:
 
-* free threads - using multiple physical threads in the same process,
-  with no isolation between them
-* isolated threads - threads, often physical, with strict isolation
-  between them (e.g. CSP and actor model)
-* multiprocessing - using multiple isolated processes
-* distributed - multiprocessing across multiple computers
-* async/await - using coroutines (AKA "cooperative multitasking")
+================= ==========
+free threads      using multiple physical threads in the same process,
+                  with no isolation between them
+isolated threads  threads, often physical, with strict isolation
+                  between them (e.g. CSP and actor model)
+multiprocessing   using multiple isolated processes
+distributed       multiprocessing across multiple computers
+async/await       using coroutines (AKA "cooperative multitasking")
+================= ==========
 
 (There are certainly others, but these are the focus here.)
 
@@ -214,10 +216,67 @@ was registered.
 Workloads
 ---------
 
-We've looked at what you can do with concurrency from a high level.
-Now let's look at some concrete examples.
-
-...
+In practice, concurrency is used in a wide variety of software.
+Here's a non-comprehensive list:
+
+======================= ===========
+application             concurrency
+======================= ===========
+web server              handle simultaneous static requests, CGI requests
+web browser             load multiple resources at once
+database server         handle simultaneous requests
+devops script           process multiple files at once
+system logger           handle simultaneous logging requests
+ATM network             handle multiple bank transactions at once
+hacker toolkit          decode a passwd file with brute force
+raytracer               compute RGB for each image pixel
+machine learning        apply matrices on training data set
+astrophysics            merge black hole data from multiple satellites and observatories
+investing               combine thousands of industry data sources into a concise actionable analysis
+MMO game server         handle login requests, handle client updates
+game client             GUI, physics engine, handle server updates
+audio transcoder        process chunks
+engineering simulation  calculate stress loads at vertices
+molecular modeling      try many permutations
+======================= ===========
+
+It can be helpful to identify common characteristics by which we could
+group concurrency workloads. Here are some:
+
+* number of logical threads
+* main + workers vs. independent (see the sketch after this list)
+* main + background
+* how much computation, per thread
+* how much blocking on other threads, per thread
+* how much blocking IO, per thread
+* number of external inputs
+* number of external outputs
+* how much data used, per thread
+* how much data do logical threads share
+* size of the data shared by threads
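+
+For instance, the "main + workers" shape mentioned above often looks
+something like this sketch (hypothetical code) built on a shared
+:class:`queue.Queue`::
+
+    import queue
+    import threading
+
+    tasks = queue.Queue()
+
+    def worker():
+        # Each worker pulls work items until the main thread
+        # signals that there is nothing left to do.
+        while (item := tasks.get()) is not None:
+            ...  # Process the item.
+            tasks.task_done()
+        tasks.task_done()
+
+    workers = [threading.Thread(target=worker) for _ in range(4)]
+    for w in workers:
+        w.start()
+    for item in ('a', 'b', 'c'):
+        tasks.put(item)
+    for _ in workers:
+        tasks.put(None)  # One "stop" signal per worker.
+    tasks.join()
+    for w in workers:
+        w.join()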
+
+From our list of workloads above, we can observe some clustering:
+
+* ...
+
+Let's also revisit the ways concurrency can be helpful:
+
+* get work done faster
+
+  * run more tasks at once (multi-core)
+
+* make the app feel more responsive
+
+  * make sure critical tasks have priority
+  * process results as they come, instead of waiting for them all
+  * send payload to multiple targets before starting next task
+
+* use system resources more efficiently
+
+  * keep slow parts from blocking fast parts
+  * keep blocking resources from blocking the whole program
+  * make sure other tasks have a fair share of time
+  * task scheduling & resource usage optimization
+
+* scaling
+* handle asynchronous events
+
+All of these things factor into how concurrency should be applied for
+a workload, or even whether it should be applied at all.
 
 Python Concurrency Models
 =========================
 
 We've looked at concurrency and concurrency models generally.
 Now let's see what they look like in Python.
 
 Free-threading
 --------------
 
 The stdlib :mod:`threading` module ...
 
 ...
 
+Isolated Threads (CSP/Actor Model)
+----------------------------------
+
+The future stdlib :mod:`!interpreters` module ...
+
+...
+
 Multi-processing
 ----------------
 
+The stdlib :mod:`multiprocessing` module ...
+
 ...
 
 Distributed
 -----------
 
+The popular :mod:`!dask` module ...
+
 ...
 
 Async/Await
 -----------
 
+The stdlib :mod:`asyncio` module ...
+
 ...
 
 Python Concurrency Workloads
 ============================
 
+Below we have a series of examples of how to implement the most
+common Python workloads that take advantage of concurrency.
+
 ...
 
 also see:
 
 * https://github.com/faster-cpython/ideas/wiki/Tables:-Workloads
 * https://github.com/ericsnowcurrently/concurrency-benchmarks
 
+.. _the table below:
+
+.. rst-class:: align-left
+
+======== ========= =============== ===== =============== ===
+workload threading subinterpreters async multiprocessing smp
+======== ========= =============== ===== =============== ===
+1        Y         Y               Y     Y               Y
+2        Y         Y               Y     Y               Y
+3        Y         Y               Y     Y               Y
+4        Y         Y               Y     Y               Y
+======== ========= =============== ===== =============== ===
+
+
 Workload 1
 ----------

From 503d829c2bb9bb9f928356a07f8eee8edbb905ec Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Tue, 20 Aug 2024 15:53:25 -0600
Subject: [PATCH 06/80] Re-structure the comparison table.

---
 Doc/howto/concurrency.rst | 45 +++++++++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 15 deletions(-)

diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst
index 0420f2d7e56ea8..9e90edcbe6057a 100644
--- a/Doc/howto/concurrency.rst
+++ b/Doc/howto/concurrency.rst
@@ -38,7 +38,7 @@ for comparison, when possible.
    newer programming languages typically avoid exposing threads directly.
    Take that into consideration before reaching for threads and look at
    the alternatives first.
-   See `the table below`_.
+   See `the table below <concurrency-models-comparison_>`_.
 
 
 All About Concurrency
@@ -320,6 +320,35 @@ The stdlib :mod:`asyncio` module ...
 
 ...
 
+.. _concurrency-models-comparison:
+
+Comparison
+----------
+
+.. list-table::
+   :header-rows: 1
+   :class: borderless
+   :align: left
+
+   * - model
+     - pros
+     - cons
+   * - threads
+     - ...
+     - ...
+   * - multiple interpreters
+     - ...
+     - ...
+   * - multiprocessing
+     - ...
+     - ...
+   * - distributed
+     - ...
+     - ...
+   * - async/await
+     - ...
+     - ...
 
 Python Concurrency Workloads
 ============================
 
 ...
 
-.. _the table below:
-
-.. rst-class:: align-left
-
-======== ========= =============== ===== =============== ===
-workload threading subinterpreters async multiprocessing smp
-======== ========= =============== ===== =============== ===
-1        Y         Y               Y     Y               Y
-2        Y         Y               Y     Y               Y
-3        Y         Y               Y     Y               Y
-4        Y         Y               Y     Y               Y
-======== ========= =============== ===== =============== ===
-
-
 Workload 1
 ----------

From e5d904fef52367855d220bfd0e85d3f32fa15aa5 Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Tue, 20 Aug 2024 16:35:11 -0600
Subject: [PATCH 07/80] Add basic examples for each concurrency model.

---
 Doc/howto/concurrency.rst | 176 ++++++++++++++++++++++++++++------
 1 file changed, 148 insertions(+), 28 deletions(-)

diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst
index 9e90edcbe6057a..74fe74a94b8a5e 100644
--- a/Doc/howto/concurrency.rst
+++ b/Doc/howto/concurrency.rst
@@ -288,37 +288,175 @@ Now let's see what they look like in Python.
 Free-threading
 --------------
 
-The stdlib :mod:`threading` module ...
+For free-threading we can use the stdlib :mod:`threading` module:
 
-...
+::
+
+   import threading
+
+   def task():
+       # Do something.
+       pass
+
+   threads = []
+   for _ in range(5):
+       t = threading.Thread(target=task)
+       t.start()
+       threads.append(t)
+
+   # Wait for all the threads to finish
+   for t in threads:
+       t.join()
+
+You can also use :mod:`concurrent.futures`:
+
+::
+
+   import concurrent.futures
+
+   def task(arg):
+       return arg
+
+   with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
+       futures = {}
+       for i in range(10):
+           fut = executor.submit(task, i)
+           futures[fut] = i
+       for fut in concurrent.futures.as_completed(futures):
+           res = fut.result()
+           assert res == futures[fut]
+
+Note that there are some limitations to the parallelism Python
+can provide. See :term:`global interpreter lock`.
 
 Isolated Threads (CSP/Actor Model)
 ----------------------------------
 
-The future stdlib :mod:`!interpreters` module ...
+The future stdlib :mod:`!interpreters` module supports isolated execution:
 
-...
+::
+
+   import interpreters
+   import threading
+
+   script = """if True:
+       # Do something.
+       pass
+       """
+
+   def task():
+       interp = interpreters.create()
+       interp.exec(script)
+
+   threads = []
+   for _ in range(5):
+       t = threading.Thread(target=task)
+       t.start()
+       threads.append(t)
+
+   # Wait for all the subinterpreters to finish
+   for t in threads:
+       t.join()
+
+You will also be able to use :mod:`concurrent.futures`:
+
+::
+
+   import concurrent.futures
+
+   def task(arg):
+       return arg
+
+   with concurrent.futures.InterpreterPoolExecutor(max_workers=5) as executor:
+       futures = {}
+       for i in range(10):
+           fut = executor.submit(task, i)
+           futures[fut] = i
+       for fut in concurrent.futures.as_completed(futures):
+           res = fut.result()
+           assert res == futures[fut]
 
 Multi-processing
 ----------------
 
-The stdlib :mod:`multiprocessing` module ...
+You can use the stdlib :mod:`multiprocessing` module:
 
-...
+::
+
+   import multiprocessing
+
+   def task():
+       # Do something.
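+       # Unlike threads, each process has its own interpreter and its
+       # own GIL, so CPU-bound work here can run in parallel.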
+ pass + + procs = [] + for _ in range(5): + p = multiprocessing.Process(target=task) + p.start() + procs.append(p) + + # Wait for all the subprocesses to finish + for p in procs: + p.join() + +You will also be able to use :mod:`concurrent.futures`: + +:: + + import concurrent.futures + + def task(arg): + return arg + + with concurrent.futures.ProcessPoolExecutor(max_workers=5) as executor: + futures = {} + for i in range(10): + fut = executor.submit(task, i) + futures[fut] = i + for fut in concurrent.futures.as_completed(futures): + res = fut.result() + assert res == futures[fut] Distributed ----------- -The popular :mod:`!dask` module ... +The popular :mod:`!dask` module gives us distributed concurrency: -... +:: + + from dask.distributed import LocalCluster + + def task() + # Do something. + pass + + client = LocalCluster().get_client() + + futures = [] + for _ in range(5): + fut = client.submit(task) + futures.append(fut) + + # Wait for all the tasks to finish. + client.gather(futures) Async/Await ----------- -The stdlib :mod:`asyncio` module ... +The stdlib :mod:`asyncio` module provides an event loop you can use: -... +:: + + import asyncio + + async def task(): + # Do something. + pass + + coros = [task() for _ in range(5)] + + # Wait for all the coroutines to finish. + await asyncio.gather(*coros) .. _concurrency-models-comparison: @@ -473,24 +611,6 @@ Workload 1 -Using :mod:`concurrent.futures`: - -.. raw:: html - -
- (expand) - -.. literalinclude:: ../includes/concurrency.py - :name: concurrency-workload-1-concurrent-futures-thread - :start-after: [start-w1-concurrent-futures-thread] - :end-before: [end-w1-concurrent-futures-thread] - :dedent: - :linenos: - -.. raw:: html - -
-   </details>
-
 Workload 2
 ----------

From b7bb9a5d3162e9331377ccca4b066dfaa8bbe76f Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Wed, 21 Aug 2024 09:47:19 -0600
Subject: [PATCH 08/80] Clarify about "spooky action at a distance".

---
 Doc/howto/concurrency.rst | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst
index 74fe74a94b8a5e..cc6d504bab004d 100644
--- a/Doc/howto/concurrency.rst
+++ b/Doc/howto/concurrency.rst
@@ -174,7 +174,9 @@ The main challenge when using concurrency is the (potential) extra
 complexity. This complexity comes from the effect of multiple logical
 threads running at the same time and interacting with each other.
 In practice, this falls into two categories: data races and tracing
-relative execution. Both are a form of "spooky action at a distance".
+relative execution. Both are a form of "spooky action at a distance" [#f1]_
+(meaning something changes unexpectedly in one place due to unknown
+changes somewhere else).
 
 The first category relates to mutable data shared between threads:
 a data race is where one thread writes to memory at a time when another
@@ -615,3 +617,9 @@ Workload 2
 ----------
 
 ...
+
+
+.. rubric:: Footnotes
+
+.. [#f1] The phrase was originally said by Albert Einstein about
+   quantum entanglement.

From 7efb9b4a0f181517db603b74f17472268c295b3f Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Wed, 21 Aug 2024 09:52:28 -0600
Subject: [PATCH 09/80] Clarify about the "pain" threads can cause.

---
 Doc/howto/concurrency.rst | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst
index cc6d504bab004d..b064680caba135 100644
--- a/Doc/howto/concurrency.rst
+++ b/Doc/howto/concurrency.rst
@@ -140,10 +140,11 @@ async/await       using coroutines (AKA "cooperative multitasking")
 (There are certainly others, but these are the focus here.)
 
 There are tradeoffs to each. Free-threading probably has the most
-notoriety and the most examples, but is also the most likely to cause
-you pain.
-Isolated threads have few of the downsides but are less familiar.
-Multiprocessing and distributed are less efficient at smaller scales.
+notoriety and the most examples, but also has the most pitfalls
+(see `concurrency-downsides`_ below).
+Isolated threads have few of those pitfalls but are less familiar.
+Multiprocessing and distributed are likewise isolated, but less
+efficient, which can have a larger negative impact at smaller scales.
 Async can be straightforward, but may cascade throughout a code base
 and doesn't necessarily give you parallelism.
 
@@ -167,6 +167,8 @@ Other possible benefits:
 * better efficiency using hardware resources
 * improved scalability
 
+.. _concurrency-downsides:
+
 What are the downsides?
 -----------------------
 

From daaa02b9522a965e41eb68e8908e3682eb01a2de Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Wed, 21 Aug 2024 10:21:25 -0600
Subject: [PATCH 10/80] Add references to the howto in the library docs.

---
 Doc/library/asyncio.rst            |  6 ++++++
 Doc/library/concurrency.rst        |  3 ++-
 Doc/library/concurrent.futures.rst |  6 ++++++
 Doc/library/multiprocessing.rst    |  4 ++++
 Doc/library/threading.rst          |  4 ++++
 5 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/Doc/library/asyncio.rst b/Doc/library/asyncio.rst
index 5f83b3a2658da4..b1fab30a22f15c 100644
--- a/Doc/library/asyncio.rst
+++ b/Doc/library/asyncio.rst
@@ -60,6 +60,12 @@ Additionally, there are **low-level** APIs for
 
 .. _asyncio-cli:
 
+.. seealso::
+
+   The :ref:`concurrency-howto` offers explanations about concurrency
+   and different concurrency models, along with examples for each
+   of those models.
+
 .. rubric:: asyncio REPL
 
 You can experiment with an ``asyncio`` concurrent context in the :term:`REPL`:

diff --git a/Doc/library/concurrency.rst b/Doc/library/concurrency.rst
index 5be1a1106b09a0..1c8676f9ff4f9d 100644
--- a/Doc/library/concurrency.rst
+++ b/Doc/library/concurrency.rst
@@ -8,8 +8,9 @@ The modules described in this chapter provide support for concurrent
 execution of code. The appropriate choice of tool will depend on the
 task to be executed (CPU bound vs IO bound) and preferred style of
 development (event driven cooperative multitasking vs preemptive
-multitasking). Here's an overview:
+multitasking). See the :ref:`concurrency-howto`.
 
+Here's an overview of the modules:
 
 .. toctree::

diff --git a/Doc/library/concurrent.futures.rst b/Doc/library/concurrent.futures.rst
index e3b24451188cc4..9d81ea591b8bd2 100644
--- a/Doc/library/concurrent.futures.rst
+++ b/Doc/library/concurrent.futures.rst
@@ -21,6 +21,12 @@ defined by the abstract :class:`Executor` class.
 
 .. include:: ../includes/wasm-notavail.rst
 
+.. seealso::
+
+   The :ref:`concurrency-howto` offers explanations about concurrency
+   and different concurrency models, along with examples for each
+   of those models.
+
 Executor Objects
 ----------------

diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst
index f1f9d087edf7f9..6af56bc5f36e91 100644
--- a/Doc/library/multiprocessing.rst
+++ b/Doc/library/multiprocessing.rst
@@ -54,6 +54,10 @@ will print to standard output ::
    the submission of work to the underlying process pool to be
    separated from waiting for the results.
 
+   The :ref:`concurrency-howto` offers explanations about concurrency
+   and different concurrency models, along with examples for each
+   of those models.
+
 The :class:`Process` class
 ^^^^^^^^^^^^^^^^^^^^^^^^^^

diff --git a/Doc/library/threading.rst b/Doc/library/threading.rst
index cb82fea377697b..824cfb405381f8 100644
--- a/Doc/library/threading.rst
+++ b/Doc/library/threading.rst
@@ -26,6 +26,10 @@ level :mod:`_thread` module.
    :mod:`asyncio` offers an alternative approach to achieving task level
    concurrency without requiring the use of multiple operating system
    threads.
 
+   The :ref:`concurrency-howto` offers explanations about concurrency
+   and different concurrency models, along with examples for each
+   of those models.
+
 .. note::
 
    In the Python 2.x series, this module contained ``camelCase`` names

From ad50bdd2b075c497a4e55592c256e4e7a8d43d7f Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Wed, 21 Aug 2024 15:43:13 -0600
Subject: [PATCH 11/80] Clean up the Python concurrency models section.

---
 Doc/howto/concurrency.rst | 74 +++++++++------------------------------
 1 file changed, 17 insertions(+), 57 deletions(-)

diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst
index b064680caba135..2422767cf177ca 100644
--- a/Doc/howto/concurrency.rst
+++ b/Doc/howto/concurrency.rst
@@ -290,12 +290,16 @@ Python Concurrency Models
 We've looked at concurrency and concurrency models generally.
 Now let's see what they look like in Python.
 
+We'll also `compare them below <concurrency-models-comparison_>`_.
+
 Free-threading
 --------------
 
+.. currentmodule:: threading
+
-For free-threading we can use the stdlib :mod:`threading` module:
+For free-threading we can use the stdlib :mod:`threading` module.
+ +Here's a basic example of how it looks to use the threading module:: import threading @@ -313,27 +317,11 @@ For free-threading we can use the stdlib :mod:`threading` module: for t in threads: t.join() -You can also use :mod:`concurrent.futures`: - -:: - - import concurrent.futures - - def task(arg): - return arg - - with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: - futures = {} - for i in range(10): - fut = executor.submit(task, i) - futures[fut] = i - for fut in concurrent.futures.as_completed(futures): - res = fut.result() - assert res == futures[fut] - Note that there are some limitations to the parallelism Python can provide. See :pep:`630`. +.. currentmodule:: None + Isolated Threads (CSP/Actor Model) ---------------------------------- @@ -363,27 +351,11 @@ The future stdlib :mod:`!interpreters` module supports isolated execution: for t in threads: t.join() -You will also be able to use :mod:`concurrent.futures`: - -:: - - import concurrent.futures - - def task(arg): - return arg - - with concurrent.futures.InterpreterPoolExecutor(max_workers=5) as executor: - futures = {} - for i in range(10): - fut = executor.submit(task, i) - futures[fut] = i - for fut in concurrent.futures.as_completed(futures): - res = fut.result() - assert res == futures[fut] - Multi-processing ---------------- +.. currentmodule:: multiprocessing + You can use the stdlib :mod:`multiprocessing` module: :: @@ -404,23 +376,7 @@ You can use the stdlib :mod:`multiprocessing` module: for p in procs: p.join() -You will also be able to use :mod:`concurrent.futures`: - -:: - - import concurrent.futures - - def task(arg): - return arg - - with concurrent.futures.ProcessPoolExecutor(max_workers=5) as executor: - futures = {} - for i in range(10): - fut = executor.submit(task, i) - futures[fut] = i - for fut in concurrent.futures.as_completed(futures): - res = fut.result() - assert res == futures[fut] +.. currentmodule:: None Distributed ----------- @@ -448,6 +404,8 @@ The popular :mod:`!dask` module gives us distributed concurrency: Async/Await ----------- +.. currentmodule:: asyncio + The stdlib :mod:`asyncio` module provides an event loop you can use: :: @@ -463,6 +421,8 @@ The stdlib :mod:`asyncio` module provides an event loop you can use: # Wait for all the coroutines to finish. await asyncio.gather(*coros) +.. currentmodule:: None + .. _concurrency-models-comparison: Comparison @@ -493,8 +453,8 @@ Comparison - ... -Python Concurrency Workloads -============================ +Python Concurrency Workload Examples +==================================== Below we have a series of examples of how to implement the most common Python workloads that take advantage of concurrency. From 3a829780a6c9e963512b7caf004ba6b180e3dfcf Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 21 Aug 2024 15:56:06 -0600 Subject: [PATCH 12/80] Add a section about concurrent.futures. --- Doc/howto/concurrency.rst | 110 ++++++++++++++++++++++ Doc/includes/concurrency.py | 178 ++++++++++++++++++++++++++++++++++++ 2 files changed, 288 insertions(+) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 2422767cf177ca..58232b11a49bdf 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -292,6 +292,9 @@ Now let's see what they look like in Python. We'll also `compare them below `_. +Finally, we'll look at how `concurrent.futures `_ +provides a high-level API for some of the concurrency models. + Free-threading -------------- @@ -452,6 +455,113 @@ Comparison - ... - ... 
+concurrent.futures +------------------ + +.. currentmodule:: concurrent.futures + +:mod:`concurrent.futures` provides a high-level abstraction around +using concurrency in Python. + +The :class:`Executor` base class is the focal point of the API. +It is implemented for threads as :class:`ThreadPoolExecutor`, wrapping +:class:`threading.Thread`. It is implemented for subprocesses as +:class:`ProcessPoolExecutor`, wrapping :mod:`multiprocessing`. +It will be implemented for multiple interpreters as +:class:`!InterpreterPoolExecutor`. Each implementation has some very +minor uniqueness that we'll look at in a moment. + +.. note: :mod:`multiprocessing`, :mod:`asyncio`, and ``dask`` + provide similar APIs. In the case of :mod:`!multiprocessing`, + that API also supports thread and interpreter backends. + +.. note: Generic examples in this section will use the thread-based + implementation. However, any of the other implementations + can be simply substituted. + +With an executor you can call a function asynchronously (in the background) +using :meth:`executor.submit() `. +It returns a :class:`Future ` object which tracks completion +and provides the result. :class:`Future ` objects have a few +other tricks, like cancelation and completion callbacks, which we won't +cover here. Likewise we won't cover the various uses of timeouts. + +Here's an example of using :meth:`executor.submit() ` +and :meth:`Future.result() `: + +.. literalinclude:: ../includes/concurrency.py + :name: concurrency-cf-basic + :start-after: [start-cf-basic] + :end-before: [end-cf-basic] + :dedent: + :linenos: + +You can use :meth:`executor.map() ` to call a function +multiple times and yield each result: + +.. literalinclude:: ../includes/concurrency.py + :name: concurrency-cf-map-1 + :start-after: [start-cf-map-1] + :end-before: [end-cf-map-1] + :dedent: + :linenos: + +.. literalinclude:: ../includes/concurrency.py + :name: concurrency-cf-map-2 + :start-after: [start-cf-map-2] + :end-before: [end-cf-map-2] + :dedent: + :linenos: + +You can wait for an existing set of :class:`futures ` +using :func:`wait` +(and :func:`as_completed` and :meth:`executor.map() `): + +.. literalinclude:: ../includes/concurrency.py + :name: concurrency-cf-wait + :start-after: [start-cf-wait] + :end-before: [end-cf-wait] + :dedent: + :linenos: + +You can use :func:`as_completed` to handle each :class:`future ` +as it completes: + +.. literalinclude:: ../includes/concurrency.py + :name: concurrency-cf-as-completed + :start-after: [start-cf-as-completed] + :end-before: [end-cf-as-completed] + :dedent: + :linenos: + +In each case handling errors on a per-:class:`future ` basis +is straightforward: + +.. literalinclude:: ../includes/concurrency.py + :name: concurrency-cf-error-result + :start-after: [start-cf-error-result] + :end-before: [end-cf-error-result] + :dedent: + :linenos: + +As promised, here's a look at what is unique to each of the +:class:`Executor` implementations. + + +:class:`ThreadPoolExecutor`: + +* ... + +:class:`ProcessPoolExecutor`: + +* ... + +:class:`!InterpreterPoolExecutor`: + +* ... + +.. 
currentmodule:: None + Python Concurrency Workload Examples ==================================== diff --git a/Doc/includes/concurrency.py b/Doc/includes/concurrency.py index 4b0d519774152b..336cb9c988c8bf 100644 --- a/Doc/includes/concurrency.py +++ b/Doc/includes/concurrency.py @@ -5,6 +5,184 @@ """ +class ConcurrentFutures: + + def example_basic(self): + # [start-cf-basic] + from concurrent.futures import ThreadPoolExecutor as Executor + + with Executor() as e: + # Copy 4 files concurrently. + e.submit(shutil.copy, 'src1.txt', 'dest1.txt') + e.submit(shutil.copy, 'src2.txt', 'dest2.txt') + e.submit(shutil.copy, 'src3.txt', 'dest3.txt') + e.submit(shutil.copy, 'src4.txt', 'dest4.txt') + + # Run a function asynchronously and check the result. + fut = executor.submit(pow, 323, 1235) + res = fut.result() + assert res == 323**1235 + # [end-cf-basic] + + def example_map_1(self): + # [start-cf-map-1] + from concurrent.futures import ThreadPoolExecutor as Executor + + pow_args = { + 323: 1235, + 100: 10, + -1: 3, + } + for i in range(100): + pow_args[i] = i + + with Executor() as e: + # Run a function asynchronously and check the results. + results = e.map(pow, pow_args.keys(), pow_args.values()) + for (a, n), res in zip(pow_args.items(), results): + assert res == a**n + # [end-cf-map-1] + + def example_map_2(self): + # [start-cf-map-2] + from concurrent.futures import ThreadPoolExecutor as Executor + + files = { + 'src1.txt': 'dest1.txt', + 'src2.txt': 'dest2.txt', + 'src3.txt': 'dest3.txt', + 'src4.txt': 'dest4.txt', + } + + with Executor() as e: + # Copy files concurrently, tracking missing files. + copied = {} + results = e.map(shutil.copy, files.keys(), files.values()) + for src in files: + copied[src] = next(results) + assert list(copied.values()) == list(files.values()) + + # An alternate spelling: + copied = {} + results = e.map(shutil.copy, files.keys(), files.values()) + for src, res in zip(files, results, strict=True): + copied[src] = res + assert list(copied.values()) == list(files.values()) + # [end-cf-map-2] + + def example_wait(self): + # [start-cf-wait] + import concurrent.futures + from concurrent.futures import ThreadPoolExecutor as Executor + + files = { + 'src1.txt': 'dest1.txt', + 'src2.txt': 'dest2.txt', + 'src3.txt': 'dest3.txt', + 'src4.txt': 'dest4.txt', + } + + with Executor() as e: + # Copy 4 files concurrently and wait for them all to finish. + futures = (e.submit(shutil.copy, src, tgt) + for src, tgt in files.items()) + concurrent.futures.wait(futures) + + # Using as_completed(): + futures = (e.submit(shutil.copy, src, tgt) + for src, tgt in files.items()) + list(concurrent.futures.as_completed(futures)) + + # Using Executor.map(): + list(e.map(shutil.copy, files.keys(), files.values())) + # [end-cf-wait] + + def example_as_completed(self): + # [start-cf-as-completed] + import concurrent.futures + from concurrent.futures import ThreadPoolExecutor as Executor + + files = { + 'src1.txt': 'dest1.txt', + 'src2.txt': 'dest2.txt', + 'src3.txt': 'dest3.txt', + 'src4.txt': 'dest4.txt', + } + + with Executor() as e: + # Copy 4 files concurrently and handle each completion. 
+ copied = {} + missing = [] + futures = (e.submit(shutil.copy, src, tgt) + for src, tgt in files.items()) + futures = dict(zip(futures, enumerate(files, 1))) + for fut in concurrent.futures.as_completed(futures): + i, src = futures[fut] + res = fut.result() + print(f'({i}) {src} copied') + copied[src] = res + assert list(copied.values()) == list(files.values()) + # [end-cf-as-completed] + + def example_error_result(self): + # [start-cf-error-result] + import concurrent.futures + from concurrent.futures import ThreadPoolExecutor as Executor + + # Run a function asynchronously and catch the error. + def fail(): + raise Exception('spam!') + with Executor() as e: + fut = executor.submit(fail) + try: + fut.result() + except Exception as exc: + arg, = exc.args + assert arg == 'spam!' + + + # Copy files concurrently, tracking missing files. + files = { + 'src1.txt': 'dest1.txt', + 'src2.txt': 'dest2.txt', + 'src3.txt': 'dest3.txt', + 'src4.txt': 'dest4.txt', + } + with Executor() as e: + # using executor.map(): + results = e.map(shutil.copy, files.keys(), files.values()) + for src in files: + try: + next(results) + except FileNotFoundError: + print(f'missing {src}') + assert not list(results) + + # using wait(): + futures = (e.submit(shutil.copy, src, tgt) + for src, tgt in files.items()) + futures = dict(zip(futures, files)) + completed, _ = concurrent.futures.wait(futures) + for fut in completed: + src = futures[fut] + try: + fut.result() + except FileNotFoundError: + print(f'missing {src}') + + # using as_completed(): + futures = (e.submit(shutil.copy, src, tgt) + for src, tgt in files.items()) + futures = dict(zip(futures, files)) + for fut in concurrent.futures.as_completed(futures): + src = futures[fut] + try: + fut.result() + except FileNotFoundError: + print(f'missing {src}') + # [end-cf-error-result] + + class Workload1: def run_using_threads(self): From 560f26e65fe4ebf229c04a6f90151509861bc181 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 21 Aug 2024 16:51:27 -0600 Subject: [PATCH 13/80] Make the examples runnable. --- Doc/howto/concurrency.rst | 13 +- Doc/includes/concurrency.py | 405 ++++++++++++++++++++++-------------- 2 files changed, 262 insertions(+), 156 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 58232b11a49bdf..7be06e91a30ad0 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -538,9 +538,16 @@ In each case handling errors on a per-:class:`future ` basis is straightforward: .. literalinclude:: ../includes/concurrency.py - :name: concurrency-cf-error-result - :start-after: [start-cf-error-result] - :end-before: [end-cf-error-result] + :name: concurrency-cf-error-result-1 + :start-after: [start-cf-error-result-1] + :end-before: [end-cf-error-result-1] + :dedent: + :linenos: + +.. literalinclude:: ../includes/concurrency.py + :name: concurrency-cf-error-result-2 + :start-after: [start-cf-error-result-2] + :end-before: [end-cf-error-result-2] :dedent: :linenos: diff --git a/Doc/includes/concurrency.py b/Doc/includes/concurrency.py index 336cb9c988c8bf..bec62c98130d0e 100644 --- a/Doc/includes/concurrency.py +++ b/Doc/includes/concurrency.py @@ -4,27 +4,69 @@ :start-after: and :end-before: options. 
""" +import contextlib +import os +import tempfile + + +@contextlib.contextmanager +def dummy_files(*filenames): + with tempfile.TemporaryDirectory() as tempdir: + orig = os.getcwd() + os.chdir(tempdir) + try: + for filename in filenames: + with open(filename, 'w') as outfile: + outfile.write(f'# {filename}\n') + yield tempdir + finally: + os.chdir(orig) + + +try: + zip((), (), strict=True) +except TypeError: + def zip(*args, strict=False, _zip=zip): + return _zip(*args) + + +class example(staticmethod): + + registry = [] + + def __init__(self, func): + super().__init__(func) + self.func = func + + def __set_name__(self, cls, name): + assert name == self.func.__name__, (name, self.func.__name__) + type(self).registry.append((self.func, cls)) + class ConcurrentFutures: - def example_basic(self): - # [start-cf-basic] - from concurrent.futures import ThreadPoolExecutor as Executor + @example + def example_basic(): + with dummy_files('src1.txt', 'src2.txt', 'src3.txt', 'src4.txt'): + # [start-cf-basic] + import shutil + from concurrent.futures import ThreadPoolExecutor as Executor + + with Executor() as e: + # Copy 4 files concurrently. + e.submit(shutil.copy, 'src1.txt', 'dest1.txt') + e.submit(shutil.copy, 'src2.txt', 'dest2.txt') + e.submit(shutil.copy, 'src3.txt', 'dest3.txt') + e.submit(shutil.copy, 'src4.txt', 'dest4.txt') + + # Run a function asynchronously and check the result. + fut = e.submit(pow, 323, 1235) + res = fut.result() + assert res == 323**1235 + # [end-cf-basic] - with Executor() as e: - # Copy 4 files concurrently. - e.submit(shutil.copy, 'src1.txt', 'dest1.txt') - e.submit(shutil.copy, 'src2.txt', 'dest2.txt') - e.submit(shutil.copy, 'src3.txt', 'dest3.txt') - e.submit(shutil.copy, 'src4.txt', 'dest4.txt') - - # Run a function asynchronously and check the result. - fut = executor.submit(pow, 323, 1235) - res = fut.result() - assert res == 323**1235 - # [end-cf-basic] - - def example_map_1(self): + @example + def example_map(): # [start-cf-map-1] from concurrent.futures import ThreadPoolExecutor as Executor @@ -43,89 +85,95 @@ def example_map_1(self): assert res == a**n # [end-cf-map-1] - def example_map_2(self): - # [start-cf-map-2] - from concurrent.futures import ThreadPoolExecutor as Executor - - files = { - 'src1.txt': 'dest1.txt', - 'src2.txt': 'dest2.txt', - 'src3.txt': 'dest3.txt', - 'src4.txt': 'dest4.txt', - } - - with Executor() as e: - # Copy files concurrently, tracking missing files. - copied = {} - results = e.map(shutil.copy, files.keys(), files.values()) - for src in files: - copied[src] = next(results) - assert list(copied.values()) == list(files.values()) - - # An alternate spelling: - copied = {} - results = e.map(shutil.copy, files.keys(), files.values()) - for src, res in zip(files, results, strict=True): - copied[src] = res - assert list(copied.values()) == list(files.values()) - # [end-cf-map-2] - - def example_wait(self): - # [start-cf-wait] - import concurrent.futures - from concurrent.futures import ThreadPoolExecutor as Executor - - files = { - 'src1.txt': 'dest1.txt', - 'src2.txt': 'dest2.txt', - 'src3.txt': 'dest3.txt', - 'src4.txt': 'dest4.txt', - } + with dummy_files('src1.txt', 'src2.txt', 'src3.txt', 'src4.txt'): + # [start-cf-map-2] + import shutil + from concurrent.futures import ThreadPoolExecutor as Executor + + # Copy files concurrently. 
+ + files = { + 'src1.txt': 'dest1.txt', + 'src2.txt': 'dest2.txt', + 'src3.txt': 'dest3.txt', + 'src4.txt': 'dest4.txt', + } + + with Executor() as e: + copied = {} + results = e.map(shutil.copy, files.keys(), files.values()) + for src, dest in zip(files, results, strict=True): + print(f'copied {src} to {dest}') + copied[src] = dest + assert list(copied.values()) == list(files.values()) + # [end-cf-map-2] + + @example + def example_wait(): + with dummy_files('src1.txt', 'src2.txt', 'src3.txt', 'src4.txt'): + # [start-cf-wait] + import shutil + import concurrent.futures + from concurrent.futures import ThreadPoolExecutor as Executor - with Executor() as e: # Copy 4 files concurrently and wait for them all to finish. - futures = (e.submit(shutil.copy, src, tgt) - for src, tgt in files.items()) - concurrent.futures.wait(futures) - # Using as_completed(): - futures = (e.submit(shutil.copy, src, tgt) - for src, tgt in files.items()) - list(concurrent.futures.as_completed(futures)) + files = { + 'src1.txt': 'dest1.txt', + 'src2.txt': 'dest2.txt', + 'src3.txt': 'dest3.txt', + 'src4.txt': 'dest4.txt', + } + + with Executor() as e: + # Using wait(): + futures = [e.submit(shutil.copy, src, tgt) + for src, tgt in files.items()] + concurrent.futures.wait(futures) + + # Using as_completed(): + futures = (e.submit(shutil.copy, src, tgt) + for src, tgt in files.items()) + list(concurrent.futures.as_completed(futures)) + + # Using Executor.map(): + list(e.map(shutil.copy, files.keys(), files.values())) + # [end-cf-wait] + + @example + def example_as_completed(): + with dummy_files('src1.txt', 'src2.txt', 'src3.txt', 'src4.txt'): + # [start-cf-as-completed] + import shutil + import concurrent.futures + from concurrent.futures import ThreadPoolExecutor as Executor - # Using Executor.map(): - list(e.map(shutil.copy, files.keys(), files.values())) - # [end-cf-wait] - - def example_as_completed(self): - # [start-cf-as-completed] - import concurrent.futures - from concurrent.futures import ThreadPoolExecutor as Executor - - files = { - 'src1.txt': 'dest1.txt', - 'src2.txt': 'dest2.txt', - 'src3.txt': 'dest3.txt', - 'src4.txt': 'dest4.txt', - } - - with Executor() as e: # Copy 4 files concurrently and handle each completion. 
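+            # (as_completed() yields each future as soon as it finishes,
+            # regardless of the order they were submitted.)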
- copied = {} - missing = [] - futures = (e.submit(shutil.copy, src, tgt) - for src, tgt in files.items()) - futures = dict(zip(futures, enumerate(files, 1))) - for fut in concurrent.futures.as_completed(futures): - i, src = futures[fut] - res = fut.result() - print(f'({i}) {src} copied') - copied[src] = res - assert list(copied.values()) == list(files.values()) - # [end-cf-as-completed] - def example_error_result(self): - # [start-cf-error-result] + files = { + 'src1.txt': 'dest1.txt', + 'src2.txt': 'dest2.txt', + 'src3.txt': 'dest3.txt', + 'src4.txt': 'dest4.txt', + } + + with Executor() as e: + copied = {} + futures = (e.submit(shutil.copy, src, tgt) + for src, tgt in files.items()) + futures = dict(zip(futures, enumerate(files, 1))) + for fut in concurrent.futures.as_completed(futures): + i, src = futures[fut] + res = fut.result() + print(f'({i}) {src} copied') + copied[src] = res + assert set(copied.values()) == set(files.values()), (copied, files) + # [end-cf-as-completed] + + @example + def example_error_result(): + # [start-cf-error-result-1] + import shutil import concurrent.futures from concurrent.futures import ThreadPoolExecutor as Executor @@ -133,59 +181,69 @@ def example_error_result(self): def fail(): raise Exception('spam!') with Executor() as e: - fut = executor.submit(fail) + fut = e.submit(fail) try: fut.result() except Exception as exc: arg, = exc.args assert arg == 'spam!' + # [end-cf-error-result-1] + with dummy_files('src1.txt', 'src2.txt', 'src3.txt', 'src4.txt'): + # [start-cf-error-result-2] + import shutil + import concurrent.futures + from concurrent.futures import ThreadPoolExecutor as Executor - # Copy files concurrently, tracking missing files. - files = { - 'src1.txt': 'dest1.txt', - 'src2.txt': 'dest2.txt', - 'src3.txt': 'dest3.txt', - 'src4.txt': 'dest4.txt', - } - with Executor() as e: - # using executor.map(): - results = e.map(shutil.copy, files.keys(), files.values()) - for src in files: - try: - next(results) - except FileNotFoundError: - print(f'missing {src}') - assert not list(results) - - # using wait(): - futures = (e.submit(shutil.copy, src, tgt) - for src, tgt in files.items()) - futures = dict(zip(futures, files)) - completed, _ = concurrent.futures.wait(futures) - for fut in completed: - src = futures[fut] - try: - fut.result() - except FileNotFoundError: - print(f'missing {src}') - - # using as_completed(): - futures = (e.submit(shutil.copy, src, tgt) - for src, tgt in files.items()) - futures = dict(zip(futures, files)) - for fut in concurrent.futures.as_completed(futures): - src = futures[fut] - try: - fut.result() - except FileNotFoundError: - print(f'missing {src}') - # [end-cf-error-result] + # Copy files concurrently, tracking missing files. 
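+            # Copy files concurrently, tracking missing files.
+            # ('missing.txt' below has no corresponding source file,
+            # so copying it raises FileNotFoundError.)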
+ + files = { + 'src1.txt': 'dest1.txt', + 'src2.txt': 'dest2.txt', + 'src3.txt': 'dest3.txt', + 'src4.txt': 'dest4.txt', + 'missing.txt': 'dest5.txt', + } + + with Executor() as e: + # using executor.map(): + results = e.map(shutil.copy, files.keys(), files.values()) + for src in files: + try: + next(results) + except FileNotFoundError: + print(f'missing {src}') + assert not list(results) + + # using wait(): + futures = [e.submit(shutil.copy, src, tgt) + for src, tgt in files.items()] + futures = dict(zip(futures, files)) + completed, _ = concurrent.futures.wait(futures) + for fut in completed: + src = futures[fut] + try: + fut.result() + except FileNotFoundError: + print(f'missing {src}') + + # using as_completed(): + futures = (e.submit(shutil.copy, src, tgt) + for src, tgt in files.items()) + futures = dict(zip(futures, files)) + for fut in concurrent.futures.as_completed(futures): + src = futures[fut] + try: + fut.result() + except FileNotFoundError: + print(f'missing {src}') + # [end-cf-error-result-2] class Workload1: - def run_using_threads(self): + @example + def run_using_threads(): # [start-w1-threads] import threading @@ -198,7 +256,8 @@ def task(): ... # [end-w1-threads] - def run_using_multiprocessing(self): + @example + def run_using_multiprocessing(): # [start-w1-multiprocessing] import multiprocessing @@ -208,25 +267,29 @@ def task(): ... # [end-w1-multiprocessing] - def run_using_async(self): + @example + def run_using_async(): # [start-w1-async] # async 1 ... # [end-w1-async] - def run_using_subinterpreters(self): + @example + def run_using_subinterpreters(): # [start-w1-subinterpreters] # subinterpreters 1 ... # [end-w1-subinterpreters] - def run_using_smp(self): + @example + def run_using_smp(): # [start-w1-smp] # smp 1 ... # [end-w1-smp] - def run_using_concurrent_futures_thread(self): + @example + def run_using_concurrent_futures_thread(): # [start-w1-concurrent-futures-thread] # concurrent.futures 1 ... @@ -239,7 +302,8 @@ def run_using_concurrent_futures_thread(self): class Workload2: - def run_using_threads(self): + @example + def run_using_threads(): # [start-w2-threads] import threading @@ -252,7 +316,8 @@ def task(): ... # [end-w2-threads] - def run_using_multiprocessing(self): + @example + def run_using_multiprocessing(): # [start-w2-multiprocessing] import multiprocessing @@ -262,25 +327,29 @@ def task(): ... # [end-w2-multiprocessing] - def run_using_async(self): + @example + def run_using_async(): # [start-w2-async] # async 2 ... # [end-w2-async] - def run_using_subinterpreters(self): + @example + def run_using_subinterpreters(): # [start-w2-subinterpreters] # subinterpreters 2 ... # [end-w2-subinterpreters] - def run_using_smp(self): + @example + def run_using_smp(): # [start-w2-smp] # smp 2 ... # [end-w2-smp] - def run_using_concurrent_futures_thread(self): + @example + def run_using_concurrent_futures_thread(): # [start-w2-concurrent-futures-thread] # concurrent.futures 2 ... @@ -293,7 +362,8 @@ def run_using_concurrent_futures_thread(self): class Workload3: - def run_using_threads(self): + @example + def run_using_threads(): # [start-w3-threads] import threading @@ -306,7 +376,8 @@ def task(): ... # [end-w3-threads] - def run_using_multiprocessing(self): + @example + def run_using_multiprocessing(): # [start-w3-multiprocessing] import multiprocessing @@ -316,26 +387,54 @@ def task(): ... # [end-w3-multiprocessing] - def run_using_async(self): + @example + def run_using_async(): # [start-w3-async] # async 3 ... 
# [end-w3-async] - def run_using_subinterpreters(self): + @example + def run_using_subinterpreters(): # [start-w3-subinterpreters] # subinterpreters 3 ... # [end-w3-subinterpreters] - def run_using_smp(self): + @example + def run_using_smp(): # [start-w3-smp] # smp 3 ... # [end-w3-smp] - def run_using_concurrent_futures_thread(self): + @example + def run_using_concurrent_futures_thread(): # [start-w3-concurrent-futures-thread] # concurrent.futures 3 ... # [end-w3-concurrent-futures-thread] + + +if __name__ == '__main__': + # Run all the examples. + div1 = '#' * 40 + div2 = '#' + '-' * 39 + last = None + for func, cls in example.registry: + print() + if cls is not last: + last = cls + print(div1) + print(f'# {cls.__name__}') + print(div1) + print() + print(div2) + print(f'# {func.__name__}') + print(div2) + print() + try: + func() + except Exception: + import traceback + traceback.print_exc() From 4f49ed7c6fd994f302977faf2f948658304c500c Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 22 Aug 2024 09:51:18 -0600 Subject: [PATCH 14/80] Revert changes to Makefile. --- Doc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/Makefile b/Doc/Makefile index 08d24b090d7f49..b2ee3fe7d28ed0 100644 --- a/Doc/Makefile +++ b/Doc/Makefile @@ -4,7 +4,7 @@ # # You can set these variables from the command line. -PYTHON ?= python3 +PYTHON = python3 VENVDIR = ./venv UV = uv SPHINXBUILD = PATH=$(VENVDIR)/bin:$$PATH sphinx-build From 5151e3bd01be53b276e5bff5f06f917b1476a0b1 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 22 Aug 2024 09:52:07 -0600 Subject: [PATCH 15/80] Fix a typo. --- Doc/howto/concurrency.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 7be06e91a30ad0..e69cc3a172cd59 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -15,7 +15,7 @@ The following Python concurrency models are covered: * isolated threads, AKA CSP/actor model (:mod:`!interpreters`) * multi-processing (:mod:`multiprocessing` and :mod:`concurrent.futures`) * distributed, e.g. SMP (:mod:`!dask`) -* async/await (:mod:`asycio`) +* async/await (:mod:`asyncio`) Each of these will be explained, with some simple examples. The later workload-oriented examples will be implemented using each, From 14e59f8a2fdb29b298cdae7b02748a48e8190a8f Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 22 Aug 2024 09:53:19 -0600 Subject: [PATCH 16/80] Fix sphinx warnings. --- Doc/howto/concurrency.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index e69cc3a172cd59..584fa9dadc7a43 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -482,11 +482,11 @@ minor uniqueness that we'll look at in a moment. With an executor you can call a function asynchronously (in the background) using :meth:`executor.submit() `. It returns a :class:`Future ` object which tracks completion -and provides the result. :class:`Future ` objects have a few +and provides the result. :class:`Future ` objects have a few other tricks, like cancelation and completion callbacks, which we won't cover here. Likewise we won't cover the various uses of timeouts. -Here's an example of using :meth:`executor.submit() ` +Here's an example of using :meth:`executor.submit() ` and :meth:`Future.result() `: .. 
literalinclude:: ../includes/concurrency.py @@ -513,7 +513,7 @@ multiple times and yield each result: :dedent: :linenos: -You can wait for an existing set of :class:`futures ` +You can wait for an existing set of :class:`futures ` using :func:`wait` (and :func:`as_completed` and :meth:`executor.map() `): @@ -524,7 +524,7 @@ using :func:`wait` :dedent: :linenos: -You can use :func:`as_completed` to handle each :class:`future ` +You can use :func:`as_completed` to handle each :class:`future ` as it completes: .. literalinclude:: ../includes/concurrency.py @@ -534,7 +534,7 @@ as it completes: :dedent: :linenos: -In each case handling errors on a per-:class:`future ` basis +In each case handling errors on a per-:class:`future ` basis is straightforward: .. literalinclude:: ../includes/concurrency.py From 0b27fd90a1b508fd119d39ccc2842d843f2a89e2 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 22 Aug 2024 10:15:10 -0600 Subject: [PATCH 17/80] Adjust the references. --- Doc/howto/concurrency.rst | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 584fa9dadc7a43..b2ea15265deb93 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -480,14 +480,14 @@ minor uniqueness that we'll look at in a moment. can be simply substituted. With an executor you can call a function asynchronously (in the background) -using :meth:`executor.submit() `. -It returns a :class:`Future ` object which tracks completion -and provides the result. :class:`Future ` objects have a few -other tricks, like cancelation and completion callbacks, which we won't -cover here. Likewise we won't cover the various uses of timeouts. +using :meth:`Executor.submit`. It returns a :class:`Future` object +which tracks completion and provides the result. +:class:`!Future` objects have a few other tricks, like cancelation +and completion callbacks, which we won't cover here. +Likewise we won't cover the various uses of timeouts. -Here's an example of using :meth:`executor.submit() ` -and :meth:`Future.result() `: +Here's an example of using :meth:`Executor.submit` +and :meth:`Future.result`: .. literalinclude:: ../includes/concurrency.py :name: concurrency-cf-basic @@ -496,8 +496,8 @@ and :meth:`Future.result() `: :dedent: :linenos: -You can use :meth:`executor.map() ` to call a function -multiple times and yield each result: +You can use :meth:`Executor.map` to call a function multiple times +and yield each result: .. literalinclude:: ../includes/concurrency.py :name: concurrency-cf-map-1 @@ -513,9 +513,8 @@ multiple times and yield each result: :dedent: :linenos: -You can wait for an existing set of :class:`futures ` -using :func:`wait` -(and :func:`as_completed` and :meth:`executor.map() `): +You can wait for an existing set of :class:`!Future` objects using +:func:`wait` (and :func:`as_completed` and :meth:`Executor.map`): .. literalinclude:: ../includes/concurrency.py :name: concurrency-cf-wait @@ -524,7 +523,7 @@ using :func:`wait` :dedent: :linenos: -You can use :func:`as_completed` to handle each :class:`future ` +You can use :func:`as_completed` to handle each :class:`!Future` as it completes: .. literalinclude:: ../includes/concurrency.py @@ -534,7 +533,7 @@ as it completes: :dedent: :linenos: -In each case handling errors on a per-:class:`future ` basis +In each case handling errors on a per-:class:`!Future` basis is straightforward: .. 
literalinclude:: ../includes/concurrency.py From 0586c724c7686c58766e32eb12464c48bf8cad06 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 22 Aug 2024 14:37:55 -0600 Subject: [PATCH 18/80] Fill out and restructure the comparisons. --- Doc/howto/concurrency.rst | 271 ++++++++++++++++++++++++++++++-------- 1 file changed, 216 insertions(+), 55 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index b2ea15265deb93..b406f0fc819466 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -9,13 +9,13 @@ from procedural to object-oriented to functional. The same applies to concurrency. Here we'll look at how different concurrency models look in Python, with an emphasis on practical workload-oriented examples. -The following Python concurrency models are covered: +The following concurrency models are covered: -* free-threading (:mod:`threading` and :mod:`concurrent.futures`) -* isolated threads, AKA CSP/actor model (:mod:`!interpreters`) -* multi-processing (:mod:`multiprocessing` and :mod:`concurrent.futures`) -* distributed, e.g. SMP (:mod:`!dask`) -* async/await (:mod:`asyncio`) +* free-threading +* isolated threads, AKA CSP/actor model +* multi-processing +* distributed, e.g. SMP +* async/await Each of these will be explained, with some simple examples. The later workload-oriented examples will be implemented using each, @@ -38,7 +38,71 @@ for comparison, when possible. newer programming languages typically avoid exposing threads directly. Take that into consideration before reaching for threads and look at the alternatives first. - See `the table below `_. + +.. raw:: html + + + +For convenience, here's a summary comparng the concurrency models +in Python: + +.. list-table:: + :header-rows: 1 + :class: borderless vert-aligned + :align: left + + * - `model `_ + - Python API + - scale + - `multi-core `_ + - `races `_ + - overhead + - `c.f `_ + * - free threading `(Python) `_ + - :mod:`threading` + - small-medium + - `yes* `_ + - **yes** + - very low + - yes + * - isolated threads `(Python) `_ + - `interpreters `_ + - small-medium + - yes + - no + - `low+ `_ + - `yes* `_ + * - multiprocessing `(Python) `_ + - :mod:`multiprocessing` + - small + - yes + - no + - **medium** + - yes + * - distributed `(Python) `_ + - :mod:`!dask` + - large + - yes + - no + - **medium+** + - no + * - async/await `(Python) `_ + - :mod:`asyncio` + - small-medium + - **no** + - no + - low + - no All About Concurrency @@ -288,12 +352,118 @@ Python Concurrency Models ========================= We've looked at concurrency and concurrency models generally. -Now let's see what they look like in Python. +Now let's see what each looks like in Python. +We'll also look at `concurrent.futures `_ +provides a high-level API for some of the concurrency models. -We'll also `compare them below `_. +Here's a summary: -Finally, we'll look at how `concurrent.futures `_ -provides a high-level API for some of the concurrency models. +.. list-table:: + :header-rows: 1 + :class: borderless vert-aligned + :align: left + + * - model + - Python API + - scale + - pros + - cons + * - free threading + - :mod:`threading` + - small-medium + - * familiar to many + * many examples available + * can enable multi-core parallelism (`caveat: GIL `_) + - * all memory is subject to races + * some IO may have races (e.g. 
writing to stdout) + * can be hard for humans to follow what's happening in different + threads at any given point + * - multiple interpreters (isolated threads) + - `interpreters `_ + - small-medium + - * isolation eliminates nearly all races, by default + (sharing is strictly opt-in) + * synchronization is built in to cross-interpreter interaction + * enables full multi-core parallelism of all Python code + - * unfamiliar to many + * less efficient than threads + * (currently) limited in what data can be shared between + interpreters + * - multiprocessing + - :mod:`multiprocessing` + - small + - * isolated (no races) + * enables full multi-core parallelism of all Python code + - * substantially less efficient than using a single process + * can lead to exhaustion of system resources + (e.g. file handles, PIDs) + * API can be hard to use + * - distributed + - :mod:`!dask` + - large + - * isolated (no races) + * fully parallel + * facilitates massive scaling + - * not necessarily a good fit for small-scale applications + * often requires configuration + * - async/await + - :mod:`asyncio` + - small-medium + - * not subject to races + * increasingly familiar to many; popular in newer languages + * has a long history in Python (e.g. ``twisted``) + - * async and non-async functions don't mix well, + potentially leading to duplication of code + * switching to async can require substantial cascading code churn + * callbacks can make it difficult to follow program logic, + making debugging harder + * does not enable multi-core parallelism + +Here's a comparison of the overhead of each model in Python: + +.. list-table:: + :header-rows: 1 + :class: borderless vert-aligned + :align: left + + * - model + - memory + - startup + - cross-task + - management + - system + * - free threading + - very low + - very low + - none + - very low + - none + * - multiple interpreters + - `low* `_ + - `medium* `_ + - low + - very low + - none + * - multiprocessing + - medium + - medium + - medium + - medium + - low + * - distributed + - medium+ + - medium+ + - medium-high + - medium + - low-medium + * - async/await + - low + - low + - none + - low + - none + +.. _python-free-threading: Free-threading -------------- @@ -320,11 +490,25 @@ Here's a basic example of how it looks to use the threading module:: for t in threads: t.join() +.. _python-gil: + +The Global Interpreter Lock (GIL) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + Note that there are some limitations to the parallelism Python can provide. See :pep:`630`. +the :term:`global interpreter lock` (GIL) prevents multi-core +parallelism for CPU-bound Python code (:pep:`for now... <630>`) + +the :term:`global interpreter lock` (GIL) + +... + .. currentmodule:: None +.. _python-isolated-threads: + Isolated Threads (CSP/Actor Model) ---------------------------------- @@ -354,6 +538,22 @@ The future stdlib :mod:`!interpreters` module supports isolated execution: for t in threads: t.join() +.. _python-stdlib-interpreters: + +A Stdlib Module for Using Multiple Interpreters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:pep:`734` + +.. _python-interpreters-overhead: + +Improving Performance for Multiple Interpreters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +improving... + +.. _python-multiprocessing: + Multi-processing ---------------- @@ -381,6 +581,8 @@ You can use the stdlib :mod:`multiprocessing` module: .. currentmodule:: None +.. 
_python-distributed:
+
+Distributed
+-----------
+
+The popular :mod:`!dask` module gives us distributed concurrency:
+
+::
+
+    from dask.distributed import LocalCluster
+
+    def task()
+        # Do something.
+        pass
+
+    client = LocalCluster().get_client()
+
+    futures = []
+    for _ in range(5):
+        fut = client.submit(task)
+        futures.append(fut)
+
+    # Wait for all the tasks to finish.
+    client.gather(futures)
+
+.. _python-async-await:
+
 Async/Await
 -----------
 
@@ -426,35 +630,6 @@ The stdlib :mod:`asyncio` module provides an event loop you can use:
 
 .. currentmodule:: None
 
-.. _concurrency-models-comparison:
-
-Comparison
-----------
-
-.. list-table::
-   :header-rows: 1
-   :class: borderless
-   :align: left
-
-   * - model
-     - pros
-     - cons
-   * - threads
-     - ...
-     - ...
-   * - multiple interpreters
-     - ...
-     - ...
-   * - multiprocessing
-     - ...
-     - ...
-   * - distributed
-     - ...
-     - ...
-   * - async/await
-     - ...
-     - ...
-
 concurrent.futures
 ------------------
 
From 8df61adb644b1adee084f194bcd3e1400aad797d Mon Sep 17 00:00:00 2001
From: Eric Snow <ericsnowcurrently@gmail.com>
Date: Thu, 22 Aug 2024 16:15:15 -0600
Subject: [PATCH 19/80] Fill out the section about Python threads.

---
 Doc/howto/concurrency.rst | 74 ++++++++++++++++++++++++++++-----------
 1 file changed, 54 insertions(+), 20 deletions(-)

diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst
index b406f0fc819466..425fe212148f57 100644
--- a/Doc/howto/concurrency.rst
+++ b/Doc/howto/concurrency.rst
@@ -470,40 +470,74 @@ Free-threading
 
 .. currentmodule:: threading
 
-For free-threading we can use the stdlib :mod:`threading` module.
+Threads, through the :mod:`threading` module, have been the dominant
+tool in Python concurrency for decades, which mirrors the general
+state of software. Threads are very light-weight and efficient.
+Most importantly, they are the most direct route to taking advantage
+of multi-core parallelism (more on that in a moment).
+
+The main downside to using threads is that each one shares the full
+memory of the process with all the others. That exposes programs
+to a significant risk of `races `_.
+
+The other potential problem with using threads is that the conceptual
+model has no inherent synchronization, so it can be hard to follow
+what is going on in the program at any given moment. That is
+especially challenging for testing and debugging.
+
+Using threads for concurrency boils down to:
+
+1. create a thread object to run a function
+2. start the thread
+3. (optionally) wait for it to finish
+
+Here's how that looks::
 
-Here's a basic example of how it looks to use the threading module::
+    import threading
 
-    import threading
+    def task():
+        # Do something.
+        ...
 
-    def task():
-        # Do something.
-        pass
+    t = threading.Thread(target=task)
+    t.start()
 
-    threads = []
-    for _ in range(5):
-        t = threading.Thread(target=task)
-        t.start()
-        threads.append(t)
+    # Do other stuff.
+
+    t.join()
 
-    # Wait for all the threads to finish
-    for t in threads:
-        t.join()
+.. _python-gil:
+
+The Global Interpreter Lock (GIL)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Note that there are some limitations to the parallelism Python
-can provide. See :pep:`630`.
+While physical threads are the direct route to multi-core parallelism,
+Python's threads have always had an extra wrinkle that gets in the way:
+the :term:`global interpreter lock` (GIL).
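+
+One way to see the wrinkle in action is to compare an IO-bound task
+with a CPU-bound one. Here's a rough sketch of the effect (the exact
+numbers will vary from machine to machine)::
+
+    import threading
+    import time
+
+    def cpu_task():
+        # Pure Python computation; the GIL is held the whole time.
+        sum(range(10_000_000))
+
+    def io_task():
+        # Blocking calls like sleep() and socket reads release the GIL.
+        time.sleep(1)
+
+    def timed(func, nthreads=4):
+        threads = [threading.Thread(target=func) for _ in range(nthreads)]
+        start = time.monotonic()
+        for t in threads:
+            t.start()
+        for t in threads:
+            t.join()
+        return time.monotonic() - start
+
+    # The IO-bound threads overlap almost completely (about 1 second
+    # total), while the CPU-bound threads take turns holding the GIL
+    # and finish in roughly the time of running them one after another.
+    print(timed(io_task), timed(cpu_task))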
+
+The :term:`!GIL` is a very efficient tool for keeping the Python
+implementation simple, which is an important constraint for the project.
+In fact, it protects Python's maintainers and users from a large
+category of concurrency problems that one must normally face when
+threads are involved.
+
+The big tradeoff is that the bytecode interpreter, which executes your
+Python code, only runs while holding the :term:`!GIL`. That means only
+one thread can be running Python code at a time. Threads will take
+short turns, so none have to wait too long, but it still prevents
+any actual parallelism.
+
+At the same time, the Python runtime (and extension modules) can
+release the :term:`!GIL` when the thread is going to be doing something
+unrelated to Python, particularly something slow or long,
+like a blocking IO operation.
+
+There is also an ongoing effort to eliminate the :term:`!GIL`:
+:pep:`703`. Any attempt to remove the :term:`!GIL` necessarily involves
+some slowdown to single-threaded performance and extra maintenance
+burden to the Python project and extension module maintainers.
+However, there is sufficient interest in unlocking full multi-core
+parallelism to justify the current experiment.

.. currentmodule:: None

From 269eaac4ac142491ab849bc14fc04706437b2372 Mon Sep 17 00:00:00 2001
From: Eric Snow <ericsnowcurrently@gmail.com>
Date: Thu, 22 Aug 2024 16:15:29 -0600
Subject: [PATCH 20/80] Fill out the section about multiple interpreters.

---
 Doc/howto/concurrency.rst | 102 +++++++++++++++++++++++++++++++++-----
 1 file changed, 89 insertions(+), 13 deletions(-)

diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst
index 425fe212148f57..5b3cbfc28b1632 100644
--- a/Doc/howto/concurrency.rst
+++ b/Doc/howto/concurrency.rst
@@ -546,45 +546,121 @@ Isolated Threads (CSP/Actor Model)
 ----------------------------------
 
-The future stdlib :mod:`!interpreters` module supports isolated execution:
+There's a major alternative to free-threading, both for multi-core
+parallelism and for a simpler conceptual model: use multiple interpreters.
+
+Python's major implementation, CPython, has for decades supported
+running with multiple independent copies of the Python runtime
+("interpreter") in a single process. However, these interpreters
+weren't completely isolated from one another; most importantly they
+shared the one :term:`!GIL`. Over several years a lot of work went
+into improving the isolation between interpreters, culminating in
+no longer sharing a single :term:`!GIL`.
+
+Besides unlocking full multi-core parallelism, the isolation between
+interpreters means that, from a conceptual level, concurrency can be
+simpler. An interpreter encapsulates all of Python's runtime state,
+including things like :data:`sys.modules`. By default, interpreters
+mostly don't share any data (including objects) at all. Anything that
+gets shared is done on a strictly opt-in basis. That means programmers
+don't need to worry about possible `races `_
+with *any* data in the program. They only need to worry about data
+that was explicitly shared.
+
+Using multiple interpreters is fairly straightforward:
+
+1. create a new interpreter
+2. switch the current thread to use that interpreter
+3. call :func:`exec`, but targeting the new interpreter
+4. switch back
+
+You can use the :mod:`!interpreters` module (more on that in a moment)
+to do this::

-::
+    import interpreters
+
+    script = """if True:
+        # Do something.
+        ...
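+        # (The script runs in the target interpreter's own
+        # __main__ namespace, in the current OS thread.)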
+ """ + + interp = interpreters.create() + interp.exec(script) + +Note that no threads were involved. That's because running in an +interpreter happens relative to the current thread. New threads +aren't implicitly involved. They can be added in explicitly though. +Why? For multi-core parallelism. + +If you want multi-core parallelism, run a different interpreter in each +thread. Their isolation means that each can run unblocked in that +thread. + +Here's the very explicit way to do that:: import interpreters import threading script = """if True: # Do something. - pass + ... """ def task(): interp = interpreters.create() interp.exec(script) - threads = [] - for _ in range(5): - t = threading.Thread(target=task) - t.start() - threads.append(t) + t = threading.Thread(target=task) + t.start() + + # Do other stuff. + + t.join() - # Wait for all the subinterpreters to finish - for t in threads: - t.join() +There's a convenience method too:: + + import interpreters + + def task(): + # Do something. + ... + + interp = interpreters.create() + t = interp.call_in_thread(task) + + # Do other stuff. + + t.join() .. _python-stdlib-interpreters: A Stdlib Module for Using Multiple Interpreters ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -:pep:`734` +While use of multiple interpreters has been part of Python's C-API +for decades, the feature hasn't been exposed to Python code through +the stdlib. :pep:`734` proposes changing that by adding a new +:mod:`!interpreters` module. + +In the meantime, an implementation of that PEP is available for +Python 3.13+ on PyPI: :pypi:`interpreters-pep-734`. .. _python-interpreters-overhead: Improving Performance for Multiple Interpreters ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -improving... +The long effort to improve on Python's implementation of multiple +interpreters focused on isolation and stability. There was very little +done to improve performance. This has the most impact on: + +* how much memory each interpreter uses + (i.e. how many can run at the same time) +* how long it takes to create a new interpreter + +As the work on isolation wraps up, improvements will shift to focus +on performance and memory usage. Thus the overhead associated with +using multiple interpreters will drastically decrease over time. .. _python-multiprocessing: From 77549126235968b2f10fc1c0f5d34380ad9eca49 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 22 Aug 2024 16:49:07 -0600 Subject: [PATCH 21/80] Fill out the section about multprocessing. --- Doc/howto/concurrency.rst | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 5b3cbfc28b1632..4b2467d0add4df 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -669,9 +669,23 @@ Multi-processing .. currentmodule:: multiprocessing -You can use the stdlib :mod:`multiprocessing` module: - -:: +The stdlib :mod:`multiprocessing` module, which has been around many +years, provides an API for using multiple processes for concurrency. +Furthermore, processes are always isolated, so you have many of the +same benefits of using multiple interpreters, including multi-core +parallelism. + +There are some obstacles however. First of all, using multiple +processes has a higher overhead than operating in a single process, +sometimes significantly higher. This applies in just about every +dimension of overhead. 
Secondly, the :mod:`multiprocessing` module's +API is substantially larger and more complex that what we use for +threads and multiple interpreters. Finally, there are some scaling +issues with using multiple processes, related both to the performance +overhead and to how the operating system assigns resources like +file handles. + +Here's a very basic example:: import multiprocessing @@ -679,15 +693,18 @@ You can use the stdlib :mod:`multiprocessing` module: # Do something. pass - procs = [] - for _ in range(5): - p = multiprocessing.Process(target=task) - p.start() - procs.append(p) + p = multiprocessing.Process(target=task) + p.start() + + # Do other stuff. + + p.join() - # Wait for all the subprocesses to finish - for p in procs: - p.join() +The similarity with :class:`threading.Thread` is intentional. +On top of that, the :mod:`multiprocessing` module provides an extensive +API to address a variety of needs, including machinery for inter-process +shared memory. Also note that that API can be used for threads and +(eventually) interpreters using different backends. .. currentmodule:: None From 2c9a793cfd37999eafe1e52e40108559d9bb05b3 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 23 Aug 2024 12:42:39 -0600 Subject: [PATCH 22/80] Fill out the section about async/await. --- Doc/howto/concurrency.rst | 101 +++++++++++++++++++++++++++++------- Doc/reference/datamodel.rst | 2 + 2 files changed, 84 insertions(+), 19 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 4b2467d0add4df..49b9278f8306be 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -15,7 +15,7 @@ The following concurrency models are covered: * isolated threads, AKA CSP/actor model * multi-processing * distributed, e.g. SMP -* async/await +* coroutines (async/await) Each of these will be explained, with some simple examples. The later workload-oriented examples will be implemented using each, @@ -96,7 +96,7 @@ in Python: - no - **medium+** - no - * - async/await `(Python) `_ + * - coroutines `(Python) `_ - :mod:`asyncio` - small-medium - **no** @@ -198,7 +198,7 @@ isolated threads threads, often physical, with strict isolation between them (e.g. CSP and actor model) multiprocessing using multiple isolated processes distributed multiprocessing across multiple computers -async/await using coroutines (AKA "cooperative multitasking") +coroutines "cooperative multitasking", AKA async/await ================= ========== (There are certainly others, but these are the focus here.) @@ -259,7 +259,7 @@ and worth diligently avoiding. Races are possible when the concurrency approach is subject to parallel execution or to non-deterministic switching. -(This excludes "async/await", which relies on cooperative multitasking.) +(This excludes coroutines, which rely on cooperative multitasking.) When all memory is possibly shared, as is the case with free-threading, then all memory is at risk. @@ -406,7 +406,7 @@ Here's a summary: * facilitates massive scaling - * not necessarily a good fit for small-scale applications * often requires configuration - * - async/await + * - coroutines (async/await) - :mod:`asyncio` - small-medium - * not subject to races @@ -456,7 +456,7 @@ Here's a comparison of the overhead of each model in Python: - medium-high - medium - low-medium - * - async/await + * - coroutines - low - low - none @@ -733,30 +733,93 @@ The popular :mod:`!dask` module gives us distributed concurrency: # Wait for all the tasks to finish. client.gather(futures) -.. 
_python-async-await:
-
-Async/Await
------------
+.. _python-coroutines:
+
+Coroutines (Async/Await)
+------------------------
 
 .. currentmodule:: asyncio
 
-The stdlib :mod:`asyncio` module provides an event loop you can use:
-
-::
+The use of :term:`coroutines ` for concurrency has been
+around a long time and has grown in popularity in the software world,
+particularly with the addition of ``async/await`` syntax in
+various languages.
+
+Python has supported coroutines to some degree since the beginning.
+The best example is :pypi:`twisted`, which has provided this concurrency
+model for decades.  For most of that time :pypi:`!twisted` did it
+primarily through callbacks and a form of "promises"/"futures".
+
+Explicit support for coroutines in Python really started with the
+introduction of :term:`generators ` in Python 2.2
+(:pep:`255`).  In Python 2.5 (:pep:`342`), :term:`!generators` were
+tweaked to explicitly support use as coroutines.  That went a step
+further in Python 3.3 with the addition of ``yield from`` (:pep:`380`)
+and the :mod:`asyncio` module (:pep:`3156`).  Finally, in Python 3.5
+(:pep:`492`), we got dedicated ``async/await`` syntax
+and :ref:`a dedicated protocol `
+for :term:`!coroutine` objects.
+
+There are three main pieces to using coroutines:
+
+* coroutines (non-blocking, yield control instead)
+* an event loop (schedules coroutines)
+* coroutine wrappers around blocking operations
+
+A :term:`coroutine function` looks *almost* the same as a regular
+function.  It is a non-blocking function that *cooperatively* yields
+control of the program to other coroutines, which in turn yield control
+back (eventually).  At those points of synchronization,
+coroutines often provide data to one another.
+
+The event loop is what keeps track of which coroutines have yielded
+control and which should get control next.
+
+Generally a coroutine needs to avoid doing anything that takes very long
+before yielding control back to the event loop.  Any blocking operation
+in a coroutine, like waiting on a socket, has to be implemented in a way
+that only waits a little while, yields, and then waits again, etc. until
+ready.  The alternative is to wrap the blocking operation/function
+in some sort of "future" coroutine that yields until the blocking
+operation completes.  The event loop can also fill that role
+to an extent.
+
+In addition to support for coroutines in the language, Python's stdlib
+provides the :mod:`asyncio` module, which includes:
+
+* an event loop
+* a number of useful coroutines
+* a variety of helpful APIs that build on coroutines and the event loop
+
+Here's a very basic example of using coroutines with :mod:`!asyncio`::
 
     import asyncio
 
-    async def task():
-        # Do something.
-        pass
+    async def task(data):
+        # Do something small.
+        await asyncio.sleep(0.1)
+        # Do something else small.
+        return data
 
-    coros = [task() for _ in range(5)]
+    # Run it once, basically synchronously.
+    res = asyncio.run(task('spam!'))
+    assert res == 'spam!', repr(res)
 
-    # Wait for all the coroutines to finish.
-    await asyncio.gather(*coros)
+    # Run it multiple times concurrently.
+    values = list(range(5))
+
+    async def main():
+        return await asyncio.gather(*(task(v) for v in values))
+
+    res = asyncio.run(main())
+    assert res == values, (res, values)
 
 .. currentmodule:: None
 
+One of the main challenges with using coroutines is that they do not
+normally mix well with non-coroutines.  As a result, ``async/await``
+can be contagious, requiring surrounding code to be async.
This can +lead to having the same thing implemented twice, once normal and once +async, with signficant code duplication. + concurrent.futures ------------------ @@ -897,7 +960,7 @@ Workload 1 * - threads - multiple interpreters - - async/await + - coroutines - multiple processes - SMP * - .. raw:: html diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index f099d5553963e0..9e8b83dc2d786c 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -3332,6 +3332,8 @@ object itself in order to be consistently invoked by the interpreter). .. index:: single: coroutine +.. _coroutine-protocol: + Coroutines ========== From 2975bbb5e02fd20eaeca0cf894db3b6c604af257 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 23 Aug 2024 12:47:24 -0600 Subject: [PATCH 23/80] Shuffle the concurrency model order. --- Doc/howto/concurrency.rst | 200 +++++++++++++++++++------------------- 1 file changed, 100 insertions(+), 100 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 49b9278f8306be..96b403484b9407 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -13,9 +13,9 @@ The following concurrency models are covered: * free-threading * isolated threads, AKA CSP/actor model +* coroutines (async/await) * multi-processing * distributed, e.g. SMP -* coroutines (async/await) Each of these will be explained, with some simple examples. The later workload-oriented examples will be implemented using each, @@ -82,6 +82,13 @@ in Python: - no - `low+ `_ - `yes* `_ + * - coroutines `(Python) `_ + - :mod:`asyncio` + - small-medium + - **no** + - no + - low + - no * - multiprocessing `(Python) `_ - :mod:`multiprocessing` - small @@ -96,13 +103,6 @@ in Python: - no - **medium+** - no - * - coroutines `(Python) `_ - - :mod:`asyncio` - - small-medium - - **no** - - no - - low - - no All About Concurrency @@ -196,9 +196,9 @@ free threads using multiple physical threads in the same process, with no isolation between them isolated threads threads, often physical, with strict isolation between them (e.g. CSP and actor model) +coroutines "cooperative multitasking", AKA async/await multiprocessing using multiple isolated processes distributed multiprocessing across multiple computers -coroutines "cooperative multitasking", AKA async/await ================= ========== (There are certainly others, but these are the focus here.) @@ -389,6 +389,18 @@ Here's a summary: * less efficient than threads * (currently) limited in what data can be shared between interpreters + * - coroutines (async/await) + - :mod:`asyncio` + - small-medium + - * not subject to races + * increasingly familiar to many; popular in newer languages + * has a long history in Python (e.g. ``twisted``) + - * async and non-async functions don't mix well, + potentially leading to duplication of code + * switching to async can require substantial cascading code churn + * callbacks can make it difficult to follow program logic, + making debugging harder + * does not enable multi-core parallelism * - multiprocessing - :mod:`multiprocessing` - small @@ -406,18 +418,6 @@ Here's a summary: * facilitates massive scaling - * not necessarily a good fit for small-scale applications * often requires configuration - * - coroutines (async/await) - - :mod:`asyncio` - - small-medium - - * not subject to races - * increasingly familiar to many; popular in newer languages - * has a long history in Python (e.g. 
``twisted``) - - * async and non-async functions don't mix well, - potentially leading to duplication of code - * switching to async can require substantial cascading code churn - * callbacks can make it difficult to follow program logic, - making debugging harder - * does not enable multi-core parallelism Here's a comparison of the overhead of each model in Python: @@ -444,6 +444,12 @@ Here's a comparison of the overhead of each model in Python: - low - very low - none + * - coroutines + - low + - low + - none + - low + - none * - multiprocessing - medium - medium @@ -456,12 +462,6 @@ Here's a comparison of the overhead of each model in Python: - medium-high - medium - low-medium - * - coroutines - - low - - low - - none - - low - - none .. _python-free-threading: @@ -662,77 +662,6 @@ As the work on isolation wraps up, improvements will shift to focus on performance and memory usage. Thus the overhead associated with using multiple interpreters will drastically decrease over time. -.. _python-multiprocessing: - -Multi-processing ----------------- - -.. currentmodule:: multiprocessing - -The stdlib :mod:`multiprocessing` module, which has been around many -years, provides an API for using multiple processes for concurrency. -Furthermore, processes are always isolated, so you have many of the -same benefits of using multiple interpreters, including multi-core -parallelism. - -There are some obstacles however. First of all, using multiple -processes has a higher overhead than operating in a single process, -sometimes significantly higher. This applies in just about every -dimension of overhead. Secondly, the :mod:`multiprocessing` module's -API is substantially larger and more complex that what we use for -threads and multiple interpreters. Finally, there are some scaling -issues with using multiple processes, related both to the performance -overhead and to how the operating system assigns resources like -file handles. - -Here's a very basic example:: - - import multiprocessing - - def task() - # Do something. - pass - - p = multiprocessing.Process(target=task) - p.start() - - # Do other stuff. - - p.join() - -The similarity with :class:`threading.Thread` is intentional. -On top of that, the :mod:`multiprocessing` module provides an extensive -API to address a variety of needs, including machinery for inter-process -shared memory. Also note that that API can be used for threads and -(eventually) interpreters using different backends. - -.. currentmodule:: None - -.. _python-distributed: - -Distributed ------------ - -The popular :mod:`!dask` module gives us distributed concurrency: - -:: - - from dask.distributed import LocalCluster - - def task() - # Do something. - pass - - client = LocalCluster().get_client() - - futures = [] - for _ in range(5): - fut = client.submit(task) - futures.append(fut) - - # Wait for all the tasks to finish. - client.gather(futures) - .. _python-coroutines: Coroutines (Async/Await) @@ -812,14 +741,85 @@ Here's a very basic example of using coroutines with :mod:`!asyncio`:: ) assert res == values, (res, values) -.. currentmodule:: None - One of the main challenges with using coroutines is that they do not normally mix well with non-coroutines. As a result, ``async/await`` can be contagious, requiring surrounding code to be async. This can lead to having the same thing implemented twice, once normal and once async, with signficant code duplication. +.. currentmodule:: None + +.. _python-multiprocessing: + +Multi-processing +---------------- + +.. 
currentmodule:: multiprocessing
+
+The stdlib :mod:`multiprocessing` module, which has been around many
+years, provides an API for using multiple processes for concurrency.
+Furthermore, processes are always isolated, so you have many of the
+same benefits of using multiple interpreters, including multi-core
+parallelism.
+
+There are some obstacles however.  First of all, using multiple
+processes has a higher overhead than operating in a single process,
+sometimes significantly higher.  This applies in just about every
+dimension of overhead.  Secondly, the :mod:`multiprocessing` module's
+API is substantially larger and more complex than what we use for
+threads and multiple interpreters.  Finally, there are some scaling
+issues with using multiple processes, related both to the performance
+overhead and to how the operating system assigns resources like
+file handles.
+
+Here's a very basic example::
+
+    import multiprocessing
+
+    def task()
+        # Do something.
+        pass
+
+    p = multiprocessing.Process(target=task)
+    p.start()
+
+    # Do other stuff.
+
+    p.join()
+
+The similarity with :class:`threading.Thread` is intentional.
+On top of that, the :mod:`multiprocessing` module provides an extensive
+API to address a variety of needs, including machinery for inter-process
+shared memory.  Also note that the same API can be used for threads and
+(eventually) interpreters using different backends.
+
+.. currentmodule:: None
+
+.. _python-distributed:
+
+Distributed
+-----------
+
+The popular :mod:`!dask` module gives us distributed concurrency:
+
+::
+
+    from dask.distributed import LocalCluster
+
+    def task()
+        # Do something.
+        pass
+
+    client = LocalCluster().get_client()
+
+    futures = []
+    for _ in range(5):
+        fut = client.submit(task)
+        futures.append(fut)
+
+    # Wait for all the tasks to finish.
- client.gather(futures) + # Run it multiple times concurrently. + values = list(range(5)) + res = client.gather( + (client.submit(task, v) for v in values), + ) + assert res == values, (res, values) concurrent.futures ------------------ @@ -836,7 +841,7 @@ It will be implemented for multiple interpreters as :class:`!InterpreterPoolExecutor`. Each implementation has some very minor uniqueness that we'll look at in a moment. -.. note: :mod:`multiprocessing`, :mod:`asyncio`, and ``dask`` +.. note: :mod:`multiprocessing`, :mod:`asyncio`, and :pypi:`dask` provide similar APIs. In the case of :mod:`!multiprocessing`, that API also supports thread and interpreter backends. From 5e7f7c3d5ae1d1ade281ae4c4f938ff4049d82cd Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 23 Aug 2024 13:28:46 -0600 Subject: [PATCH 25/80] Drop an incomplete note. --- Doc/howto/concurrency.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 790b1f9f839a06..df8220c4faa4f3 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -324,10 +324,6 @@ group concurrency workloads. Here are some: * how much data do logical threads share * size of the data shared by threads -From our list of workloads above, we can observe some clustering: - -* ... - Let's also revisit the ways concurrency can be helpful: * get work done faster From c06b8433c1aaea4147524323e1e7da9be29f2919 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 23 Aug 2024 13:34:18 -0600 Subject: [PATCH 26/80] Fix typos. --- Doc/howto/concurrency.rst | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index df8220c4faa4f3..a84e0da4138d3f 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -53,7 +53,7 @@ for comparison, when possible. } -For convenience, here's a summary comparng the concurrency models +For convenience, here's a summary comparing the concurrency models in Python: .. list-table:: @@ -116,7 +116,7 @@ from a strictly *logical* viewpoint. When a computer program runs, it executes a sequence of code in a given order. If you were to trace the actual execution, you would -still end up with a *linear* series of executed intructions that matches +still end up with a *linear* series of executed instructions that matches the code. We call this sequence of code (and instructions) a logical "thread" of execution. @@ -183,7 +183,7 @@ Concurrency Models The concept of concurrency has been a part of the study and practice of computer software since very early on, in the 1950s and 1960s, -long before the wide-spread adotion of multi-core CPUs. Clearly +long before the wide-spread adoption of multi-core CPUs. Clearly its about more than just parallelism. 
Over the decades, research and use of concurrency has led to a variety @@ -300,12 +300,14 @@ ATM network handle multiple bank transactions at once hacker toolkit decode a passwd file with brute force raytracer compute RGB for each image pixel machine learning apply matrices on training data set -astrophysics merge black hole data from multiple satelites and observatories -investing combine thousands of industry data sources into a concise actionable analysis +astrophysics merge black hole data from multiple satellites + and observatories +investing combine thousands of industry data sources into + a concise actionable analysis MMO game server handle login requests, handle client updates game client GUI, physics engine, handle server updates audio transcoder process chunks -engineering simultation calculate stress loads at vertices +engineering simulation calculate stress loads at vertices molecular modeling try many permutations ======================= =========== @@ -741,7 +743,7 @@ One of the main challenges with using coroutines is that they do not normally mix well with non-coroutines. As a result, ``async/await`` can be contagious, requiring surrounding code to be async. This can lead to having the same thing implemented twice, once normal and once -async, with signficant code duplication. +async, with significant code duplication. .. currentmodule:: None @@ -848,7 +850,7 @@ minor uniqueness that we'll look at in a moment. With an executor you can call a function asynchronously (in the background) using :meth:`Executor.submit`. It returns a :class:`Future` object which tracks completion and provides the result. -:class:`!Future` objects have a few other tricks, like cancelation +:class:`!Future` objects have a few other tricks, like cancellation and completion callbacks, which we won't cover here. Likewise we won't cover the various uses of timeouts. From e05e76e7a2ad5b87c92f36ba86c3bd6752c02d58 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 23 Aug 2024 16:09:02 -0600 Subject: [PATCH 27/80] Add a missing divider. --- Doc/includes/concurrency.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Doc/includes/concurrency.py b/Doc/includes/concurrency.py index bec62c98130d0e..6be6f29cfd0083 100644 --- a/Doc/includes/concurrency.py +++ b/Doc/includes/concurrency.py @@ -240,6 +240,10 @@ def fail(): # [end-cf-error-result-2] +####################################### +# workload 1: ... +####################################### + class Workload1: @example From e7144ecd35b7fea41354de396a821d362ee3de22 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 26 Aug 2024 10:04:28 -0600 Subject: [PATCH 28/80] Add a link to the "overhead" table. --- Doc/howto/concurrency.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index a84e0da4138d3f..8e5074bbe19f53 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -66,7 +66,7 @@ in Python: - scale - `multi-core `_ - `races `_ - - overhead + - `overhead `_ - `c.f `_ * - free threading `(Python) `_ - :mod:`threading` @@ -417,6 +417,8 @@ Here's a summary: - * not necessarily a good fit for small-scale applications * often requires configuration +.. _concurrency-overhead-table: + Here's a comparison of the overhead of each model in Python: .. list-table:: From e131b83eaf4f988e8897aa776b6a7b18aa2a2e66 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 26 Aug 2024 16:13:14 -0600 Subject: [PATCH 29/80] Expand the general explanation of workloads. 
---
 Doc/howto/concurrency.rst | 59 ++++++++++++++++++++++++++-------------
 1 file changed, 40 insertions(+), 19 deletions(-)

diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst
index 8e5074bbe19f53..c1d984f7fe216c 100644
--- a/Doc/howto/concurrency.rst
+++ b/Doc/howto/concurrency.rst
@@ -311,22 +311,46 @@ engineering simulation  calculate stress loads at vertices
 molecular modeling      try many permutations
 ======================= ===========
 
-It can be helpful to identify common characteristics by which we could
-group concurrency workloads.  Here are some:
-
-* number of logical threads
-* main + workers vs. independent
-* main + background
-* how much computation, per thread
-* how much blocking on other threads, per thread
-* how much blocking IO, per thread
-* number of external inputs
-* number of external outputs
-* how much data used, per thread
-* how much data do logical threads share
-* size of the data shared by threads
-
-Let's also revisit the ways concurrency can be helpful:
+For a given workload, here are some characteristics that will help you
+understand the problem and, potentially, which concurrency model would
+be the best fit:
+
+* requests
+
+  * frequency
+  * expected latency for (at least partial) response
+
+* inputs per request
+
+  * how many
+  * size of each input
+
+* tasks (logical threads) per input
+
+  * how many
+  * variety vs. uniformity
+  * compute per task: how much
+  * data per task: how much and what kinds
+  * I/O per task: how much and what kinds
+  * tasks not tied to outputs
+
+* task interaction
+
+  * how much and in what ways
+  * what data is shared between tasks
+  * how much blocking while waiting
+
+* outputs per request
+
+  * how many
+  * size of each output
+  * correlation to inputs
+
+To some extent the most critical factors can be compressed down to:
+
+* many inputs vs. 1 large divisible input
+* many outputs vs. combined output vs. matching large output
+* many short computations vs. fewer medium/long computations
+
+Aside from the concurrency model, the answers to the above can impact
+the following:
+
+* use of a worker pool
+* use of background tasks/threads
+
+In the context of the above characteristics, let's revisit the ways that
+concurrency can be helpful:
 
 * get work done faster
 * run more tasks at once (multi-core)
@@ -342,9 +366,6 @@ Let's also revisit the ways concurrency can be helpful:
 * scaling
 * handle asynchronous events
 
-All of these things factor in to how concurrency should be applied for
-a workload, or even if it should.
-
 Python Concurrency Models
 =========================
 
From c601e27bf39fda411cce82e797de7c432caf2381 Mon Sep 17 00:00:00 2001
From: Eric Snow <ericsnowcurrently@gmail.com>
Date: Mon, 26 Aug 2024 16:12:42 -0600
Subject: [PATCH 30/80] Updates for the first two workload examples.

---
 Doc/howto/concurrency.rst   | 269 +++++++++++++++++++++++++++++++++---
 Doc/includes/concurrency.py |  83 ++++++-----
 2 files changed, 299 insertions(+), 53 deletions(-)

diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst
index c1d984f7fe216c..c9eaf78d5bb376 100644
--- a/Doc/howto/concurrency.rst
+++ b/Doc/howto/concurrency.rst
@@ -965,17 +965,161 @@ Python Concurrency Workload Examples
 
 Below we have a series of examples of how to implement the most
 common Python workloads that take advantage of concurrency.
+For each workload you will find an implementation for each of the
+concurrency models.
 
-...
+The implementations are meant to accurately demonstrate how best
+to solve the problem using the given concurrency model.
The examples +for the workload are presented side-by-side, for easier comparison. +The examples for threads, multiprocessing, and multiple interpreters +will use :mod:`concurrent.futures` when that is the better approach. +Performance comparisons are not included here. -also see: +Here's a summary of the examples, by workload: + +.. list-table:: + :header-rows: 1 + :class: borderless vert-aligned + :align: left + + * - workload + - req in + - req out + - *N* core tasks + - core task + * - `web: resize image `_ + - image (**net**) + - image (**net**) + - | *N* small sub-images + | **mem**: ~ 2x image size + - | **time**: short + | **mem**: small + * - `grep `_ + - | *N* filenames (**stdin**) + | file bytes x *N* (**disk**) + - *M* matches (**stdout**) + - 1+ per file + - | **time**: ~ file size + | **mem**: small + * - `... `_ + - ... + - ... + - ... + - ... + +.. other examples: + + * (scientific, finance, ML, matrices) + * conway's game of life + * raytracer + * mandelbrot + * find primes + * compute factorials + * + * + + * + * + * + +Also see: * https://github.com/faster-cpython/ideas/wiki/Tables:-Workloads * https://github.com/ericsnowcurrently/concurrency-benchmarks -Workload 1 ----------- +Workload: Image Resizer Web Service +----------------------------------- + +# ... + +.. list-table:: + :header-rows: 1 + :class: borderless vert-aligned + :align: left + + * - threads + - multiple interpreters + - coroutines + - multiple processes + - SMP + * - .. raw:: html + +
+ (expand) + + .. literalinclude:: ../includes/concurrency.py + :start-after: [start-web-image-resize-threads] + :end-before: [end-web-image-resize-threads] + :dedent: + :linenos: + + .. raw:: html + +
+ + - .. raw:: html + +
+ (expand) + + .. literalinclude:: ../includes/concurrency.py + :start-after: [start-web-image-resize-subinterpreters] + :end-before: [end-web-image-resize-subinterpreters] + :dedent: + :linenos: + + .. raw:: html + +
+ + - .. raw:: html + +
+ (expand) + + .. literalinclude:: ../includes/concurrency.py + :start-after: [start-web-image-resize-async] + :end-before: [end-web-image-resize-async] + :dedent: + :linenos: + + .. raw:: html + +
+ + - .. raw:: html + +
+ (expand) + + .. literalinclude:: ../includes/concurrency.py + :start-after: [start-web-image-resize-multiprocessing] + :end-before: [end-web-image-resize-multiprocessing] + :dedent: + :linenos: + + .. raw:: html + +
+ + - .. raw:: html + +
+ (expand) + + .. literalinclude:: ../includes/concurrency.py + :start-after: [start-web-image-resize-smp] + :end-before: [end-web-image-resize-smp] + :dedent: + :linenos: + + .. raw:: html + +
+ +Workload: grep +-------------- # ... @@ -995,9 +1139,8 @@ Workload 1 (expand) .. literalinclude:: ../includes/concurrency.py - :name: concurrency-workload-1-threads - :start-after: [start-w1-threads] - :end-before: [end-w1-threads] + :start-after: [start-grep-threads] + :end-before: [end-grep-threads] :dedent: :linenos: @@ -1011,9 +1154,8 @@ Workload 1 (expand) .. literalinclude:: ../includes/concurrency.py - :name: concurrency-workload-1-subinterpreters - :start-after: [start-w1-subinterpreters] - :end-before: [end-w1-subinterpreters] + :start-after: [start-grep-subinterpreters] + :end-before: [end-grep-subinterpreters] :dedent: :linenos: @@ -1027,9 +1169,8 @@ Workload 1 (expand) .. literalinclude:: ../includes/concurrency.py - :name: concurrency-workload-1-async - :start-after: [start-w1-async] - :end-before: [end-w1-async] + :start-after: [start-grep-async] + :end-before: [end-grep-async] :dedent: :linenos: @@ -1043,9 +1184,8 @@ Workload 1 (expand) .. literalinclude:: ../includes/concurrency.py - :name: concurrency-workload-1-multiprocessing - :start-after: [start-w1-multiprocessing] - :end-before: [end-w1-multiprocessing] + :start-after: [start-grep-multiprocessing] + :end-before: [end-grep-multiprocessing] :dedent: :linenos: @@ -1059,9 +1199,8 @@ Workload 1 (expand) .. literalinclude:: ../includes/concurrency.py - :name: concurrency-workload-1-smp - :start-after: [start-w1-smp] - :end-before: [end-w1-smp] + :start-after: [start-grep-smp] + :end-before: [end-grep-smp] :dedent: :linenos: @@ -1069,10 +1208,96 @@ Workload 1 -Workload 2 ----------- -... +Workload: ... +------------- + +# ... + +.. list-table:: + :header-rows: 1 + :class: borderless vert-aligned + :align: left + + * - threads + - multiple interpreters + - coroutines + - multiple processes + - SMP + * - .. raw:: html + +
+ (expand) + + .. literalinclude:: ../includes/concurrency.py + :start-after: [start-w3-threads] + :end-before: [end-w3-threads] + :dedent: + :linenos: + + .. raw:: html + +
+ + - .. raw:: html + +
+ (expand) + + .. literalinclude:: ../includes/concurrency.py + :start-after: [start-w3-subinterpreters] + :end-before: [end-w3-subinterpreters] + :dedent: + :linenos: + + .. raw:: html + +
+ + - .. raw:: html + +
+ (expand) + + .. literalinclude:: ../includes/concurrency.py + :start-after: [start-w3-async] + :end-before: [end-w3-async] + :dedent: + :linenos: + + .. raw:: html + +
+ + - .. raw:: html + +
+ (expand) + + .. literalinclude:: ../includes/concurrency.py + :start-after: [start-w3-multiprocessing] + :end-before: [end-w3-multiprocessing] + :dedent: + :linenos: + + .. raw:: html + +
+ + - .. raw:: html + +
+ (expand) + + .. literalinclude:: ../includes/concurrency.py + :start-after: [start-w3-smp] + :end-before: [end-w3-smp] + :dedent: + :linenos: + + .. raw:: html + +
.. rubric:: Footnotes diff --git a/Doc/includes/concurrency.py b/Doc/includes/concurrency.py index 6be6f29cfd0083..2bea4f0853b09b 100644 --- a/Doc/includes/concurrency.py +++ b/Doc/includes/concurrency.py @@ -11,6 +11,7 @@ @contextlib.contextmanager def dummy_files(*filenames): + """A context manager that creates empty files in a temp directory.""" with tempfile.TemporaryDirectory() as tempdir: orig = os.getcwd() os.chdir(tempdir) @@ -31,6 +32,10 @@ def zip(*args, strict=False, _zip=zip): class example(staticmethod): + """A function containing example code. + + The function will be called when this file is run as a script. + """ registry = [] @@ -43,7 +48,19 @@ def __set_name__(self, cls, name): type(self).registry.append((self.func, cls)) -class ConcurrentFutures: +class Examples: + """Code examples for docs using "literalinclude".""" + + +class WorkloadExamples(Examples): + """Examples of a single concurrency workload.""" + + +####################################### +# concurrent.futures examples +####################################### + +class ConcurrentFutures(Examples): @example def example_basic(): @@ -241,14 +258,14 @@ def fail(): ####################################### -# workload 1: ... +# workload: image resizing web service ####################################### -class Workload1: +class ImageResizingWebService(WorkloadExamples): @example def run_using_threads(): - # [start-w1-threads] + # [start-web-image-resize-threads] import threading def task(): @@ -258,57 +275,57 @@ def task(): t.start() ... - # [end-w1-threads] + # [end-web-image-resize-threads] @example def run_using_multiprocessing(): - # [start-w1-multiprocessing] + # [start-web-image-resize-multiprocessing] import multiprocessing def task(): ... ... - # [end-w1-multiprocessing] + # [end-web-image-resize-multiprocessing] @example def run_using_async(): - # [start-w1-async] + # [start-web-image-resize-async] # async 1 ... - # [end-w1-async] + # [end-web-image-resize-async] @example def run_using_subinterpreters(): - # [start-w1-subinterpreters] + # [start-web-image-resize-subinterpreters] # subinterpreters 1 ... - # [end-w1-subinterpreters] + # [end-web-image-resize-subinterpreters] @example def run_using_smp(): - # [start-w1-smp] + # [start-web-image-resize-smp] # smp 1 ... - # [end-w1-smp] + # [end-web-image-resize-smp] @example def run_using_concurrent_futures_thread(): - # [start-w1-concurrent-futures-thread] + # [start-web-image-resize-cf-thread] # concurrent.futures 1 ... - # [end-w1-concurrent-futures-thread] + # [end-web-image-resize-cf-thread] ####################################### -# workload 2: ... +# workload: grep ####################################### -class Workload2: +class Grep(WorkloadExamples): @example def run_using_threads(): - # [start-w2-threads] + # [start-grep-threads] import threading def task(): @@ -318,53 +335,53 @@ def task(): t.start() ... - # [end-w2-threads] + # [end-grep-threads] @example def run_using_multiprocessing(): - # [start-w2-multiprocessing] + # [start-grep-multiprocessing] import multiprocessing def task(): ... ... - # [end-w2-multiprocessing] + # [end-grep-multiprocessing] @example def run_using_async(): - # [start-w2-async] + # [start-grep-async] # async 2 ... - # [end-w2-async] + # [end-grep-async] @example def run_using_subinterpreters(): - # [start-w2-subinterpreters] + # [start-grep-subinterpreters] # subinterpreters 2 ... - # [end-w2-subinterpreters] + # [end-grep-subinterpreters] @example def run_using_smp(): - # [start-w2-smp] + # [start-grep-smp] # smp 2 ... 
- # [end-w2-smp] + # [end-grep-smp] @example def run_using_concurrent_futures_thread(): - # [start-w2-concurrent-futures-thread] + # [start-grep-concurrent-futures-thread] # concurrent.futures 2 ... - # [end-w2-concurrent-futures-thread] + # [end-grep-concurrent-futures-thread] ####################################### -# workload 3: ... +# workload: ... ####################################### -class Workload3: +class WorkloadX(WorkloadExamples): @example def run_using_threads(): @@ -420,6 +437,10 @@ def run_using_concurrent_futures_thread(): # [end-w3-concurrent-futures-thread] +####################################### +# A script to run the examples +####################################### + if __name__ == '__main__': # Run all the examples. div1 = '#' * 40 From 5c055811875454326272ebeead05f271ecc33db6 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 27 Aug 2024 12:24:43 -0600 Subject: [PATCH 31/80] Do not worry about demonstrating a web service. --- Doc/howto/concurrency.rst | 33 ++++++++++++++++++-------------- Doc/includes/concurrency.py | 38 ++++++++++++++++++------------------- 2 files changed, 38 insertions(+), 33 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index c9eaf78d5bb376..d95e06df71059a 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -987,7 +987,7 @@ Here's a summary of the examples, by workload: - req out - *N* core tasks - core task - * - `web: resize image `_ + * - `resize image `_ - image (**net**) - image (**net**) - | *N* small sub-images @@ -1027,11 +1027,16 @@ Also see: * https://github.com/faster-cpython/ideas/wiki/Tables:-Workloads * https://github.com/ericsnowcurrently/concurrency-benchmarks +.. note:: -Workload: Image Resizer Web Service ------------------------------------ + Each example is implemented as a basic commandline tool, but can be + easily adapted to run as a web service. -# ... +Workload: Image Resizer +----------------------- + +This example runs a web service that takes an image and a new size +and responds with the image at the new size. .. list-table:: :header-rows: 1 @@ -1049,8 +1054,8 @@ Workload: Image Resizer Web Service (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-web-image-resize-threads] - :end-before: [end-web-image-resize-threads] + :start-after: [start-image-resizer-threads] + :end-before: [end-image-resizer-threads] :dedent: :linenos: @@ -1064,8 +1069,8 @@ Workload: Image Resizer Web Service (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-web-image-resize-subinterpreters] - :end-before: [end-web-image-resize-subinterpreters] + :start-after: [start-image-resizer-subinterpreters] + :end-before: [end-image-resizer-subinterpreters] :dedent: :linenos: @@ -1079,8 +1084,8 @@ Workload: Image Resizer Web Service (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-web-image-resize-async] - :end-before: [end-web-image-resize-async] + :start-after: [start-image-resizer-async] + :end-before: [end-image-resizer-async] :dedent: :linenos: @@ -1094,8 +1099,8 @@ Workload: Image Resizer Web Service (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-web-image-resize-multiprocessing] - :end-before: [end-web-image-resize-multiprocessing] + :start-after: [start-image-resizer-multiprocessing] + :end-before: [end-image-resizer-multiprocessing] :dedent: :linenos: @@ -1109,8 +1114,8 @@ Workload: Image Resizer Web Service (expand) .. 
literalinclude:: ../includes/concurrency.py - :start-after: [start-web-image-resize-smp] - :end-before: [end-web-image-resize-smp] + :start-after: [start-image-resizer-smp] + :end-before: [end-image-resizer-smp] :dedent: :linenos: diff --git a/Doc/includes/concurrency.py b/Doc/includes/concurrency.py index 2bea4f0853b09b..b0003803d988db 100644 --- a/Doc/includes/concurrency.py +++ b/Doc/includes/concurrency.py @@ -258,14 +258,14 @@ def fail(): ####################################### -# workload: image resizing web service +# workload: image resizer ####################################### -class ImageResizingWebService(WorkloadExamples): +class ImageResizer(WorkloadExamples): @example def run_using_threads(): - # [start-web-image-resize-threads] + # [start-image-resizer-threads] import threading def task(): @@ -275,46 +275,46 @@ def task(): t.start() ... - # [end-web-image-resize-threads] + # [end-image-resizer-threads] + + @example + def run_using_cf_thread(): + # [start-image-resizer-cf-thread] + # concurrent.futures 1 + ... + # [end-image-resizer-cf-thread] @example def run_using_multiprocessing(): - # [start-web-image-resize-multiprocessing] + # [start-image-resizer-multiprocessing] import multiprocessing def task(): ... ... - # [end-web-image-resize-multiprocessing] + # [end-image-resizer-multiprocessing] @example def run_using_async(): - # [start-web-image-resize-async] + # [start-image-resizer-async] # async 1 ... - # [end-web-image-resize-async] + # [end-image-resizer-async] @example def run_using_subinterpreters(): - # [start-web-image-resize-subinterpreters] + # [start-image-resizer-subinterpreters] # subinterpreters 1 ... - # [end-web-image-resize-subinterpreters] + # [end-image-resizer-subinterpreters] @example def run_using_smp(): - # [start-web-image-resize-smp] + # [start-image-resizer-smp] # smp 1 ... - # [end-web-image-resize-smp] - - @example - def run_using_concurrent_futures_thread(): - # [start-web-image-resize-cf-thread] - # concurrent.futures 1 - ... - # [end-web-image-resize-cf-thread] + # [end-image-resizer-smp] ####################################### From a0a8ebb764b7f3a29ec3ac150ffc397ea330fd6f Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 27 Aug 2024 12:28:13 -0600 Subject: [PATCH 32/80] Move the grep example up. --- Doc/howto/concurrency.rst | 69 ++++++++++++++--------------- Doc/includes/concurrency.py | 88 ++++++++++++++++++------------------- 2 files changed, 78 insertions(+), 79 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index d95e06df71059a..f1b3f4355f0206 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -987,13 +987,6 @@ Here's a summary of the examples, by workload: - req out - *N* core tasks - core task - * - `resize image `_ - - image (**net**) - - image (**net**) - - | *N* small sub-images - | **mem**: ~ 2x image size - - | **time**: short - | **mem**: small * - `grep `_ - | *N* filenames (**stdin**) | file bytes x *N* (**disk**) @@ -1001,6 +994,13 @@ Here's a summary of the examples, by workload: - 1+ per file - | **time**: ~ file size | **mem**: small + * - `resize image `_ + - image (**net**) + - image (**net**) + - | *N* small sub-images + | **mem**: ~ 2x image size + - | **time**: short + | **mem**: small * - `... `_ - ... - ... @@ -1032,11 +1032,10 @@ Also see: Each example is implemented as a basic commandline tool, but can be easily adapted to run as a web service. 
-Workload: Image Resizer ------------------------ +Workload: grep +-------------- -This example runs a web service that takes an image and a new size -and responds with the image at the new size. +# ... .. list-table:: :header-rows: 1 @@ -1054,8 +1053,8 @@ and responds with the image at the new size. (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-image-resizer-threads] - :end-before: [end-image-resizer-threads] + :start-after: [start-grep-threads] + :end-before: [end-grep-threads] :dedent: :linenos: @@ -1069,8 +1068,8 @@ and responds with the image at the new size. (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-image-resizer-subinterpreters] - :end-before: [end-image-resizer-subinterpreters] + :start-after: [start-grep-subinterpreters] + :end-before: [end-grep-subinterpreters] :dedent: :linenos: @@ -1084,8 +1083,8 @@ and responds with the image at the new size. (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-image-resizer-async] - :end-before: [end-image-resizer-async] + :start-after: [start-grep-async] + :end-before: [end-grep-async] :dedent: :linenos: @@ -1099,8 +1098,8 @@ and responds with the image at the new size. (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-image-resizer-multiprocessing] - :end-before: [end-image-resizer-multiprocessing] + :start-after: [start-grep-multiprocessing] + :end-before: [end-grep-multiprocessing] :dedent: :linenos: @@ -1114,8 +1113,8 @@ and responds with the image at the new size. (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-image-resizer-smp] - :end-before: [end-image-resizer-smp] + :start-after: [start-grep-smp] + :end-before: [end-grep-smp] :dedent: :linenos: @@ -1123,10 +1122,11 @@ and responds with the image at the new size. -Workload: grep --------------- +Workload: Image Resizer +----------------------- -# ... +This example runs a web service that takes an image and a new size +and responds with the image at the new size. .. list-table:: :header-rows: 1 @@ -1144,8 +1144,8 @@ Workload: grep (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-grep-threads] - :end-before: [end-grep-threads] + :start-after: [start-image-resizer-threads] + :end-before: [end-image-resizer-threads] :dedent: :linenos: @@ -1159,8 +1159,8 @@ Workload: grep (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-grep-subinterpreters] - :end-before: [end-grep-subinterpreters] + :start-after: [start-image-resizer-subinterpreters] + :end-before: [end-image-resizer-subinterpreters] :dedent: :linenos: @@ -1174,8 +1174,8 @@ Workload: grep (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-grep-async] - :end-before: [end-grep-async] + :start-after: [start-image-resizer-async] + :end-before: [end-image-resizer-async] :dedent: :linenos: @@ -1189,8 +1189,8 @@ Workload: grep (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-grep-multiprocessing] - :end-before: [end-grep-multiprocessing] + :start-after: [start-image-resizer-multiprocessing] + :end-before: [end-image-resizer-multiprocessing] :dedent: :linenos: @@ -1204,8 +1204,8 @@ Workload: grep (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-grep-smp] - :end-before: [end-grep-smp] + :start-after: [start-image-resizer-smp] + :end-before: [end-image-resizer-smp] :dedent: :linenos: @@ -1213,7 +1213,6 @@ Workload: grep - Workload: ... 
------------- diff --git a/Doc/includes/concurrency.py b/Doc/includes/concurrency.py index b0003803d988db..14d1141e81ebfb 100644 --- a/Doc/includes/concurrency.py +++ b/Doc/includes/concurrency.py @@ -258,14 +258,14 @@ def fail(): ####################################### -# workload: image resizer +# workload: grep ####################################### -class ImageResizer(WorkloadExamples): +class Grep(WorkloadExamples): @example def run_using_threads(): - # [start-image-resizer-threads] + # [start-grep-threads] import threading def task(): @@ -275,57 +275,57 @@ def task(): t.start() ... - # [end-image-resizer-threads] - - @example - def run_using_cf_thread(): - # [start-image-resizer-cf-thread] - # concurrent.futures 1 - ... - # [end-image-resizer-cf-thread] + # [end-grep-threads] @example def run_using_multiprocessing(): - # [start-image-resizer-multiprocessing] + # [start-grep-multiprocessing] import multiprocessing def task(): ... ... - # [end-image-resizer-multiprocessing] + # [end-grep-multiprocessing] @example def run_using_async(): - # [start-image-resizer-async] - # async 1 + # [start-grep-async] + # async 2 ... - # [end-image-resizer-async] + # [end-grep-async] @example def run_using_subinterpreters(): - # [start-image-resizer-subinterpreters] - # subinterpreters 1 + # [start-grep-subinterpreters] + # subinterpreters 2 ... - # [end-image-resizer-subinterpreters] + # [end-grep-subinterpreters] @example def run_using_smp(): - # [start-image-resizer-smp] - # smp 1 + # [start-grep-smp] + # smp 2 ... - # [end-image-resizer-smp] + # [end-grep-smp] + + @example + def run_using_concurrent_futures_thread(): + # [start-grep-concurrent-futures-thread] + # concurrent.futures 2 + ... + # [end-grep-concurrent-futures-thread] ####################################### -# workload: grep +# workload: image resizer ####################################### -class Grep(WorkloadExamples): +class ImageResizer(WorkloadExamples): @example def run_using_threads(): - # [start-grep-threads] + # [start-image-resizer-threads] import threading def task(): @@ -335,46 +335,46 @@ def task(): t.start() ... - # [end-grep-threads] + # [end-image-resizer-threads] + + @example + def run_using_cf_thread(): + # [start-image-resizer-cf-thread] + # concurrent.futures 1 + ... + # [end-image-resizer-cf-thread] @example def run_using_multiprocessing(): - # [start-grep-multiprocessing] + # [start-image-resizer-multiprocessing] import multiprocessing def task(): ... ... - # [end-grep-multiprocessing] + # [end-image-resizer-multiprocessing] @example def run_using_async(): - # [start-grep-async] - # async 2 + # [start-image-resizer-async] + # async 1 ... - # [end-grep-async] + # [end-image-resizer-async] @example def run_using_subinterpreters(): - # [start-grep-subinterpreters] - # subinterpreters 2 + # [start-image-resizer-subinterpreters] + # subinterpreters 1 ... - # [end-grep-subinterpreters] + # [end-image-resizer-subinterpreters] @example def run_using_smp(): - # [start-grep-smp] - # smp 2 - ... - # [end-grep-smp] - - @example - def run_using_concurrent_futures_thread(): - # [start-grep-concurrent-futures-thread] - # concurrent.futures 2 + # [start-image-resizer-smp] + # smp 1 ... - # [end-grep-concurrent-futures-thread] + # [end-image-resizer-smp] ####################################### From c3d1688c1210423cf348aeadd0ec3b47b1f4c82f Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 27 Aug 2024 17:01:27 -0600 Subject: [PATCH 33/80] Implement threads for the grep example. 
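At its core, the threaded grep implementation follows a
one-thread-per-file pattern: each worker pushes its matches onto a
per-file queue, with ``None`` (or some other sentinel) marking the end
of that file's results, so the caller can yield matches in file order
as they arrive.  A simplified sketch of that pattern, without the
bookkeeping the real code adds to cap the number of live threads::

    import queue
    import threading

    def run_all(regex, opts, files, grep):
        workers = []
        for infile in files:
            matches = queue.Queue()

            def task(infile=infile, matches=matches):
                for match in grep(regex, opts, infile):
                    matches.put(match)
                matches.put(None)  # end-of-results sentinel

            t = threading.Thread(target=task)
            t.start()
            workers.append((t, matches))

        # Yield results in file order, as each file's matches arrive.
        for t, matches in workers:
            while (match := matches.get()) is not None:
                yield match
            t.join()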
--- Doc/howto/concurrency.rst | 12 +- Doc/includes/concurrency.py | 271 +++++++++++++++++++++++++++++++++++- 2 files changed, 276 insertions(+), 7 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index f1b3f4355f0206..cc5fc130004fe8 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -1035,7 +1035,9 @@ Also see: Workload: grep -------------- -# ... +This a basic Python implementation of the linux ``grep`` tool. +From a concurrency standpoint, each file is processed in its own +logical thread. .. list-table:: :header-rows: 1 @@ -1122,6 +1124,14 @@ Workload: grep +Common code: + +.. literalinclude:: ../includes/concurrency.py + :start-after: [start-grep-common] + :end-before: [end-grep-common] + :dedent: + :linenos: + Workload: Image Resizer ----------------------- diff --git a/Doc/includes/concurrency.py b/Doc/includes/concurrency.py index 14d1141e81ebfb..398dc6b02a7fba 100644 --- a/Doc/includes/concurrency.py +++ b/Doc/includes/concurrency.py @@ -263,19 +263,278 @@ def fail(): class Grep(WorkloadExamples): + @staticmethod + def common(): + # [start-grep-common] + import os + import os.path + import re + + class GrepOptions: + # file selection + recursive = False # -r --recursive + # matching control + ignorecase = False # -i --ignore-case + invertmatch = False # -v --invert-match + # output control + showfilename = None # -H --with-filename + # -h --no-filename + filesonly = None # -L --files-without-match + # -l --files-with-matches + showonlymatch = False # -o --only-matching + quiet = False # -q --quiet, --silent + hideerrors = False # -s --no-messages + + def grep(regex, opts, infile): + if isinstance(infile, str): + filename = infile + with open(filename) as infile: + infile = (filename, infile) + yield from grep(regex, opts, infile) + return + + filename, infile = infile + invert = not opts.filesonly and opts.invertmatch + if invert: + for line in infile: + m = regex.search(line) + if m: + continue + if line.endswith(os.linesep): + line = line[:-len(os.linesep)] + yield filename, line, None + else: + for line in infile: + m = regex.search(line) + if not m: + continue + if line.endswith(os.linesep): + line = line[:-len(os.linesep)] + yield filename, line, m.group(0) + + def grep_file(regex, opts, infile): + matches = grep(regex, opts, infile) + try: + if opts.filesonly == 'invert': + for _ in matches: + break + else: + if isinstance(infile, str): + filename = infile + else: + filename, _ = infile + yield filename, None, None + elif opts.filesonly: + for filename, _, _ in matches: + yield filename, None, None + break + else: + yield from matches + except UnicodeDecodeError: + # It must be a binary file. + return + + def run_all(regex, opts, files, grep=grep_file): + raise NotImplementedError + + def main(pat, opts, *filenames, run_all=run_all): # -e --regexp + # Create the regex object. + regex = re.compile(pat) + + # Resolve the files. + if not filenames: + raise ValueError('missing filenames') + if opts.recursive: + recursed = [] + for filename in filenames: + if os.path.isdir(filename): + for d, _, files in os.walk(filename): + for base in files: + recursed.append( + os.path.join(d, base)) + else: + recursed.append(filename) + filenames = recursed + + # Process the files. + matches = run_all(regex, opts, filenames, grep_file) + + # Handle the first match. 
+ for filename, line, match in matches: + if opts.quiet: + return 0 + elif opts.filesonly: + print(filename) + elif opts.showonlymatch: + if opts.invertmatch: + return 0 + elif opts.showfilename is False: + print(match) + elif opts.showfilename: + print(f'{filename}: {match}') + else: + try: + second = next(matches) + except StopIteration: + print(match) + else: + print(f'{filename}: {match}') + filename, _, match = second + print(f'{filename}: {match}') + else: + if opts.showfilename is False: + print(line) + elif opts.showfilename: + print(f'{filename}: {line}') + else: + try: + second = next(matches) + except StopIteration: + print(line) + else: + print(f'{filename}: {line}') + filename, line, _ = second + print(f'{filename}: {line}') + break + else: + return 1 + + # Handle the remaining matches. + if opts.filesonly: + for filename, _, _ in matches: + print(filename) + elif opts.showonlymatch: + if opts.showfilename is False: + for filename, _, match in matches: + print(match) + else: + for filename, _, match in matches: + print(f'{filename}: {match}') + else: + if opts.showfilename is False: + for filename, line, _ in matches: + print(line) + else: + for filename, line, _ in matches: + print(f'{filename}: {line}') + return 0 + # [end-grep-common] + + return main, GrepOptions + + @example + def run_sequentially(): + # [start-grep-sequential] + def run_all(regex, opts, files, grep): + for infile in files: + yield from grep(regex, opts, infile) + # [end-grep-sequential] + + main, GrepOptions = Grep.common() + + opts = GrepOptions() + opts.recursive = True + #opts.ignorecase = True + #opts.invertmatch = True + #opts.showfilename = True + #opts.showfilename = False + #opts.filesonly = 'invert' + #opts.filesonly = 'match' + #opts.showonlymatch = True + #opts.quiet = True + #opts.hideerrors = True + main('help', opts, 'make.bat', 'Makefile', run_all=run_all) + #main('help', opts, '.', run_all=run_all) + + @example def run_using_threads(): # [start-grep-threads] + import queue import threading + import time + + MAX_THREADS = 10 + + def run_all(regex, opts, files, grep): + FINISHED = object() + matches_by_file = [] + + done = False + def start_tasks(): + nonlocal done + numfiles = 0 + active = {} + for infile in files: + if isinstance(infile, str): + filename = infile + else: + filename, _ = infile + numfiles += 1 + index = numfiles + + while len(active) >= MAX_THREADS: + time.sleep(0.01) + + q = queue.Queue() + + def task(index=index, q=q, infile=infile): + for match in grep(regex, opts, infile): + q.put(match) + q.put(FINISHED) + while index not in active: + pass + del active[index] + t = threading.Thread(target=task) + t.start() + + active[index] = (t, filename) + matches_by_file.append((filename, q)) + for t, _ in list(active.values()): + t.join() + done = True + t = threading.Thread(target=start_tasks) + t.start() + + # Yield the results as they are received, in order. + while True: + while matches_by_file: + filename, q = matches_by_file.pop(0) + while True: + try: + match = q.get(block=False) + except queue.Empty: + continue + if match is FINISHED: + break + yield match + if done: + break + + t.join() + # [end-grep-threads] - def task(): - ... 
- - t = threading.Thread(target=task) - t.start() + main, GrepOptions = Grep.common() + + opts = GrepOptions() + opts.recursive = True + #opts.ignorecase = True + #opts.invertmatch = True + #opts.showfilename = True + #opts.showfilename = False + #opts.filesonly = 'invert' + #opts.filesonly = 'match' + #opts.showonlymatch = True + #opts.quiet = True + #opts.hideerrors = True + main('help', opts, 'make.bat', 'Makefile', run_all=run_all) + #main('help', opts, '.', run_all=run_all) + @example + def run_using_cf_threads(): + # [startgrep-cf-threads] ... - # [end-grep-threads] + # [end-grep--cf-threads] @example def run_using_multiprocessing(): From 48ac79d91049f9114fc20978b2f964e60efe4f74 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 28 Aug 2024 14:26:26 -0600 Subject: [PATCH 34/80] Implement grep using concurrent.futures. --- Doc/howto/concurrency.rst | 9 ++ Doc/includes/concurrency.py | 257 ++++++++++++++++++++---------------- 2 files changed, 154 insertions(+), 112 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index cc5fc130004fe8..8fcacd286a3a59 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -1126,12 +1126,21 @@ logical thread. Common code: +.. raw:: html + +
+ (expand) + .. literalinclude:: ../includes/concurrency.py :start-after: [start-grep-common] :end-before: [end-grep-common] :dedent: :linenos: +.. raw:: html + +
+ Workload: Image Resizer ----------------------- diff --git a/Doc/includes/concurrency.py b/Doc/includes/concurrency.py index 398dc6b02a7fba..32f33c154c9c8a 100644 --- a/Doc/includes/concurrency.py +++ b/Doc/includes/concurrency.py @@ -269,8 +269,10 @@ def common(): import os import os.path import re + import sys + import types - class GrepOptions: + class GrepOptions(types.SimpleNamespace): # file selection recursive = False # -r --recursive # matching control @@ -285,15 +287,34 @@ class GrepOptions: quiet = False # -q --quiet, --silent hideerrors = False # -s --no-messages - def grep(regex, opts, infile): - if isinstance(infile, str): - filename = infile - with open(filename) as infile: - infile = (filename, infile) - yield from grep(regex, opts, infile) - return + def normalize_file(unresolved, opts): + if not isinstance(unresolved, str): + infile, filename = unresolved + if not filename: + filename = infile.name + yield infile, filename + else: + assert not os.path.isdir(filename) + yield unresolved + elif unresolved == '-': + yield sys.stdin, '-' + else: + filename = unresolved + if not opts.recursive: + assert not os.path.isdir(filename) + yield None, filename + elif not os.path.isdir(filename): + yield None, filename + else: + for d, _, files in os.walk(filename): + for base in files: + yield None, os.path.join(d, base) + + def iter_files(files, opts): + for unresolved in files: + yield from normalize_file(unresolved, opts) - filename, infile = infile + def _grep_file(regex, opts, infile, filename): invert = not opts.filesonly and opts.invertmatch if invert: for line in infile: @@ -312,17 +333,19 @@ def grep(regex, opts, infile): line = line[:-len(os.linesep)] yield filename, line, m.group(0) - def grep_file(regex, opts, infile): - matches = grep(regex, opts, infile) + def _grep(regex, opts, infile): + infile, filename = infile + if infile is None: + with open(filename) as infile: + infile = (infile, filename) + yield from _grep(regex, opts, infile) + return + matches = _grep_file(regex, opts, infile, filename) try: if opts.filesonly == 'invert': for _ in matches: break else: - if isinstance(infile, str): - filename = infile - else: - filename, _ = infile yield filename, None, None elif opts.filesonly: for filename, _, _ in matches: @@ -334,30 +357,19 @@ def grep_file(regex, opts, infile): # It must be a binary file. return - def run_all(regex, opts, files, grep=grep_file): - raise NotImplementedError + def run_all(regex, opts, files, grep=_grep): + for infile in files: + yield from grep(regex, opts, infile) def main(pat, opts, *filenames, run_all=run_all): # -e --regexp # Create the regex object. regex = re.compile(pat) # Resolve the files. - if not filenames: - raise ValueError('missing filenames') - if opts.recursive: - recursed = [] - for filename in filenames: - if os.path.isdir(filename): - for d, _, files in os.walk(filename): - for base in files: - recursed.append( - os.path.join(d, base)) - else: - recursed.append(filename) - filenames = recursed + files = iter_files(filenames, opts) # Process the files. - matches = run_all(regex, opts, filenames, grep_file) + matches = run_all(regex, opts, files, _grep) # Handle the first match. 
for filename, line, match in matches: @@ -422,6 +434,25 @@ def main(pat, opts, *filenames, run_all=run_all): # -e --regexp return main, GrepOptions + @staticmethod + def app(run_all): + main, GrepOptions = Grep.common() + opts = GrepOptions( + #recursive=True, + #ignorecase = True, + #invertmatch = True, + #showfilename = True, + #showfilename = False, + #filesonly = 'invert', + #filesonly = 'match', + #showonlymatch = True, + #quiet = True, + #hideerrors = True, + ) + #main('help', opts, 'make.bat', 'Makefile', run_all=run_all) + opts = GrepOptions(recursive=True, filesonly='match') + main('help', opts, '.', run_all=run_all) + @example def run_sequentially(): # [start-grep-sequential] @@ -429,22 +460,7 @@ def run_all(regex, opts, files, grep): for infile in files: yield from grep(regex, opts, infile) # [end-grep-sequential] - - main, GrepOptions = Grep.common() - - opts = GrepOptions() - opts.recursive = True - #opts.ignorecase = True - #opts.invertmatch = True - #opts.showfilename = True - #opts.showfilename = False - #opts.filesonly = 'invert' - #opts.filesonly = 'match' - #opts.showonlymatch = True - #opts.quiet = True - #opts.hideerrors = True - main('help', opts, 'make.bat', 'Makefile', run_all=run_all) - #main('help', opts, '.', run_all=run_all) + Grep.app(run_all) @example @@ -452,89 +468,106 @@ def run_using_threads(): # [start-grep-threads] import queue import threading - import time - MAX_THREADS = 10 + MAX_FILES = 10 + MAX_QUEUE = 100 def run_all(regex, opts, files, grep): FINISHED = object() - matches_by_file = [] + matches_by_file = queue.Queue() + + def manage_tasks(): + counter = queue.Queue(MAX_FILES) + + def task(infile, matches): + for match in grep(regex, opts, infile): + matches.put(match) + matches.put(FINISHED) + # Let a new thread start. + counter.get() - done = False - def start_tasks(): - nonlocal done - numfiles = 0 - active = {} for infile in files: - if isinstance(infile, str): - filename = infile - else: - filename, _ = infile - numfiles += 1 - index = numfiles - - while len(active) >= MAX_THREADS: - time.sleep(0.01) - - q = queue.Queue() - - def task(index=index, q=q, infile=infile): - for match in grep(regex, opts, infile): - q.put(match) - q.put(FINISHED) - while index not in active: - pass - del active[index] - t = threading.Thread(target=task) + _, filename = infile + + # Prepare for the file. + matches = queue.Queue(MAX_QUEUE) + matches_by_file.put((filename, matches)) + + # Start a thread to process the file. + t = threading.Thread( + target=task, + args=(infile, matches), + ) + counter.put(t, block=True) t.start() - - active[index] = (t, filename) - matches_by_file.append((filename, q)) - for t, _ in list(active.values()): - t.join() - done = True - t = threading.Thread(target=start_tasks) + matches_by_file.put(FINISHED) + t = threading.Thread(target=manage_tasks) t.start() # Yield the results as they are received, in order. 
- while True: - while matches_by_file: - filename, q = matches_by_file.pop(0) - while True: - try: - match = q.get(block=False) - except queue.Empty: - continue - if match is FINISHED: - break - yield match - if done: - break + next_matches = matches_by_file.get(block=True) + while next_matches is not FINISHED: + filename, matches = next_matches + match = matches.get(block=True) + while match is not FINISHED: + yield match + match = matches.get(block=True) + next_matches = matches_by_file.get(block=True) t.join() # [end-grep-threads] - - main, GrepOptions = Grep.common() - - opts = GrepOptions() - opts.recursive = True - #opts.ignorecase = True - #opts.invertmatch = True - #opts.showfilename = True - #opts.showfilename = False - #opts.filesonly = 'invert' - #opts.filesonly = 'match' - #opts.showonlymatch = True - #opts.quiet = True - #opts.hideerrors = True - main('help', opts, 'make.bat', 'Makefile', run_all=run_all) - #main('help', opts, '.', run_all=run_all) + Grep.app(run_all) @example def run_using_cf_threads(): # [startgrep-cf-threads] - ... + import concurrent.futures + import queue + import threading + + MAX_FILES = 10 + MAX_QUEUE = 100 + + def run_all(regex, opts, files, grep): + FINISHED = object() + matches_by_file = queue.Queue() + + def manage_tasks(): + threads = concurrent.futures.ThreadPoolExecutor(MAX_FILES) + + def task(infile, matches): + for match in grep(regex, opts, infile): + matches.put(match) + matches.put(FINISHED) + # Let a new thread start. + counter.get() + + for infile in files: + _, filename = infile + + # Prepare for the file. + matches = queue.Queue(MAX_QUEUE) + matches_by_file.put((filename, matches)) + + # Start a thread to process the file. + threads.submit(task, infile, matches) + matches_by_file.put(FINISHED) + t = threading.Thread(target=manage_tasks) + t.start() + + # Yield the results as they are received, in order. + next_matches = matches_by_file.get(block=True) + while next_matches is not FINISHED: + filename, matches = next_matches + match = matches.get(block=True) + while match is not FINISHED: + yield match + match = matches.get(block=True) + next_matches = matches_by_file.get(block=True) + + t.join() # [end-grep--cf-threads] + Grep.app(run_all) @example def run_using_multiprocessing(): From 9a1a81c5a1e95a965c47d930c57160205a4082f0 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 28 Aug 2024 19:14:34 -0600 Subject: [PATCH 35/80] Fix nested lists and add quadrants. --- Doc/howto/concurrency.rst | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 8fcacd286a3a59..79f8eb5b0b6e89 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -316,23 +316,32 @@ understand the problem and, potentially, which concurrency model would be the best fit: * requests + * frequency * expected latency for (at least partial) response + * inputs per request + * how many * size of each input + * tasks (logical threads) per input + * how many * variety vs. uniformity * compute per task: how much * data per task: how much and what kinds * I/O per task: how much and what kinds * tasks not tied to outputs + * task interaction + * how much and in what ways * what data is shared between tasks * how much blocking while waiting + * outputs per request + * how many * size pf each output * correlation to inputs @@ -343,6 +352,29 @@ To some extent the most critical factors can be compressed down to: * many outputs vs. combined output vs. 
matching large output * many short computations vs. fewer medium/long computations +We could also break it down into quadrants:: + + . stream of tasks queue of tasks + C | + P | + U | + - | + b | + o | + u | + n | + d | + -----------------------|----------------------- + I | + O | + - | + b | + o | + u | + n | + d | + + Aside from the concurrency model, the answers to the above can impact the following: @@ -353,16 +385,22 @@ In the context of the above characteristics, let's revisit the ways that concurrency can be helpful: * get work done faster + * run more tasks at once (multi-core) + * make the app feel more responsive + * make sure critical tasks have priority * process results as they come, instead of waiting for them all * send payload to multiple targets before starting next task + * use system resources more efficiently + * keep slow parts from blocking fast parts * keep blocking resources from blocking the whole program * make sure other tasks have a fair share of time * task scheduling & resource usage optimization + * scaling * handle asynchronous events From 03156b5fd56354a2b7ccad5b57811d23684b8bf0 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 28 Aug 2024 19:15:07 -0600 Subject: [PATCH 36/80] Implement grep using multiprocessing. --- Doc/howto/concurrency.rst | 108 +++++++- Doc/includes/concurrency.py | 490 ++++++++++++++++++++++++++++++------ 2 files changed, 507 insertions(+), 91 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 79f8eb5b0b6e89..0f0ba42e60002b 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -1077,12 +1077,33 @@ This a basic Python implementation of the linux ``grep`` tool. From a concurrency standpoint, each file is processed in its own logical thread. +Here's the non-concurrent app code that all the implementations share: + +.. raw:: html + +
+ (expand) + +.. literalinclude:: ../includes/concurrency.py + :start-after: [start-grep-common] + :end-before: [end-grep-common] + :dedent: + :linenos: + +.. raw:: html + +
+ +Here's the implementations for the different concurrency models, +side-by-side for easy comparison: + .. list-table:: :header-rows: 1 :class: borderless vert-aligned :align: left - * - threads + * - sequential + - threads - multiple interpreters - coroutines - multiple processes @@ -1092,6 +1113,21 @@ logical thread.
(expand) + .. literalinclude:: ../includes/concurrency.py + :start-after: [start-grep-sequential] + :end-before: [end-grep-sequential] + :dedent: + :linenos: + + .. raw:: html + +
+ + - .. raw:: html + +
+ (expand) + .. literalinclude:: ../includes/concurrency.py :start-after: [start-grep-threads] :end-before: [end-grep-threads] @@ -1153,8 +1189,8 @@ logical thread. (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-grep-smp] - :end-before: [end-grep-smp] + :start-after: [start-grep-dask] + :end-before: [end-grep-dask] :dedent: :linenos: @@ -1162,7 +1198,9 @@ logical thread.
-Common code: +For threads, multiprocessing, and +`multiple interpreters * `_, +you can also use :mod:`concurrent.futures`: .. raw:: html @@ -1170,8 +1208,8 @@ Common code: (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-grep-common] - :end-before: [end-grep-common] + :start-after: [start-grep-cf-threads] + :end-before: [end-grep-cf-threads] :dedent: :linenos: @@ -1179,18 +1217,26 @@ Common code: +.. raw:: html + +
+ Workload: Image Resizer ----------------------- This example runs a web service that takes an image and a new size and responds with the image at the new size. +Here's the implementations for the different concurrency models, +side-by-side for easy comparison: + .. list-table:: :header-rows: 1 :class: borderless vert-aligned :align: left - * - threads + * - sequential + - threads - multiple interpreters - coroutines - multiple processes @@ -1200,6 +1246,21 @@ and responds with the image at the new size.
(expand) + .. literalinclude:: ../includes/concurrency.py + :start-after: [start-image-resizer-sequential] + :end-before: [end-image-resizer-sequential] + :dedent: + :linenos: + + .. raw:: html + +
+ + - .. raw:: html + +
+ (expand) + .. literalinclude:: ../includes/concurrency.py :start-after: [start-image-resizer-threads] :end-before: [end-image-resizer-threads] @@ -1261,8 +1322,8 @@ and responds with the image at the new size. (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-image-resizer-smp] - :end-before: [end-image-resizer-smp] + :start-after: [start-image-resizer-dask] + :end-before: [end-image-resizer-dask] :dedent: :linenos: @@ -1270,17 +1331,25 @@ and responds with the image at the new size.
+.. raw:: html + +
+ Workload: ... ------------- # ... +Here's the implementations for the different concurrency models, +side-by-side for easy comparison: + .. list-table:: :header-rows: 1 :class: borderless vert-aligned :align: left - * - threads + * - sequential + - threads - multiple interpreters - coroutines - multiple processes @@ -1290,6 +1359,21 @@ Workload: ...
(expand) + .. literalinclude:: ../includes/concurrency.py + :start-after: [start-w3-sequential] + :end-before: [end-w3-sequential] + :dedent: + :linenos: + + .. raw:: html + +
+ + - .. raw:: html + +
+ (expand) + .. literalinclude:: ../includes/concurrency.py :start-after: [start-w3-threads] :end-before: [end-w3-threads] @@ -1351,8 +1435,8 @@ Workload: ... (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-w3-smp] - :end-before: [end-w3-smp] + :start-after: [start-w3-dask] + :end-before: [end-w3-dask] :dedent: :linenos: diff --git a/Doc/includes/concurrency.py b/Doc/includes/concurrency.py index 32f33c154c9c8a..ff825cf8e314b7 100644 --- a/Doc/includes/concurrency.py +++ b/Doc/includes/concurrency.py @@ -7,6 +7,7 @@ import contextlib import os import tempfile +import sys @contextlib.contextmanager @@ -456,7 +457,51 @@ def app(run_all): @example def run_sequentially(): # [start-grep-sequential] + # + # + # + + # + # + + # + # + # + def run_all(regex, opts, files, grep): + # + + #def manage_tasks(): + # + # + # + # + # + # + # + # + # + # + # + # + # + # + # + # + # + # + # + # + # + # + + # + # + # + # + # + # + for infile in files: yield from grep(regex, opts, infile) # [end-grep-sequential] @@ -468,29 +513,54 @@ def run_using_threads(): # [start-grep-threads] import queue import threading + # MAX_FILES = 10 - MAX_QUEUE = 100 + MAX_MATCHES = 100 + + # + # + # def run_all(regex, opts, files, grep): - FINISHED = object() matches_by_file = queue.Queue() def manage_tasks(): - counter = queue.Queue(MAX_FILES) + # + counter = threading.Semaphore(MAX_FILES) + # + # + # + + # + # + # + # + # + # + # + # + # + # + # + # + # + # + # + # def task(infile, matches): for match in grep(regex, opts, infile): matches.put(match) - matches.put(FINISHED) + matches.put(None) # Let a new thread start. - counter.get() + counter.release() for infile in files: _, filename = infile # Prepare for the file. - matches = queue.Queue(MAX_QUEUE) + matches = queue.Queue(MAX_MATCHES) matches_by_file.put((filename, matches)) # Start a thread to process the file. @@ -498,18 +568,24 @@ def task(infile, matches): target=task, args=(infile, matches), ) - counter.put(t, block=True) + counter.acquire(blocking=True) + # t.start() - matches_by_file.put(FINISHED) + matches_by_file.put(None) + # + # + # + # + # t = threading.Thread(target=manage_tasks) t.start() # Yield the results as they are received, in order. next_matches = matches_by_file.get(block=True) - while next_matches is not FINISHED: + while next_matches is not None: filename, matches = next_matches match = matches.get(block=True) - while match is not FINISHED: + while match is not None: yield match match = matches.get(block=True) next_matches = matches_by_file.get(block=True) @@ -520,93 +596,289 @@ def task(infile, matches): @example def run_using_cf_threads(): - # [startgrep-cf-threads] + # [start-grep-cf-threads] import concurrent.futures import queue import threading MAX_FILES = 10 - MAX_QUEUE = 100 + MAX_MATCHES = 100 + + # Alternately, swap in ProcessPoolExecutor + # or InterpreterPoolExecutor. + c_f_Executor = concurrent.futures.ThreadPoolExecutor def run_all(regex, opts, files, grep): - FINISHED = object() matches_by_file = queue.Queue() def manage_tasks(): - threads = concurrent.futures.ThreadPoolExecutor(MAX_FILES) + threads = c_f_Executor(MAX_FILES) + # + # + # + # + + # + # + # + # + # + # + # + # + # + # + # + # + # + # + # + # def task(infile, matches): for match in grep(regex, opts, infile): matches.put(match) - matches.put(FINISHED) - # Let a new thread start. - counter.get() + matches.put(None) + # + # for infile in files: _, filename = infile # Prepare for the file. 
- matches = queue.Queue(MAX_QUEUE) + matches = queue.Queue(MAX_MATCHES) matches_by_file.put((filename, matches)) # Start a thread to process the file. threads.submit(task, infile, matches) - matches_by_file.put(FINISHED) + # + # + # + # + # + # + matches_by_file.put(None) + # + # + # + # + # t = threading.Thread(target=manage_tasks) t.start() # Yield the results as they are received, in order. next_matches = matches_by_file.get(block=True) - while next_matches is not FINISHED: + while next_matches is not None: filename, matches = next_matches match = matches.get(block=True) - while match is not FINISHED: + while match is not None: yield match match = matches.get(block=True) next_matches = matches_by_file.get(block=True) t.join() - # [end-grep--cf-threads] + # [end-grep-cf-threads] Grep.app(run_all) @example - def run_using_multiprocessing(): - # [start-grep-multiprocessing] - import multiprocessing - - def task(): - ... + def run_using_subinterpreters(): + # [start-grep-subinterpreters] + # subinterpreters 1 + ... + # [end-grep-subinterpreters] + @example + def run_using_cf_subinterpreters(): + # [start-grep-cf-subinterpreters] + # concurrent.futures 1 ... - # [end-grep-multiprocessing] + # [end-grep-cf-subinterpreters] @example def run_using_async(): # [start-grep-async] - # async 2 + # async 1 ... # [end-grep-async] @example - def run_using_subinterpreters(): - # [start-grep-subinterpreters] - # subinterpreters 2 - ... - # [end-grep-subinterpreters] + def run_using_multiprocessing(): + # [start-grep-multiprocessing] + import multiprocessing + import queue + import threading + + MAX_FILES = 10 + MAX_MATCHES = 100 + + # + # + # + + def run_all(regex, opts, files, grep): + matches_by_file = queue.Queue() + + def manage_tasks(): + # + counter = threading.Semaphore(MAX_FILES) + finished = multiprocessing.Queue() + active = {} + done = False + + def monitor_tasks(): + while not done: + try: + index = finished.get(timeout=0.1) + except queue.Empty: + continue + proc = active.pop(index) + proc.join(0.1) + if proc.is_alive(): + # It's taking too long to terminate. + # We can wait for it at the end. + active[index] = proc + # Let a new thread start. + counter.release() + monitor = threading.Thread(target=monitor_tasks) + monitor.start() + + def task(infile, index, matches, finished): + for match in grep(regex, opts, infile): + matches.put(match) + matches.put(None) + # + finished.put(index) + + for index, infile in enumerate(files): + _, filename = infile + + # Prepare for the file. + matches = multiprocessing.Queue(MAX_MATCHES) + matches_by_file.put((filename, matches)) + + # Start a subprocess to process the file. + proc = multiprocessing.Process( + target=task, + args=(infile, index, matches, finished), + ) + counter.acquire(blocking=True) + active[index] = proc + proc.start() + matches_by_file.put(None) + # Wait for all remaining tasks to finish. + done = True + monitor.join() + for proc in active.values(): + proc.join() + t = threading.Thread(target=manage_tasks) + t.start() + + # Yield the results as they are received, in order. + next_matches = matches_by_file.get(block=True) + while next_matches is not None: + filename, matches = next_matches + match = matches.get(block=True) + while match is not None: + yield match + match = matches.get(block=True) + next_matches = matches_by_file.get(block=True) + + t.join() + # [end-grep-multiprocessing] + Grep.app(run_all) @example - def run_using_smp(): - # [start-grep-smp] - # smp 2 - ... 
- # [end-grep-smp] + def run_using_cf_multiprocessing(): + # [start-grep-cf-multiprocessing] + import concurrent.futures + import queue, multiprocessing + import threading + + MAX_FILES = 10 + MAX_MATCHES = 100 + + # Alternately, swap in ThreadPoolExecutor + # or InterpreterPoolExecutor. + c_f_Executor = concurrent.futures.ThreadPoolExecutor + + def run_all(regex, opts, files, grep): + matches_by_file = queue.Queue() + + def manage_tasks(): + threads = c_f_Executor(MAX_FILES) + # + # + # + # + + # + # + # + # + # + # + # + # + # + # + # + # + # + # + # + # + + def task(infile, matches): + for match in grep(regex, opts, infile): + matches.put(match) + matches.put(None) + # + # + + for infile in files: + _, filename = infile + + # Prepare for the file. + matches = multiprocessing.Queue(MAX_MATCHES) + matches_by_file.put((filename, matches)) + + # Start a thread to process the file. + threads.submit(task, infile, matches) + # + # + # + # + # + # + matches_by_file.put(None) + # + # + # + # + # + t = threading.Thread(target=manage_tasks) + t.start() + + # Yield the results as they are received, in order. + next_matches = matches_by_file.get(block=True) + while next_matches is not None: + filename, matches = next_matches + match = matches.get(block=True) + while match is not None: + yield match + match = matches.get(block=True) + next_matches = matches_by_file.get(block=True) + + t.join() + # [end-grep-cf-multiprocessing] + Grep.app(run_all) @example - def run_using_concurrent_futures_thread(): - # [start-grep-concurrent-futures-thread] - # concurrent.futures 2 + def run_using_dask(): + # [start-grep-dask] + # dask 1 ... - # [end-grep-concurrent-futures-thread] + # [end-grep-dask] ####################################### @@ -615,6 +887,13 @@ def run_using_concurrent_futures_thread(): class ImageResizer(WorkloadExamples): + @example + def run_sequentially(): + # [start-image-resizer-sequential] + # sequential 2 + ... + # [end-image-resizer-sequential] + @example def run_using_threads(): # [start-image-resizer-threads] @@ -632,10 +911,31 @@ def task(): @example def run_using_cf_thread(): # [start-image-resizer-cf-thread] - # concurrent.futures 1 + # concurrent.futures 2 ... # [end-image-resizer-cf-thread] + @example + def run_using_subinterpreters(): + # [start-image-resizer-subinterpreters] + # subinterpreters 2 + ... + # [end-image-resizer-subinterpreters] + + @example + def run_using_cf_subinterpreters(): + # [start-image-resizer-cf-subinterpreters] + # concurrent.futures 2 + ... + # [end-image-resizer-cf-subinterpreters] + + @example + def run_using_async(): + # [start-image-resizer-async] + # async 2 + ... + # [end-image-resizer-async] + @example def run_using_multiprocessing(): # [start-image-resizer-multiprocessing] @@ -648,25 +948,18 @@ def task(): # [end-image-resizer-multiprocessing] @example - def run_using_async(): - # [start-image-resizer-async] - # async 1 - ... - # [end-image-resizer-async] - - @example - def run_using_subinterpreters(): - # [start-image-resizer-subinterpreters] - # subinterpreters 1 + def run_using_cf_multiprocessing(): + # [start-image-resizer-cf-multiprocessing] + # concurrent.futures 2 ... - # [end-image-resizer-subinterpreters] + # [end-image-resizer-cf-multiprocessing] @example - def run_using_smp(): - # [start-image-resizer-smp] - # smp 1 + def run_using_dask(): + # [start-image-resizer-dask] + # dask 2 ... 
- # [end-image-resizer-smp] + # [end-image-resizer-dask] ####################################### @@ -675,6 +968,13 @@ def run_using_smp(): class WorkloadX(WorkloadExamples): + @example + def run_sequentially(): + # [start-w3-sequential] + # sequential 3 + ... + # [end-w3-sequential] + @example def run_using_threads(): # [start-w3-threads] @@ -690,15 +990,25 @@ def task(): # [end-w3-threads] @example - def run_using_multiprocessing(): - # [start-w3-multiprocessing] - import multiprocessing + def run_using_cf_thread(): + # [start-w3-cf-thread] + # concurrent.futures 3 + ... + # [end-w3-cf-thread] - def task(): - ... + @example + def run_using_subinterpreters(): + # [start-w3-subinterpreters] + # subinterpreters 3 + ... + # [end-w3-subinterpreters] + @example + def run_using_cf_subinterpreters(): + # [start-w3-cf-subinterpreters] + # concurrent.futures 3 ... - # [end-w3-multiprocessing] + # [end-w3-cf-subinterpreters] @example def run_using_async(): @@ -708,25 +1018,29 @@ def run_using_async(): # [end-w3-async] @example - def run_using_subinterpreters(): - # [start-w3-subinterpreters] - # subinterpreters 3 + def run_using_multiprocessing(): + # [start-w3-multiprocessing] + import multiprocessing + + def task(): + ... + ... - # [end-w3-subinterpreters] + # [end-w3-multiprocessing] @example - def run_using_smp(): - # [start-w3-smp] - # smp 3 + def run_using_cf_multiprocessing(): + # [start-w3-cf-multiprocessing] + # concurrent.futures 3 ... - # [end-w3-smp] + # [end-w3-cf-multiprocessing] @example - def run_using_concurrent_futures_thread(): - # [start-w3-concurrent-futures-thread] - # concurrent.futures 3 + def run_using_dask(): + # [start-w3-dask] + # dask 3 ... - # [end-w3-concurrent-futures-thread] + # [end-w3-dask] ####################################### @@ -734,11 +1048,29 @@ def run_using_concurrent_futures_thread(): ####################################### if __name__ == '__main__': - # Run all the examples. + # Run (all) the examples. + argv = sys.argv[1:] + if argv: + classname, _, funcname = argv[0].rpartition('.') + requested = (classname, funcname) + else: + requested = None + div1 = '#' * 40 div2 = '#' + '-' * 39 last = None for func, cls in example.registry: + if requested: + classname, funcname = requested + if classname: + if cls.__name__ != classname: + continue + if func.__name__ != funcname: + continue + else: + if func.__name__ != funcname: + if cls.__name__ != funcname: + continue print() if cls is not last: last = cls From e0d833e844bcf621bef81cb2cb16bf1e514e0e16 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 29 Aug 2024 15:47:14 -0600 Subject: [PATCH 37/80] Start reorganizing, and dump a bunch of explanation. --- Doc/howto/concurrency.rst | 1487 ++++++++++++++++++----------------- Doc/includes/concurrency.py | 315 ++------ 2 files changed, 838 insertions(+), 964 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 0f0ba42e60002b..f0ab2c2c2ec2aa 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -4,12 +4,12 @@ Concurrency HOWTO ***************** -Python is a language the accommodates a variety of programming styles, -from procedural to object-oriented to functional. The same applies -to concurrency. Here we'll look at how different concurrency models -look in Python, with an emphasis on practical workload-oriented examples. +There are many outstanding resources, both online and in print, +that would do an excellent job of introducing you to concurrency. 
+This howto document builds on those by walking you through how
+to apply that knowledge using Python.
 
-The following concurrency models are covered:
+Python supports the following concurrency models:
 
 * free-threading
 * isolated threads, AKA CSP/actor model
@@ -17,9 +17,15 @@ The following concurrency models are covered:
 * multi-processing
 * distributed, e.g. SMP
 
-Each of these will be explained, with some simple examples.  The later
-workload-oriented examples will be implemented using each,
-for comparison, when possible.
+In this document, we'll look at how to take advantage of this
+concurrency support.  The overall focus is on the following:
+
+* understanding the supported concurrency models
+* factors to consider when designing a concurrent solution
+* key concurrency primitives
+* high-level, app-oriented practical examples
+
+.. XXX Add a summary and section about key concurrency patterns
 
 .. note::
 
@@ -39,6 +45,30 @@ for comparison, when possible.
    Take that into consideration before reaching for threads
    and look at the alternatives first.
 
+We'll be using the following terms and ideas throughout:
+
+task (logical thread)
+   | a cohesive *linear* sequence of abstract steps in a program;
+   | effectively, a mini-program;
+   | the logical equivalent of executed instructions corresponding to code
+
+physical thread (OS thread)
+   | where the actual code for a logical thread runs on the CPU (and operating system);
+   | we avoid using plain "thread" for this, to avoid ambiguity
+
+Python thread
+   | the Python runtime running in a physical thread
+   | particularly the portion of the runtime state active in the physical thread
+   | (see :class:`threading.Thread`)
+
+concurrency (multitasking)
+   | a program with multiple logical threads running simultaneously
+   | (not necessarily in parallel)
+
+parallelism (multi-core)
+   | running a program's multiple logical threads on multiple physical
+   | threads (CPU cores)
+
 .. raw:: html
 
-For convenience, here's a summary comparing the concurrency models
-in Python:
+For convenience, here are the concurrency primitives we'll cover later:
 
 .. list-table::
    :header-rows: 1
    :class: borderless vert-aligned
    :align: left
 
-   * - `model `_
-     - Python API
-     - scale
-     - `multi-core `_
-     - `races `_
-     - `overhead `_
-     - `c.f `_
-   * - free threading `(Python) `_
-     - :mod:`threading`
-     - small-medium
-     - `yes* `_
-     - **yes**
-     - very low
-     - yes
-   * - isolated threads `(Python) `_
-     - `interpreters `_
-     - small-medium
-     - yes
-     - no
-     - `low+ `_
-     - `yes* `_
-   * - coroutines `(Python) `_
-     - :mod:`asyncio`
-     - small-medium
-     - **no**
-     - no
-     - low
-     - no
-   * - multiprocessing `(Python) `_
-     - :mod:`multiprocessing`
-     - small
-     - yes
-     - no
-     - **medium**
-     - yes
-   * - distributed `(Python) `_
-     - :pypi:`dask`
-     - large
-     - yes
-     - no
-     - **medium+**
-     - no
-
-
-All About Concurrency
-=====================
-
-What is concurrency?
---------------------
-
-At its most fundamental, concurrency means doing multiple things at once,
-from a strictly *logical* viewpoint.
-
-When a computer program runs, it executes a sequence of code
-in a given order.  If you were to trace the actual execution, you would
-still end up with a *linear* series of executed instructions that matches
-the code.  We call this sequence of code (and instructions) a logical
-"thread" of execution.
-
-Sometimes it makes sense to break up that sequence into smaller pieces,
-where some of them can run independently of others.  Thus the program
-then involves multiple logical threads. 
This is also called -"multitasking" and each logical thread a "task". - -As an example of splitting up the sequence, consider the following -abstract program with three pieces:: - - prep - do A - do B - -If both ``do A`` and ``do B`` only rely on ``prep`` having completed, -then we could rearrange the program in one of the following ways and -end up with the same result:: - - prep = prep prep = prep ----- - do B = do A do B = | | - do A = = do A do B - -In the first alternative, we swap ``do A`` and ``do B``. In the second -one we split the original program into two programs that we can run at -the same time. In the third one, we run ``do A`` and ``do B`` at the -same time. "At the same time" means concurrency. It always involves -multiple logical threads. - -Additionally, concurrency often involves some degree of synchronization -between the logical threads. At the most basic conceptual level: -one thread may wait for another to finish. - -Aside from code running at the same time, concurrency typically -also involves some amount of resources shared between the concurrent -tasks. That may include memory, files, and sockets. - -One important observation is that most concurrent programs -can be represented instead as a single task, with the code of the -concurrent tasks merged into a single sequence. - -What is parallelism? --------------------- - -Concurrency may happen in one of two ways. The concurrent tasks may -share a single CPU, each running a little bit at a time, with the -operating system (or language runtime) taking care of the switching. -The other way is where each task runs on its own CPU, meaning they -are physically running at the same time, not just logically. - -That second way is parallelism. - -Modern CPUs are designed around parallelism, with multiple cores -and sometimes multiple execution pipelines per core. The operating -system exposes physical CPU threads as OS threads and as processes. -A programming language (or runtime) may add additional layers of -abstraction on top of that. - -Parallelism is where concurrent logical threads are running -on distinct physical threads across multiple cores, - -Concurrency Models ------------------- - -The concept of concurrency has been a part of the study and practice -of computer software since very early on, in the 1950s and 1960s, -long before the wide-spread adoption of multi-core CPUs. Clearly -its about more than just parallelism. - -Over the decades, research and use of concurrency has led to a variety -of well defined abstract models, with different characteristics and -tradeoffs. The application of the different theoretical concurrency -models can be categorized as follows: - -================= ========== -free threads using multiple physical threads in the same process, - with no isolation between them -isolated threads threads, often physical, with strict isolation - between them (e.g. CSP and actor model) -coroutines "cooperative multitasking", AKA async/await -multiprocessing using multiple isolated processes -distributed multiprocessing across multiple computers -================= ========== - -(There are certainly others, but these are the focus here.) - -There are tradeoffs to each. Free-threading probably has the most -notoriety and the most examples, but is also has the most pitfalls -(see `concurrency-downsides`_ below). -Isolated threads have few of those pitfalls but are less familiar. 
-Multiprocessing and distributed are likewise isolated, but less -efficient, which can have a larger negative impact at smaller scales. -Async can be straightforward, but may cascade throughout a code base -and doesn't necessarily give you parallelism. - -What problems can concurrency help solve? ------------------------------------------ - -Primarily, concurrency can be helpful by making your program faster -and more responsive (less latency), when possible. In other words, -you get better computational throughput. That happens by enabling -the following: - -* run on multiple CPU cores (parallelism) -* keep blocking resources from blocking the whole program -* make sure critical tasks have priority -* make sure other tasks have a fair share of time -* process results as they come, instead of waiting for them all - -Other possible benefits: - -* asynchronous events can be handled more cleanly -* better efficiency using hardware resources -* improved scalability - -.. _concurrency-downsides: - -What are the downsides? ------------------------ - -The main challenge when using concurrency is the (potential) extra -complexity. This complexity comes from the effect of multiple logical -threads running at the same time and interacting with each other. -In practice, this falls into two categories: data races and tracing -relative execution. Both are a form of "spooky action at a distance" [#f1]_ -(meaning something changes unexpectedly in one place due to unknown -changes somewhere else). - -The first category relates to mutable data shared between threads: -a data race is where one thread writes to memory at a time when another -thread is expecting the value to be unchanged, invalidating its logic. -Similarly, two threads could write to the same memory location at the -same time, either corrupting the data there or invalidating -the expectations of one of the threads. - -In each case, the non-deterministic scheduling of threads means it is -both hard to reproduce races and to track down where a race happened. -These qualities much these bugs especially frustrating -and worth diligently avoiding. - -Races are possible when the concurrency approach is subject -to parallel execution or to non-deterministic switching. -(This excludes coroutines, which rely on cooperative multitasking.) -When all memory is possibly shared, as is the case with free-threading, -then all memory is at risk. - -Dealing with data races is often managed using locks (AKA mutexes), -at a low level, and thread-safe types and APIs at a high level. -Depending on the programming language, the complexity is sometimes -mitigated somewhat by the compiler and runtime. There are even -libraries and frameworks that help abstract away the complexity -to an extent. On top of that, there are tools that can help identify -potential races via static analysis. Unfortunately, none of these aids -is foolproof and the risk of hitting a race is always looming. - -.. XXX mention reentrancy? - -The second category of complexity is the problem of tracing the execution -of one logical thread relative to another. This is especially relevant -for error handling, when an error in the one thread is exposed in the -other. This applies equally to threads that start other threads as to -concurrency models that use callbacks. Knowing where the failing thread -was started is valuable when debugging, as is knowing where a callback -was registered. - -Workloads ---------- - -In practice, concurrency is used in a wide variety of software. 
-Here's a not-comprehensive list: - -======================= =========== -application concurrency -======================= =========== -web server handle simultaneous static requests, CGI requests -web browser load multiple resources at once -database server handle simultaneous requests -devops script process multiple files at once -system logger handle simultaneous logging requests -ATM network handle multiple bank transactions at once -hacker toolkit decode a passwd file with brute force -raytracer compute RGB for each image pixel -machine learning apply matrices on training data set -astrophysics merge black hole data from multiple satellites - and observatories -investing combine thousands of industry data sources into - a concise actionable analysis -MMO game server handle login requests, handle client updates -game client GUI, physics engine, handle server updates -audio transcoder process chunks -engineering simulation calculate stress loads at vertices -molecular modeling try many permutations -======================= =========== - -For a given workload, here are some characteristics that will help you -understand the problem and, potentially, which concurrency model would -be the best fit: - -* requests - - * frequency - * expected latency for (at least partial) response - -* inputs per request - - * how many - * size of each input - -* tasks (logical threads) per input - - * how many - * variety vs. uniformity - * compute per task: how much - * data per task: how much and what kinds - * I/O per task: how much and what kinds - * tasks not tied to outputs - -* task interaction - - * how much and in what ways - * what data is shared between tasks - * how much blocking while waiting - -* outputs per request - - * how many - * size pf each output - * correlation to inputs - -To some extent the most critical factors can be compressed down to: - -* many inputs vs. 1 large divisible input -* many outputs vs. combined output vs. matching large output -* many short computations vs. fewer medium/long computations - -We could also break it down into quadrants:: - - . stream of tasks queue of tasks - C | - P | - U | - - | - b | - o | - u | - n | - d | - -----------------------|----------------------- - I | - O | - - | - b | - o | - u | - n | - d | - - -Aside from the concurrency model, the answers to the above can impact -the following: - -* use of a worker pool -* use of background tasks/threads - -In the context of the above characteristics, let's revisit the ways that -concurrency can be helpful: - -* get work done faster - - * run more tasks at once (multi-core) - -* make the app feel more responsive - - * make sure critical tasks have priority - * process results as they come, instead of waiting for them all - * send payload to multiple targets before starting next task + * - primitive + - used with + - purpose + * - ... + - ... + - ... -* use system resources more efficiently +Likewise, the high-level examples: - * keep slow parts from blocking fast parts - * keep blocking resources from blocking the whole program - * make sure other tasks have a fair share of time - * task scheduling & resource usage optimization +.. 
list-table:: + :header-rows: 1 + :class: borderless vert-aligned + :align: left -* scaling -* handle asynchronous events + * - workload (app) + - per-request inputs + - per-request outputs + - *N* core tasks + - core task + * - `grep `_ + - | *N* filenames (**stdin**) + | file bytes x *N* (**disk**) + - *M* matches (**stdout**) + - 1+ per file + - | **time**: ~ file size + | **mem**: small + * - `... `_ + - ... + - ... + - ... + - ... + * - `... `_ + - ... + - ... + - ... + - ... Python Concurrency Models ========================= -We've looked at concurrency and concurrency models generally. -Now let's see what each looks like in Python. -We'll also look at `concurrent.futures `_ -provides a high-level API for some of the concurrency models. - -Here's a summary: +As mentioned, there are essentially five concurrency models that +Python supports directly: .. list-table:: :header-rows: 1 @@ -422,63 +141,78 @@ Here's a summary: * - model - Python API - - scale - - pros - - cons + - description * - free threading - :mod:`threading` - - small-medium - - * familiar to many - * many examples available - * can enable multi-core parallelism (`caveat: GIL `_) - - * all memory is subject to races - * some IO may have races (e.g. writing to stdout) - * can be hard for humans to follow what's happening in different - threads at any given point - * - multiple interpreters (isolated threads) + - using multiple physical threads in the same process, + with no isolation between them + * - | isolated threads + | (multiple interpreters) - `interpreters `_ - - small-medium - - * isolation eliminates nearly all races, by default - (sharing is strictly opt-in) - * synchronization is built in to cross-interpreter interaction - * enables full multi-core parallelism of all Python code - - * unfamiliar to many - * less efficient than threads - * (currently) limited in what data can be shared between - interpreters + - threads, often physical, with strict isolation + between them (e.g. CSP and actor model) * - coroutines (async/await) - :mod:`asyncio` - - small-medium - - * not subject to races - * increasingly familiar to many; popular in newer languages - * has a long history in Python (e.g. ``twisted``) - - * async and non-async functions don't mix well, - potentially leading to duplication of code - * switching to async can require substantial cascading code churn - * callbacks can make it difficult to follow program logic, - making debugging harder - * does not enable multi-core parallelism - * - multiprocessing + - switching between logical threads is explicitly controlled by each + * - multi-processing - :mod:`multiprocessing` + - using multiple isolated processes + * - distributed + - | :pypi:`dask` + | (`multiprocessing `_) + - multiprocessing across multiple computers + +After we look at some comparisons of the concurrency models, +we'll briefly talk about critical caveats for specific models. + +Tables +------ + +The following tables provide a detailed look with side-by-side comparisons. +We'll also compare them at a high level in +`a later section `_. + +key characteristics +^^^^^^^^^^^^^^^^^^^ + +.. 
list-table:: + :header-rows: 1 + :class: borderless vert-aligned + :align: left + + * - + - scale + - multi-core + - `races `_ + - overhead + * - free-threading + - small-medium + - `yes* `_ + - **yes** + - very low + * - multiple interpreters + - small-medium + - yes + - limited + - `low+ `_ + * - coroutines + - small-medium + - **no** + - no + - low + * - multi-processing - small - - * isolated (no races) - * enables full multi-core parallelism of all Python code - - * substantially less efficient than using a single process - * can lead to exhaustion of system resources - (e.g. file handles, PIDs) - * API can be hard to use + - yes + - limited + - **medium** * - distributed - - :pypi:`dask` - large - - * isolated (no races) - * fully parallel - * facilitates massive scaling - - * not necessarily a good fit for small-scale applications - * often requires configuration - -.. _concurrency-overhead-table: + - yes + - limited + - **medium** -Here's a comparison of the overhead of each model in Python: +overhead details +^^^^^^^^^^^^^^^^ .. list-table:: :header-rows: 1 @@ -509,61 +243,212 @@ Here's a comparison of the overhead of each model in Python: - none - low - none - * - multiprocessing + * - multi-processing - medium - medium - medium - medium - low * - distributed - - medium+ - - medium+ - - medium-high - - medium - - low-medium + - medium+ + - medium+ + - medium-high + - medium + - low-medium + +complexity +^^^^^^^^^^ + +.. XXX "human-friendly" + +.. list-table:: + :header-rows: 1 + :class: borderless vert-aligned + :align: left + + * - + - parallel + - | shared + | mem + - | shared + | I/O + - | shared + | env + - | cross + | thread + - :abbr:`sync (synchronization between logical threads)` + - :abbr:`tracking (how easy it is to keep track of where one logical thread is running relative to another, especially when one terminates)` + - :abbr:`compat (compatibility with code not using this concurrency model)` + - | extra + | LOC + * - free-threading + - `yes* `_ + - **all** + - **all** + - **yes** + - **high** + - **explicit** + - + - yes + - low? + * - multiple interpreters + - yes + - limited + - **all** + - **yes** + - low + - implicit + - ??? + - yes + - low? + * - coroutines + - **no** + - all + - all + - yes + - low-med? + - implicit + - ??? + - **no** + - low-med + * - multi-processing + - yes + - limited + - no + - no? + - low + - | implicit + | +optional + - ??? + - yes + - low-med? + * - distributed + - yes + - limited + - no + - no? + - low + - | implicit + | +optional + - ??? + - yes + - medium? -.. _python-free-threading: +exposure +^^^^^^^^ -Free-threading --------------- +.. list-table:: + :header-rows: 1 + :class: borderless vert-aligned + :align: left + + * - + - research + - curriculum + - industry + - examples + - | Python + | history + * - free-threading + - very high + - high + - high + - high + - 0.9? + * - | isolated threads + | (multiple interpreters) + - high + - low? + - low-medium? + - low-medium? + - `2.2 `_ + * - coroutines + - medium-high? + - medium? + - medium? + - medium-high? + - 3.3-3.5 (2.2) + * - multi-processing + - ??? + - low? + - low-medium? + - low? + - 2.6 + * - distributed + - medium-high? + - low? + - medium? + - medium? + - n/a -.. currentmodule:: threading +high-level APIs +--------------- -Threads, through the :mod:`threading` module, have been the dominant -tool in Python concurrency for decades, which mirrors the generate state -of software in general. Threads are very light-weight and efficient. 
-Most importantly, they are the most direct route to taking advantage -of multi-core parallelism (more an that in a moment). +Also note that Python's stdlib provides various higher-level APIs +that support these concurrency models in various contexts: -The main downside to using threads is that each one shares the full -memory of the process with all the others. That exposes programs -to a significant risk of `races `_. +.. list-table:: + :header-rows: 1 + :class: borderless vert-aligned + :align: left -The other potential problem with using threads is that the conceptual -model has no inherent synchronization, so it can be hard to follow -what is going on in the program at any given moment. That is -especially challenging for testing and debugging. + * - + - :mod:`concurrent.futures` + - :mod:`socketserver` + - :mod:`http.server` + * - free-threading + - :class:`yes ` + - :class:`yes ` + - :class:`yes ` + * - multiple interpreters + - (`pending `_) + - + - + * - coroutines + - ??? + - + - + * - multi-processing + - | :class:`yes ` + | (:class:`similar `) + - :class:`yes ` + - + * - distributed + - ??? + - + - + +Critical caveats +---------------- -Using threads for concurrency boils down to: -1. create a thread object to run a function -2. start the thread -3. (optionally) wait for it to finish +There are tradeoffs to each, whether in performance or complexity. +Free-threading probably has the most notoriety and the most examples, +but is also has the most pitfalls (see `concurrency-downsides`_ below). +Isolated threads have few of those pitfalls but are less familiar. +Multiprocessing and distributed are likewise isolated, but less +efficient, which can have a larger negative impact at smaller scales. +Async can be straightforward, but may cascade throughout a code base +and doesn't necessarily give you parallelism. -Here's how that looks:: - import threading +free-threading +^^^^^^^^^^^^^^ - def task(): - # Do something. - ... +Python directly supports use of physical threads through the +:mod:`threading` module. - t = threading.Thread(target=task) - t.start() +* minimal conceptual indirection: closely tied to low-level physical threads +* the most direct route to taking advantage of multi-core parallelism - # Do other stuff. +The main downside to using threads is that each one shares the full +memory of the process with all the others. That exposes programs +to a significant risk of `races `_. - t.join() +The other potential problem with using threads is that the conceptual +model has no inherent synchronization, so it can be hard to follow +what is going on in the program at any given moment. That is +especially challenging for testing and debugging. .. _python-gil: @@ -598,12 +483,9 @@ burden to the Python project and extension module maintainers. However, there is sufficient interest in unlocking full multi-core parallelism to justify the current experiment. -.. currentmodule:: None - -.. _python-isolated-threads: Isolated Threads (CSP/Actor Model) ----------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ There's a major alternative to free-threading, both for multi-core parallelism and for a simpler conceptual model: use multiple interpreters. @@ -633,19 +515,6 @@ Using multiple interpreters is fairly straight-forward: 3. call :func:`exec`, but targeting the new interpreter 4. switch back -You can use the :mod:`!interpreters` module (more on that in a moment) -to do this:: - - import interpreters - - script = """if True: - # Do something. - ... 
- """ - - interp = interpreters.create() - interp.exec(script) - Note that no threads were involved. That's because running in an interpreter happens relative to the current thread. New threads aren't implicitly involved. They can be added in explicitly though. @@ -655,347 +524,502 @@ If you want multi-core parallelism, run a different interpreter in each thread. Their isolation means that each can run unblocked in that thread. -Here's the very explicit way to do that:: +.. _python-stdlib-interpreters: + +A Stdlib Module for Using Multiple Interpreters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +While use of multiple interpreters has been part of Python's C-API +for decades, the feature hasn't been exposed to Python code through +the stdlib. :pep:`734` proposes changing that by adding a new +:mod:`!interpreters` module. - import interpreters - import threading +In the meantime, an implementation of that PEP is available for +Python 3.13+ on PyPI: :pypi:`interpreters-pep-734`. - script = """if True: - # Do something. - ... - """ +.. _python-interpreters-overhead: - def task(): - interp = interpreters.create() - interp.exec(script) +Improving Performance for Multiple Interpreters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - t = threading.Thread(target=task) - t.start() +The long effort to improve on Python's implementation of multiple +interpreters focused on isolation and stability. There was very little +done to improve performance. This has the most impact on: - # Do other stuff. +* how much memory each interpreter uses + (i.e. how many can run at the same time) +* how long it takes to create a new interpreter - t.join() +As the work on isolation wraps up, improvements will shift to focus +on performance and memory usage. Thus the overhead associated with +using multiple interpreters will drastically decrease over time. -There's a convenience method too:: +Coroutines (Async/Await) +^^^^^^^^^^^^^^^^^^^^^^^^ - import interpreters +The use of :term:`coroutines ` for concurrency has been +around a long time and has grown in popularity in the software world, +particularly with the addition of ``async/await`` syntax in +various languages. - def task(): - # Do something. - ... +Python has supported coroutines to some degree since the beginning. +The best example is :pypi:`twisted`, which has provided this concurrency +model for decades. For most of that time :pypi:`!twisted` did it +primarily through callbacks and a form of "promises"/"futures". - interp = interpreters.create() - t = interp.call_in_thread(task) +Explicit support for coroutines in Python really started with the +introduction of :term:`generators ` in Python 2.2 +(:pep:`255`). In Python 2.5 (:pep:`342`), :term:`!generators` were +tweaked to explicitly support use as coroutines. That went a step +further in Python 3.3 with the addition of ``yield from`` (:pep:`380`) +and the :mod:`asyncio` module (:pep:`3156`). Finally, in Python 3.5 +(:pep:`492`), we got dedicated ``async/await`` syntax +and :ref:`a dedicated protocol ` +for :term:`!coroutine` objects. - # Do other stuff. +There are three main pieces to using coroutines: - t.join() +* coroutines (non-blocking, yield control instead) +* an event loop (schedules coroutines) +* coroutine wrappers around blocking operations -.. _python-stdlib-interpreters: +A :term:`coroutine function` looks *almost* the same as a regular +function. 
It is a non-blocking function that *cooperatively* yields
+control of the program to other coroutines, which in turn yield control
+back (eventually).  At those points of synchronization,
+coroutines often provide data to one another.
+
+The event loop is what keeps track of which coroutines have yielded
+control and which should get control next.
+
+Generally a coroutine needs to avoid doing anything that takes very long
+before yielding control back to the event loop.  Any blocking operation
+in a coroutine, like waiting on a socket, has to be implemented in a way
+that only waits a little while, yields, and then waits again, etc. until
+ready.  The alternative is to wrap the blocking operation/function
+in some sort of "future" coroutine that yields until the blocking
+operation completes.  The event loop can also fill that role
+to an extent.
+
+In addition to support for coroutines in the language, Python's stdlib
+provides the :mod:`asyncio` module, which includes:
+
+* an event loop
+* a number of useful coroutines
+* a variety of helpful APIs that build on coroutines and the event loop
+
+One of the main challenges with using coroutines is that they do not
+normally mix well with non-coroutines.  As a result, ``async/await``
+can be contagious, requiring surrounding code to be async.  This can
+lead to having the same thing implemented twice, once normal and once
+async, with significant code duplication.
+
+Multi-processing
+^^^^^^^^^^^^^^^^
+
+The stdlib :mod:`multiprocessing` module, which has been around for many
+years, provides an API for using multiple processes for concurrency.
+Furthermore, processes are always isolated, so you have many of the
+same benefits of using multiple interpreters, including multi-core
+parallelism.
+
+There are some obstacles, however.  First of all, using multiple
+processes has a higher overhead than operating in a single process,
+sometimes significantly higher.  This applies in just about every
+dimension of overhead.  Secondly, the :mod:`multiprocessing` module's
+API is substantially larger and more complex than what we use for
+threads and multiple interpreters.  Finally, there are some scaling
+issues with using multiple processes, related both to the performance
+overhead and to how the operating system assigns resources like
+file handles.
+
+The :class:`multiprocessing.Process` API's similarity with
+:class:`threading.Thread` is intentional.
+On top of that, the :mod:`multiprocessing` module provides an extensive
+API to address a variety of needs, including machinery for inter-process
+shared memory.  Also note that that API can be used for threads and
+(eventually) interpreters using different backends.
+
+Distributed
+^^^^^^^^^^^
+
+When it comes to concurrency at scale, through distributed concurrency,
+one of the best examples is :pypi:`dask`.
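+
+Here's a very basic sketch of what that can look like.  It is only
+illustrative: ``LocalCluster`` stands in for a real cluster and
+``task`` is a placeholder::
+
+    from dask.distributed import LocalCluster
+
+    def task(data):
+        # Do something.
+        return data
+
+    client = LocalCluster().get_client()
+
+    # Run it once, basically synchronously.
+    fut = client.submit(task, 'spam!')
+    res = fut.result()
+    assert res == 'spam!', repr(res)
+
+    # Run it multiple times concurrently.
+    values = list(range(5))
+    res = client.gather(
+        [client.submit(task, v) for v in values],
+    )
+    assert res == values, (res, values)
+
+Note how the ``submit()``/``result()`` pattern mirrors the one used by
+:mod:`concurrent.futures`.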
+
+.. _multiprocessing-distributed:
+
+Using multiprocessing for distributed computing
+-----------------------------------------------
+
+...
+
+
+.. _concurrency-races:
+
+Data races
+----------
+
+The first category relates to mutable data shared between threads:
+a data race is where one thread writes to memory at a time when another
+thread is expecting the value to be unchanged, invalidating its logic.
+Similarly, two threads could write to the same memory location at the
+same time, either corrupting the data there or invalidating
+the expectations of one of the threads.
+
+In each case, the non-deterministic scheduling of threads means it is
+both hard to reproduce races and to track down where a race happened.
+These qualities make these bugs especially frustrating
+and worth diligently avoiding.
+
+Races are possible when the concurrency approach is subject
+to parallel execution or to non-deterministic switching.
+(This excludes coroutines, which rely on cooperative multitasking.)
+When all memory is possibly shared, as is the case with free-threading,
+then all memory is at risk.
+
+Dealing with data races is often managed using locks (AKA mutexes),
+at a low level, and thread-safe types and APIs at a high level.
+Depending on the programming language, the complexity is sometimes
+mitigated somewhat by the compiler and runtime.  There are even
+libraries and frameworks that help abstract away the complexity
+to an extent.  On top of that, there are tools that can help identify
+potential races via static analysis.  Unfortunately, none of these aids
+is foolproof and the risk of hitting a race is always looming.
+
+.. XXX mention reentrancy?
+
+.. _concurrency-downsides:
+
+What are the downsides?
+-----------------------
+
+The main challenge when using concurrency is the (potential) extra
+complexity.  This complexity comes from the effect of multiple logical
+threads running at the same time and interacting with each other.
+In practice, this falls into two categories: data races and tracing
+relative execution.  Both are a form of "spooky action at a distance" [#f1]_
+(meaning something changes unexpectedly in one place due to unknown
+changes somewhere else).
+
+The first category, data races, is covered in the section above.
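+
+For example, here's a minimal sketch of protecting a shared counter
+with a :class:`threading.Lock`, so that concurrent read-modify-write
+updates don't get lost (the counts here are arbitrary)::
+
+    import threading
+
+    counter = 0
+    counter_lock = threading.Lock()
+
+    def task():
+        global counter
+        for _ in range(100_000):
+            # Without the lock, two threads could read the same value
+            # and each write back value + 1, losing an update.
+            with counter_lock:
+                counter += 1
+
+    threads = [threading.Thread(target=task) for _ in range(4)]
+    for t in threads:
+        t.start()
+    for t in threads:
+        t.join()
+    assert counter == 4 * 100_000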
+ +The second category of complexity is the problem of tracing the execution +of one logical thread relative to another. This is especially relevant +for error handling, when an error in the one thread is exposed in the +other. This applies equally to threads that start other threads as to +concurrency models that use callbacks. Knowing where the failing thread +was started is valuable when debugging, as is knowing where a callback +was registered. + + +Designing A Program For Concurrency +=================================== + +Whether you are starting a new project using concurrency or refactoring +an existing one to use it, it's important to design for concurrency +before taking one more step. Doing so will save you a lot of +headache later. + +1. decide if your program *might* benefit from concurrency +2. `break down your *logical* program into distinct tasks `_ +3. `determine which tasks could run at the same time `_ +4. `identify the other concurrency-related characteristics of your program `_ +5. `decide which concurrency model fits best `_ +6. go for it! + +At each step you should be continuously asking yourself if concurrency +is still a good fit for your program. + +Some problems are obviously not solvable with concurrency. Otherwise, +even if you *could* use concurrency, it might not provide much value. +Furthermore, even if it seems like it would provide meaningful value, +the additional costs in performance, complexity, or maintainability +might outweigh that benefit. -The long effort to improve on Python's implementation of multiple -interpreters focused on isolation and stability. There was very little -done to improve performance. This has the most impact on: +Thus, when you're thinking of solving a problem using concurrency, +it's crucial that you understand the problem well. -* how much memory each interpreter uses - (i.e. how many can run at the same time) -* how long it takes to create a new interpreter +How can concurrency help? +------------------------- -As the work on isolation wraps up, improvements will shift to focus -on performance and memory usage. Thus the overhead associated with -using multiple interpreters will drastically decrease over time. +Here are the benefits concurrency can bring to the table: -.. _python-coroutines: +* -Coroutines (Async/Await) ------------------------- -.. currentmodule:: asyncio +Primarily, concurrency can be helpful by making your program faster +and more responsive (less latency), when possible. In other words, +you get better computational throughput. That happens by enabling +the following: -The use of :term:`coroutines ` for concurrency has been -around a long time and has grown in popularity in the software world, -particularly with the addition of ``async/await`` syntax in -various languages. +* run on multiple CPU cores (parallelism) +* keep blocking resources from blocking the whole program +* make sure critical tasks have priority +* make sure other tasks have a fair share of time +* process results as they come, instead of waiting for them all -Python has supported coroutines to some degree since the beginning. -The best example is :pypi:`twisted`, which has provided this concurrency -model for decades. For most of that time :pypi:`!twisted` did it -primarily through callbacks and a form of "promises"/"futures". +Other possible benefits: -Explicit support for coroutines in Python really started with the -introduction of :term:`generators ` in Python 2.2 -(:pep:`255`). 
In Python 2.5 (:pep:`342`), :term:`!generators` were -tweaked to explicitly support use as coroutines. That went a step -further in Python 3.3 with the addition of ``yield from`` (:pep:`380`) -and the :mod:`asyncio` module (:pep:`3156`). Finally, in Python 3.5 -(:pep:`492`), we got dedicated ``async/await`` syntax -and :ref:`a dedicated protocol ` -for :term:`!coroutine` objects. +* asynchronous events can be handled more cleanly +* better efficiency using hardware resources +* improved scalability -There are three main pieces to using coroutines: +How can concurrency hurt? +------------------------- -* coroutines (non-blocking, yield control instead) -* an event loop (schedules coroutines) -* coroutine wrappers around blocking operations +... -A :term:`coroutine function` looks *almost* the same as a regular -function. It is a non-blocking function that *cooperatively* yields -control of the program to other coroutines, which in turn yield control -back (eventually). At those points of synchronization, -coroutines often provide data to one another. +.. _concurrency-identify-tasks: -The event loop is what keeps track of which coroutines have yielded -control and which should get control next. +Identifying the logical tasks in your program +------------------------------------------------ -Generally a coroutine needs to avoid doing anything that takes very long -before yielding control back to the event loop. Any blocking operation -in a coroutine, like waiting on a socket, has to be implemented in a way -that only waits a little while, yields, and then waits again, etc. until -ready. The alternative is to wrap the blocking operation/function -in some sort of "future" coroutine that yields until the blocking -operation completes. The event loop can also fill that role -to an extent. +... -In addition to support for coroutines in the language, Python's stdlib -provides the :mod:`asyncio` module, which includes: +.. _concurrency-characteristics: -* an event loop -* a number of useful coroutines -* a variety of helpful APIs that build on coroutines and the event loop +The concurrency characteristics of your program +------------------------------------------------ -Here's a very basic example of using coroutines with :mod:`!asyncio`:: +... - import asyncio +.. _concurrency-pick-a-model: - async def task(data): - # Do something small. - await asyncio.sleep(0.1) - # Do something else small. - return data +Picking a concurrency model +--------------------------- - # Run it once, basically synchronously. - res = asyncio.run(task('spam!') - assert res == 'spam!', repr(res) +... - # Run it multiple times concurrently. - values = list(range(5)) - res = asyncio.run( - asyncio.gather(*(task(v) for v in values)) - ) - assert res == values, (res, values) +free-threading: -One of the main challenges with using coroutines is that they do not -normally mix well with non-coroutines. As a result, ``async/await`` -can be contagious, requiring surrounding code to be async. This can -lead to having the same thing implemented twice, once normal and once -async, with significant code duplication. +* main value: efficient multi-core +* main costs: races & conceptual overhead -.. currentmodule:: None +A high-level look: -.. _python-multiprocessing: +.. 
list-table:: + :header-rows: 1 + :class: borderless vert-aligned + :align: left -Multi-processing ----------------- + * - model + - pros + - cons + * - free threading + - * very light-weight and efficient + * wide-spread + * can enable multi-core parallelism (`caveat: GIL `_) + - * all memory is shared, subject to races + * some IO may have races (e.g. writing to stdout) + * can be hard for humans to follow what's happening in different + threads at any given point + * - multiple interpreters (isolated threads) + - * isolation eliminates nearly all races, by default + (sharing is strictly opt-in) + * synchronization is built in to cross-interpreter interaction + * enables full multi-core parallelism of all Python code + - * unfamiliar to many + * less efficient than threads + * (currently) limited in what data can be shared between + interpreters + * - coroutines (async/await) + - * not subject to races + * increasingly familiar to many; popular in newer languages + * has a long history in Python (e.g. ``twisted``) + - * async and non-async functions don't mix well, + potentially leading to duplication of code + * switching to async can require substantial cascading code churn + * callbacks can make it difficult to follow program logic, + making debugging harder + * does not enable multi-core parallelism + * - multiprocessing + - * isolated (no races) + * enables full multi-core parallelism of all Python code + - * substantially less efficient than using a single process + * can lead to exhaustion of system resources + (e.g. file handles, PIDs) + * API can be hard to use + * - distributed + - * isolated (no races) + * fully parallel + * facilitates massive scaling + - * not necessarily a good fit for small-scale applications + * often requires configuration -.. currentmodule:: multiprocessing -The stdlib :mod:`multiprocessing` module, which has been around many -years, provides an API for using multiple processes for concurrency. -Furthermore, processes are always isolated, so you have many of the -same benefits of using multiple interpreters, including multi-core -parallelism. -There are some obstacles however. First of all, using multiple -processes has a higher overhead than operating in a single process, -sometimes significantly higher. This applies in just about every -dimension of overhead. Secondly, the :mod:`multiprocessing` module's -API is substantially larger and more complex that what we use for -threads and multiple interpreters. Finally, there are some scaling -issues with using multiple processes, related both to the performance -overhead and to how the operating system assigns resources like -file handles. -Here's a very basic example:: +* are there libraries that can take care of the concurrency parts? - import multiprocessing - def task(): - # Do something. - pass - p = multiprocessing.Process(target=task) - p.start() +At its most fundamental, concurrency means doing multiple things at once, +from a strictly *logical* viewpoint. - # Do other stuff. +When a computer program runs, it executes a sequence of code +in a given order. If you were to trace the actual execution, you would +still end up with a *linear* series of executed instructions that matches +the code. We call this sequence of code (and instructions) a logical +"thread" of execution. - p.join() +Sometimes it makes sense to break up that sequence into smaller pieces, +where some of them can run independently of others. Thus the program +then involves multiple logical threads. 
This is also called
+"multitasking" and each logical thread a "task".
+
+One important observation is that most concurrent programs
+can be represented instead as a single task, with the code of the
+concurrent tasks merged into a single sequence.
+
+
+What problems can concurrency help solve?
+-----------------------------------------
+
+synchronization
+---------------
+
+Additionally, concurrency often involves some degree of synchronization
+between the logical threads.  At the most basic conceptual level:
+one thread may wait for another to finish.
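+
+As a minimal sketch of that idea, one logical thread can block on a
+:class:`threading.Event` that another sets, and then wait for the
+other thread to finish entirely with :meth:`threading.Thread.join`::
+
+    import threading
+
+    ready = threading.Event()
+
+    def task():
+        # Do some setup.
+        ...
+        ready.set()   # Tell the other thread the setup is done.
+        # Do the rest of the work.
+        ...
+
+    t = threading.Thread(target=task)
+    t.start()
+
+    # Wait for the setup to finish before relying on it.
+    ready.wait()
+
+    # ...make use of the setup...
+
+    # Wait for the whole task to finish.
+    t.join()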
+
+shared resources
+----------------
+
+Aside from code running at the same time, concurrency typically
+also involves some amount of resources shared between the concurrent
+tasks.  That may include memory, files, and sockets.
+
+
+For a given workload, here are some characteristics that will help you
+understand the problem and, potentially, which concurrency model would
+be the best fit:
+
+* requests
+
+  * frequency
+  * expected latency for (at least partial) response
+
+* inputs per request
+
+  * how many
+  * size of each input
+
+* tasks (logical threads) per input
+
+  * how many
+  * variety vs. uniformity
+  * compute per task: how much
+  * data per task: how much and what kinds
+  * I/O per task: how much and what kinds
+  * tasks not tied to outputs
+
+* task interaction
+
+  * how much and in what ways
+  * what data is shared between tasks
+  * how much blocking while waiting
+
+* outputs per request
+
+  * how many
+  * size of each output
+  * correlation to inputs
+
+To some extent the most critical factors can be compressed down to:
+
+* many inputs vs. 1 large divisible input
+* many outputs vs. combined output vs. matching large output
+* many short computations vs. fewer medium/long computations
+
+We could also break it down into quadrants::
+
+   .          stream of tasks        queue of tasks
+   C |
+   P |
+   U |
+   - |
+   b |
+   o |
+   u |
+   n |
+   d |
+   -----------------------|-----------------------
+   I |
+   O |
+   - |
+   b |
+   o |
+   u |
+   n |
+   d |
+
+
+Aside from the concurrency model, the answers to the above can impact
+the following:
+
+* use of a worker pool
+* use of background tasks/threads
+
+In the context of the above characteristics, let's revisit the ways that
+concurrency can be helpful:
+
+* get work done faster
+
+  * run more tasks at once (multi-core)
+
+* make the app feel more responsive
+
+  * make sure critical tasks have priority
+  * process results as they come, instead of waiting for them all
+  * send payload to multiple targets before starting next task
+
+* use system resources more efficiently
+
+  * keep slow parts from blocking fast parts
+  * keep blocking resources from blocking the whole program
+  * make sure other tasks have a fair share of time
+  * task scheduling & resource usage optimization
+* scaling +* handle asynchronous events -:class:`!InterpreterPoolExecutor`: -* ... +Python Concurrency Primitives +============================= -.. currentmodule:: None +... Python Concurrency Workload Examples @@ -1032,14 +1056,12 @@ Here's a summary of the examples, by workload: - 1+ per file - | **time**: ~ file size | **mem**: small - * - `resize image `_ - - image (**net**) - - image (**net**) - - | *N* small sub-images - | **mem**: ~ 2x image size - - | **time**: short - | **mem**: small - * - `... `_ + * - `... `_ + - ... + - ... + - ... + - ... + * - `... `_ - ... - ... - ... @@ -1107,7 +1129,7 @@ side-by-side for easy comparison: - multiple interpreters - coroutines - multiple processes - - SMP + - distributed * - .. raw:: html
@@ -1221,11 +1243,10 @@ you can also use :mod:`concurrent.futures`:
-Workload: Image Resizer ------------------------ +Workload 2: ... +--------------- -This example runs a web service that takes an image and a new size -and responds with the image at the new size. +# ... Here's the implementations for the different concurrency models, side-by-side for easy comparison: @@ -1240,15 +1261,15 @@ side-by-side for easy comparison: - multiple interpreters - coroutines - multiple processes - - SMP + - distributed * - .. raw:: html
(expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-image-resizer-sequential] - :end-before: [end-image-resizer-sequential] + :start-after: [start-w2-sequential] + :end-before: [end-w2-sequential] :dedent: :linenos: @@ -1262,8 +1283,8 @@ side-by-side for easy comparison: (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-image-resizer-threads] - :end-before: [end-image-resizer-threads] + :start-after: [start-w2-threads] + :end-before: [end-w2-threads] :dedent: :linenos: @@ -1277,8 +1298,8 @@ side-by-side for easy comparison: (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-image-resizer-subinterpreters] - :end-before: [end-image-resizer-subinterpreters] + :start-after: [start-w2-subinterpreters] + :end-before: [end-w2-subinterpreters] :dedent: :linenos: @@ -1292,8 +1313,8 @@ side-by-side for easy comparison: (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-image-resizer-async] - :end-before: [end-image-resizer-async] + :start-after: [start-w2-async] + :end-before: [end-w2-async] :dedent: :linenos: @@ -1307,8 +1328,8 @@ side-by-side for easy comparison: (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-image-resizer-multiprocessing] - :end-before: [end-image-resizer-multiprocessing] + :start-after: [start-w2-multiprocessing] + :end-before: [end-w2-multiprocessing] :dedent: :linenos: @@ -1322,8 +1343,8 @@ side-by-side for easy comparison: (expand) .. literalinclude:: ../includes/concurrency.py - :start-after: [start-image-resizer-dask] - :end-before: [end-image-resizer-dask] + :start-after: [start-w2-dask] + :end-before: [end-w2-dask] :dedent: :linenos: @@ -1335,8 +1356,8 @@ side-by-side for easy comparison:
-Workload: ... -------------- +Workload 3: ... +--------------- # ... @@ -1353,7 +1374,7 @@ side-by-side for easy comparison: - multiple interpreters - coroutines - multiple processes - - SMP + - distributed * - .. raw:: html
diff --git a/Doc/includes/concurrency.py b/Doc/includes/concurrency.py index ff825cf8e314b7..0a005c73a1505e 100644 --- a/Doc/includes/concurrency.py +++ b/Doc/includes/concurrency.py @@ -4,34 +4,6 @@ :start-after: and :end-before: options. """ -import contextlib -import os -import tempfile -import sys - - -@contextlib.contextmanager -def dummy_files(*filenames): - """A context manager that creates empty files in a temp directory.""" - with tempfile.TemporaryDirectory() as tempdir: - orig = os.getcwd() - os.chdir(tempdir) - try: - for filename in filenames: - with open(filename, 'w') as outfile: - outfile.write(f'# {filename}\n') - yield tempdir - finally: - os.chdir(orig) - - -try: - zip((), (), strict=True) -except TypeError: - def zip(*args, strict=False, _zip=zip): - return _zip(*args) - - class example(staticmethod): """A function containing example code. @@ -57,207 +29,6 @@ class WorkloadExamples(Examples): """Examples of a single concurrency workload.""" -####################################### -# concurrent.futures examples -####################################### - -class ConcurrentFutures(Examples): - - @example - def example_basic(): - with dummy_files('src1.txt', 'src2.txt', 'src3.txt', 'src4.txt'): - # [start-cf-basic] - import shutil - from concurrent.futures import ThreadPoolExecutor as Executor - - with Executor() as e: - # Copy 4 files concurrently. - e.submit(shutil.copy, 'src1.txt', 'dest1.txt') - e.submit(shutil.copy, 'src2.txt', 'dest2.txt') - e.submit(shutil.copy, 'src3.txt', 'dest3.txt') - e.submit(shutil.copy, 'src4.txt', 'dest4.txt') - - # Run a function asynchronously and check the result. - fut = e.submit(pow, 323, 1235) - res = fut.result() - assert res == 323**1235 - # [end-cf-basic] - - @example - def example_map(): - # [start-cf-map-1] - from concurrent.futures import ThreadPoolExecutor as Executor - - pow_args = { - 323: 1235, - 100: 10, - -1: 3, - } - for i in range(100): - pow_args[i] = i - - with Executor() as e: - # Run a function asynchronously and check the results. - results = e.map(pow, pow_args.keys(), pow_args.values()) - for (a, n), res in zip(pow_args.items(), results): - assert res == a**n - # [end-cf-map-1] - - with dummy_files('src1.txt', 'src2.txt', 'src3.txt', 'src4.txt'): - # [start-cf-map-2] - import shutil - from concurrent.futures import ThreadPoolExecutor as Executor - - # Copy files concurrently. - - files = { - 'src1.txt': 'dest1.txt', - 'src2.txt': 'dest2.txt', - 'src3.txt': 'dest3.txt', - 'src4.txt': 'dest4.txt', - } - - with Executor() as e: - copied = {} - results = e.map(shutil.copy, files.keys(), files.values()) - for src, dest in zip(files, results, strict=True): - print(f'copied {src} to {dest}') - copied[src] = dest - assert list(copied.values()) == list(files.values()) - # [end-cf-map-2] - - @example - def example_wait(): - with dummy_files('src1.txt', 'src2.txt', 'src3.txt', 'src4.txt'): - # [start-cf-wait] - import shutil - import concurrent.futures - from concurrent.futures import ThreadPoolExecutor as Executor - - # Copy 4 files concurrently and wait for them all to finish. 
- - files = { - 'src1.txt': 'dest1.txt', - 'src2.txt': 'dest2.txt', - 'src3.txt': 'dest3.txt', - 'src4.txt': 'dest4.txt', - } - - with Executor() as e: - # Using wait(): - futures = [e.submit(shutil.copy, src, tgt) - for src, tgt in files.items()] - concurrent.futures.wait(futures) - - # Using as_completed(): - futures = (e.submit(shutil.copy, src, tgt) - for src, tgt in files.items()) - list(concurrent.futures.as_completed(futures)) - - # Using Executor.map(): - list(e.map(shutil.copy, files.keys(), files.values())) - # [end-cf-wait] - - @example - def example_as_completed(): - with dummy_files('src1.txt', 'src2.txt', 'src3.txt', 'src4.txt'): - # [start-cf-as-completed] - import shutil - import concurrent.futures - from concurrent.futures import ThreadPoolExecutor as Executor - - # Copy 4 files concurrently and handle each completion. - - files = { - 'src1.txt': 'dest1.txt', - 'src2.txt': 'dest2.txt', - 'src3.txt': 'dest3.txt', - 'src4.txt': 'dest4.txt', - } - - with Executor() as e: - copied = {} - futures = (e.submit(shutil.copy, src, tgt) - for src, tgt in files.items()) - futures = dict(zip(futures, enumerate(files, 1))) - for fut in concurrent.futures.as_completed(futures): - i, src = futures[fut] - res = fut.result() - print(f'({i}) {src} copied') - copied[src] = res - assert set(copied.values()) == set(files.values()), (copied, files) - # [end-cf-as-completed] - - @example - def example_error_result(): - # [start-cf-error-result-1] - import shutil - import concurrent.futures - from concurrent.futures import ThreadPoolExecutor as Executor - - # Run a function asynchronously and catch the error. - def fail(): - raise Exception('spam!') - with Executor() as e: - fut = e.submit(fail) - try: - fut.result() - except Exception as exc: - arg, = exc.args - assert arg == 'spam!' - # [end-cf-error-result-1] - - with dummy_files('src1.txt', 'src2.txt', 'src3.txt', 'src4.txt'): - # [start-cf-error-result-2] - import shutil - import concurrent.futures - from concurrent.futures import ThreadPoolExecutor as Executor - - # Copy files concurrently, tracking missing files. - - files = { - 'src1.txt': 'dest1.txt', - 'src2.txt': 'dest2.txt', - 'src3.txt': 'dest3.txt', - 'src4.txt': 'dest4.txt', - 'missing.txt': 'dest5.txt', - } - - with Executor() as e: - # using executor.map(): - results = e.map(shutil.copy, files.keys(), files.values()) - for src in files: - try: - next(results) - except FileNotFoundError: - print(f'missing {src}') - assert not list(results) - - # using wait(): - futures = [e.submit(shutil.copy, src, tgt) - for src, tgt in files.items()] - futures = dict(zip(futures, files)) - completed, _ = concurrent.futures.wait(futures) - for fut in completed: - src = futures[fut] - try: - fut.result() - except FileNotFoundError: - print(f'missing {src}') - - # using as_completed(): - futures = (e.submit(shutil.copy, src, tgt) - for src, tgt in files.items()) - futures = dict(zip(futures, files)) - for fut in concurrent.futures.as_completed(futures): - src = futures[fut] - try: - fut.result() - except FileNotFoundError: - print(f'missing {src}') - # [end-cf-error-result-2] - - ####################################### # workload: grep ####################################### @@ -963,10 +734,91 @@ def run_using_dask(): ####################################### -# workload: ... +# workload 2: ... +####################################### + +class Workload2(WorkloadExamples): + + @example + def run_sequentially(): + # [start-w2-sequential] + # sequential 3 + ... 
+ # [end-w2-sequential] + + @example + def run_using_threads(): + # [start-w2-threads] + import threading + + def task(): + ... + + t = threading.Thread(target=task) + t.start() + + ... + # [end-w2-threads] + + @example + def run_using_cf_thread(): + # [start-w2-cf-thread] + # concurrent.futures 3 + ... + # [end-w2-cf-thread] + + @example + def run_using_subinterpreters(): + # [start-w2-subinterpreters] + # subinterpreters 3 + ... + # [end-w2-subinterpreters] + + @example + def run_using_cf_subinterpreters(): + # [start-w2-cf-subinterpreters] + # concurrent.futures 3 + ... + # [end-w2-cf-subinterpreters] + + @example + def run_using_async(): + # [start-w2-async] + # async 3 + ... + # [end-w2-async] + + @example + def run_using_multiprocessing(): + # [start-w2-multiprocessing] + import multiprocessing + + def task(): + ... + + ... + # [end-w2-multiprocessing] + + @example + def run_using_cf_multiprocessing(): + # [start-w2-cf-multiprocessing] + # concurrent.futures 3 + ... + # [end-w2-cf-multiprocessing] + + @example + def run_using_dask(): + # [start-w2-dask] + # dask 3 + ... + # [end-w2-dask] + + +####################################### +# workload 3: ... ####################################### -class WorkloadX(WorkloadExamples): +class Workload3(WorkloadExamples): @example def run_sequentially(): @@ -1049,6 +901,7 @@ def run_using_dask(): if __name__ == '__main__': # Run (all) the examples. + import sys argv = sys.argv[1:] if argv: classname, _, funcname = argv[0].rpartition('.') From 6dd91e3e55073fee198850d53b55388d819cd33a Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 3 Sep 2024 11:06:10 -0600 Subject: [PATCH 38/80] Clarify about Python-supported concurrency models. --- Doc/howto/concurrency.rst | 66 +++++++------------------------------ Doc/includes/concurrency.py | 28 ---------------- 2 files changed, 12 insertions(+), 82 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index f0ab2c2c2ec2aa..9bccdccf242948 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -9,13 +9,13 @@ that would do an excellent job of introducing you to concurrency. This howto document builds on those by walking you through how to apply that knowledge using Python. -Python supports the following concurrency models: +Python supports the following concurrency models directly: -* free-threading -* isolated threads, AKA CSP/actor model -* coroutines (async/await) -* multi-processing -* distributed, e.g. SMP +* free-threading (stdlib, C-API) +* isolated threads, AKA CSP/actor model (stdlib*, C-API) +* coroutines, AKA async/await (language, stdlib, C-API) +* multi-processing (stdlib) +* distributed, e.g. SMP (stdlib) In this document, we'll look at how to take advantage of this concurrency support. The overall focus is on the following: @@ -45,6 +45,12 @@ concurrency support. The overall focus is on the following: Take that into consideration before reaching for threads and look at the alternatives first. +.. note:: + + Python supports other concurrency models indirectly through + community-maintained PyPI packages. One well-known example is + :pypi:`dask`, which supports "distributed" computing. + We'll be using the following terms and ideas throughout: task (logical thread) @@ -1129,7 +1135,6 @@ side-by-side for easy comparison: - multiple interpreters - coroutines - multiple processes - - distributed * - .. raw:: html
@@ -1205,21 +1210,6 @@ side-by-side for easy comparison:
- - .. raw:: html - -
- (expand) - - .. literalinclude:: ../includes/concurrency.py - :start-after: [start-grep-dask] - :end-before: [end-grep-dask] - :dedent: - :linenos: - - .. raw:: html - -
- For threads, multiprocessing, and `multiple interpreters * `_, you can also use :mod:`concurrent.futures`: @@ -1261,7 +1251,6 @@ side-by-side for easy comparison: - multiple interpreters - coroutines - multiple processes - - distributed * - .. raw:: html
@@ -1337,21 +1326,6 @@ side-by-side for easy comparison:
- - .. raw:: html - -
- (expand) - - .. literalinclude:: ../includes/concurrency.py - :start-after: [start-w2-dask] - :end-before: [end-w2-dask] - :dedent: - :linenos: - - .. raw:: html - -
- .. raw:: html
@@ -1374,7 +1348,6 @@ side-by-side for easy comparison: - multiple interpreters - coroutines - multiple processes - - distributed * - .. raw:: html
@@ -1450,21 +1423,6 @@ side-by-side for easy comparison:
- - .. raw:: html - -
- (expand) - - .. literalinclude:: ../includes/concurrency.py - :start-after: [start-w3-dask] - :end-before: [end-w3-dask] - :dedent: - :linenos: - - .. raw:: html - -
- .. rubric:: Footnotes diff --git a/Doc/includes/concurrency.py b/Doc/includes/concurrency.py index 0a005c73a1505e..fa6120849e6953 100644 --- a/Doc/includes/concurrency.py +++ b/Doc/includes/concurrency.py @@ -644,13 +644,6 @@ def task(infile, matches): # [end-grep-cf-multiprocessing] Grep.app(run_all) - @example - def run_using_dask(): - # [start-grep-dask] - # dask 1 - ... - # [end-grep-dask] - ####################################### # workload: image resizer @@ -725,13 +718,6 @@ def run_using_cf_multiprocessing(): ... # [end-image-resizer-cf-multiprocessing] - @example - def run_using_dask(): - # [start-image-resizer-dask] - # dask 2 - ... - # [end-image-resizer-dask] - ####################################### # workload 2: ... @@ -806,13 +792,6 @@ def run_using_cf_multiprocessing(): ... # [end-w2-cf-multiprocessing] - @example - def run_using_dask(): - # [start-w2-dask] - # dask 3 - ... - # [end-w2-dask] - ####################################### # workload 3: ... @@ -887,13 +866,6 @@ def run_using_cf_multiprocessing(): ... # [end-w3-cf-multiprocessing] - @example - def run_using_dask(): - # [start-w3-dask] - # dask 3 - ... - # [end-w3-dask] - ####################################### # A script to run the examples From 1e935dffda905623519bbcd0dfcf72611ed03957 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 3 Sep 2024 11:15:53 -0600 Subject: [PATCH 39/80] Small clarifications and cleanup. --- Doc/howto/concurrency.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 9bccdccf242948..f9da4acf824c5a 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -56,7 +56,8 @@ We'll be using the following terms and ideas throughout: task (logical thread) | a cohesive *linear* sequence of abstract steps in a program; | effectively, a mini-program; - | the logical equivalent of executed instructions corresponding to code + | the logical equivalent of executed instructions corresponding to code; + | also known as "logical process" physical thread (OS thread) | where the actual code for a logical thread runs on the CPU (and operating system); @@ -171,8 +172,8 @@ Python supports directly: After we look at some comparisons of the concurrency models, we'll briefly talk about critical caveats for specific models. -Tables ------- +Comparison tables +----------------- The following tables provide a detailed look with side-by-side comparisons. We'll also compare them at a high level in @@ -427,7 +428,6 @@ that support these concurrency models in various contexts: Critical caveats ---------------- - There are tradeoffs to each, whether in performance or complexity. Free-threading probably has the most notoriety and the most examples, but is also has the most pitfalls (see `concurrency-downsides`_ below). @@ -437,7 +437,6 @@ efficient, which can have a larger negative impact at smaller scales. Async can be straightforward, but may cascade throughout a code base and doesn't necessarily give you parallelism. - free-threading ^^^^^^^^^^^^^^ From 4fdf9f511fc1f6c33214f45b12dc06191731938b Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 3 Sep 2024 11:18:34 -0600 Subject: [PATCH 40/80] Move the high-level APIs section down. 
--- Doc/howto/concurrency.rst | 77 ++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 37 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index f9da4acf824c5a..05e924d38e2116 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -388,43 +388,6 @@ exposure - medium? - n/a -high-level APIs ---------------- - -Also note that Python's stdlib provides various higher-level APIs -that support these concurrency models in various contexts: - -.. list-table:: - :header-rows: 1 - :class: borderless vert-aligned - :align: left - - * - - - :mod:`concurrent.futures` - - :mod:`socketserver` - - :mod:`http.server` - * - free-threading - - :class:`yes ` - - :class:`yes ` - - :class:`yes ` - * - multiple interpreters - - (`pending `_) - - - - - * - coroutines - - ??? - - - - - * - multi-processing - - | :class:`yes ` - | (:class:`similar `) - - :class:`yes ` - - - * - distributed - - ??? - - - - - Critical caveats ---------------- @@ -746,6 +709,46 @@ was started is valuable when debugging, as is knowing where a callback was registered. + + +high-level APIs +--------------- + +Also note that Python's stdlib provides various higher-level APIs +that support these concurrency models in various contexts: + +.. list-table:: + :header-rows: 1 + :class: borderless vert-aligned + :align: left + + * - + - :mod:`concurrent.futures` + - :mod:`socketserver` + - :mod:`http.server` + * - free-threading + - :class:`yes ` + - :class:`yes ` + - :class:`yes ` + * - multiple interpreters + - (`pending `_) + - + - + * - coroutines + - ??? + - + - + * - multi-processing + - | :class:`yes ` + | (:class:`similar `) + - :class:`yes ` + - + * - distributed + - ??? + - + - + + Designing A Program For Concurrency =================================== From cb843f6fed90d97cc3fa662e982637444472a9a8 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 3 Sep 2024 11:43:35 -0600 Subject: [PATCH 41/80] Clarify about the comparisons. --- Doc/howto/concurrency.rst | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 05e924d38e2116..9aa1d0a910a18b 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -169,15 +169,18 @@ Python supports directly: | (`multiprocessing `_) - multiprocessing across multiple computers -After we look at some comparisons of the concurrency models, -we'll briefly talk about critical caveats for specific models. +There are tradeoffs to each, whether in performance or complexity. +We'll take a look at those tradeoffs in detail +`later `_. + +Before that, we'll review various comparisons of the concurrency models, +and we'll briefly talk about `critical caveats `_ +for specific models. Comparison tables ----------------- The following tables provide a detailed look with side-by-side comparisons. -We'll also compare them at a high level in -`a later section `_. key characteristics ^^^^^^^^^^^^^^^^^^^ @@ -391,14 +394,8 @@ exposure Critical caveats ---------------- -There are tradeoffs to each, whether in performance or complexity. -Free-threading probably has the most notoriety and the most examples, -but is also has the most pitfalls (see `concurrency-downsides`_ below). -Isolated threads have few of those pitfalls but are less familiar. -Multiprocessing and distributed are likewise isolated, but less -efficient, which can have a larger negative impact at smaller scales. 
-Async can be straightforward, but may cascade throughout a code base -and doesn't necessarily give you parallelism. +Here are some important details to consider, specific to individual +concurrency models in Python. free-threading ^^^^^^^^^^^^^^ @@ -827,6 +824,16 @@ Picking a concurrency model ... +As mentioned earlier, each concurrency model has its own set of tradeoffs. +Free-threading probably has the most notoriety and the most examples, +but is also has the most pitfalls (see `Critical caveats`_ above). +Isolated threads have few of those pitfalls but are less familiar +and at least a little less efficient. +Multiprocessing and distributed are likewise isolated, but less +efficient, which can have a larger negative impact at smaller scales. +Async can be straightforward, but may cascade throughout a code base +and doesn't necessarily give you parallelism. + free-threading: * main value: efficient multi-core From 8a87617ad65f71e7da10ecd719896719de4154cc Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 3 Sep 2024 12:57:59 -0600 Subject: [PATCH 42/80] Updates about free-threading caveats. --- Doc/howto/concurrency.rst | 111 ++++++++++++++++++++++---------------- 1 file changed, 65 insertions(+), 46 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 9aa1d0a910a18b..2d5ee6aba80e9c 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -397,24 +397,67 @@ Critical caveats Here are some important details to consider, specific to individual concurrency models in Python. -free-threading -^^^^^^^^^^^^^^ +.. _concurrency-races: -Python directly supports use of physical threads through the -:mod:`threading` module. +Data races, AKA non-deterministic scheduling (free-threading) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Python threads are light wrappers around physical threads +and thus have the same caveats. The principal one is that each thread +shares the *full* memory of the process with all its other threads. +Combined with their non-deterministic scheduling (and parallel +execution), threads expose programs to a significant risk of races. + +The potential consequences of a race are data corruption and invalidated +expectations of data consistency. In each case, the non-deterministic +scheduling of threads means it is both hard to reproduce races and to +track down where a race happened. These qualities make these bugs +especially frustrating and worth diligently avoiding. + +The majority of data in a Python program is mutable and *all* of the +program's data is subject to potential modification by any thread +at any moment. This requires extra effort, to synchronize around +reads and writes. Furthermore, given the maximally-broad scope +of the data involved, it's difficult to be sure all possible races +have been dealt with, especially as a code base changes over time. + +The other concurrency models essentially don't have this problem. +In the case of coroutines, explicit cooperative scheduling eliminates +the risk of a simultaneous read-write or write-write. It also means +program logic can rely on memory consistency between synchronization +points (``await``). + +With the remaining concurrency models, data is never shared between +logical threads unless done explicitly (typically at the existing +inherent points of synchronization). By default that shared data is +either read-only or managed in a thread-safe way. 
Most notably, +the opt-in sharing means the set of shared data to manage is +explicitly defined (and often small) instead of covering +*all* memory in the process. + +Shared resources +^^^^^^^^^^^^^^^^ -* minimal conceptual indirection: closely tied to low-level physical threads -* the most direct route to taking advantage of multi-core parallelism +Aside from memory, all physical threads in a process share the +following resources: + +* env vars +* file descriptors +* ... -The main downside to using threads is that each one shares the full -memory of the process with all the others. That exposes programs -to a significant risk of `races `_. +Tracing execution +^^^^^^^^^^^^^^^^^ The other potential problem with using threads is that the conceptual model has no inherent synchronization, so it can be hard to follow what is going on in the program at any given moment. That is especially challenging for testing and debugging. +* "callback hell" +* "where was this thread/coroutine started?" +* composing a reliable sequential representation of the program? +* "what happened (in order) leading up to this point?" + .. _python-gil: The Global Interpreter Lock (GIL) @@ -426,7 +469,7 @@ the :term:`global interpreter lock` (GIL). The :term:`!GIL` is very efficient tool for keeping the Python implementation simple, which is an important constraint for the project. -In fact, it protects Python's maintainers and users from a large +In fact, it protects Python's maintainers *and* users from a large category of concurrency problems that one must normally face when threads are involved. @@ -434,11 +477,11 @@ The big tradeoff is that the bytecode interpreter, which executes your Python code, only runs while holding the :term:`!GIL`. That means only one thread can be running Python code at a time. Threads will take short turns, so none have to wait too long, but it still prevents -any actual parallelism. +any actual parallelism of CPU-bound code. -At the same time, the Python runtime (and extension modules) can -release the :term:`!GIL` when the thread is going to be doing something -unrelated to Python, particularly something slow or long, +That said, the Python runtime (and extension modules) can release the +:term:`!GIL` when the thread is going to be doing something unrelated +to Python, particularly something slow or long, like a blocking IO operation. There is also an ongoing effort to eliminate the :term:`!GIL`: @@ -449,6 +492,10 @@ However, there is sufficient interest in unlocking full multi-core parallelism to justify the current experiment. + + + + Isolated Threads (CSP/Actor Model) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -621,38 +668,6 @@ Using multiprocessing for distributed computing -.. _concurrency-races: - -Data races ----------- - -The first category relates to mutable data shared between threads: -a data race is where one thread writes to memory at a time when another -thread is expecting the value to be unchanged, invalidating its logic. -Similarly, two threads could write to the same memory location at the -same time, either corrupting the data there or invalidating -the expectations of one of the threads. - -In each case, the non-deterministic scheduling of threads means it is -both hard to reproduce races and to track down where a race happened. -These qualities much these bugs especially frustrating -and worth diligently avoiding. - -Races are possible when the concurrency approach is subject -to parallel execution or to non-deterministic switching. 
-(This excludes coroutines, which rely on cooperative multitasking.) -When all memory is possibly shared, as is the case with free-threading, -then all memory is at risk. - -Dealing with data races is often managed using locks (AKA mutexes), -at a low level, and thread-safe types and APIs at a high level. -Depending on the programming language, the complexity is sometimes -mitigated somewhat by the compiler and runtime. There are even -libraries and frameworks that help abstract away the complexity -to an extent. On top of that, there are tools that can help identify -potential races via static analysis. Unfortunately, none of these aids -is foolproof and the risk of hitting a race is always looming. - .. _concurrency-downsides: @@ -839,6 +854,10 @@ free-threading: * main value: efficient multi-core * main costs: races & conceptual overhead +* minimal conceptual indirection: closely tied to low-level physical threads +* the most direct route to taking advantage of multi-core parallelism + + A high-level look: .. list-table:: From 288c4f4c42efd6acbd3d3241367034d3e5ce0d79 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 4 Sep 2024 09:39:33 -0600 Subject: [PATCH 43/80] Formatting and links for intro section. --- Doc/howto/concurrency.rst | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 2d5ee6aba80e9c..9eadb3f8e49fb3 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -11,19 +11,19 @@ to apply that knowledge using Python. Python supports the following concurrency models directly: -* free-threading (stdlib, C-API) -* isolated threads, AKA CSP/actor model (stdlib*, C-API) -* coroutines, AKA async/await (language, stdlib, C-API) -* multi-processing (stdlib) -* distributed, e.g. SMP (stdlib) +* **free-threading** (stdlib, C-API) +* **isolated threads**, *AKA CSP/actor model* (stdlib\*, C-API) +* **coroutines, AKA** *async/await* (language, stdlib, C-API) +* **multi-processing** (stdlib) +* **distributed**, *e.g. SMP* (stdlib) In this document, we'll look at how to take advantage of this concurrency support. The overall focus is on the following: -* understanding the supported concurrency models -* factors to consider when designing a concurrent solution -* key concurrency primitives -* high-level, app-oriented practical examples +* `understanding the supported concurrency models `_ +* `factors to consider when designing a concurrent solution `_ +* `key concurrency primitives `_ +* `high-level, app-oriented practical examples `_ .. XXX Add a summary and section about key concurrency patterns @@ -135,6 +135,8 @@ Likewise, the high-level examples: - ... +.. _concurrency-models: + Python Concurrency Models ========================= @@ -760,6 +762,7 @@ that support these concurrency models in various contexts: - - +.. _concurrency-design: Designing A Program For Concurrency =================================== @@ -1050,12 +1053,16 @@ concurrency can be helpful: * handle asynchronous events +.. _concurrency-primitives: + Python Concurrency Primitives ============================= ... +.. 
_concurrency-workload-examples: + Python Concurrency Workload Examples ==================================== From 0b768ae3ac0650a2f941757e7c0d08c8b6a945f1 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 4 Sep 2024 09:52:26 -0600 Subject: [PATCH 44/80] wording tweaks --- Doc/howto/concurrency.rst | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 9eadb3f8e49fb3..1e8e0d0cae7375 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -149,7 +149,7 @@ Python supports directly: :align: left * - model - - Python API + - Python stdlib - description * - free threading - :mod:`threading` @@ -167,8 +167,7 @@ Python supports directly: - :mod:`multiprocessing` - using multiple isolated processes * - distributed - - | :pypi:`dask` - | (`multiprocessing `_) + - `multiprocessing `_ - multiprocessing across multiple computers There are tradeoffs to each, whether in performance or complexity. @@ -401,11 +400,10 @@ concurrency models in Python. .. _concurrency-races: -Data races, AKA non-deterministic scheduling (free-threading) +Data races and non-deterministic scheduling (free-threading) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Python threads are light wrappers around physical threads -and thus have the same caveats. The principal one is that each thread +The principal caveat for physical threads is that each thread shares the *full* memory of the process with all its other threads. Combined with their non-deterministic scheduling (and parallel execution), threads expose programs to a significant risk of races. @@ -416,10 +414,11 @@ scheduling of threads means it is both hard to reproduce races and to track down where a race happened. These qualities make these bugs especially frustrating and worth diligently avoiding. -The majority of data in a Python program is mutable and *all* of the -program's data is subject to potential modification by any thread -at any moment. This requires extra effort, to synchronize around -reads and writes. Furthermore, given the maximally-broad scope +Python threads are light wrappers around physical threads and thus have +the same caveats. The majority of data in a Python program is mutable +and *all* of the program's data is subject to potential modification +by any thread at any moment. This requires extra effort, to synchronize +around reads and writes. Furthermore, given the maximally-broad scope of the data involved, it's difficult to be sure all possible races have been dealt with, especially as a code base changes over time. From ab24e837190e218958cdf85ba1388907dee38385 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 4 Sep 2024 10:38:47 -0600 Subject: [PATCH 45/80] Update the section about multiple interpreters. --- Doc/howto/concurrency.rst | 104 +++++++++++++++++++++----------------- 1 file changed, 58 insertions(+), 46 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 1e8e0d0cae7375..d11413713b847b 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -436,6 +436,46 @@ the opt-in sharing means the set of shared data to manage is explicitly defined (and often small) instead of covering *all* memory in the process. +Thread isolation and multiple interpreters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +As just noted, races effectively stop being a problem if the memory +used by each physical thread is effectively isolated from the others. 
+That isolation can also help with the other caveats related to +physical threads. In Python you can get this isolation +by using multiple interpreters. + +In this context, an "interpreter" represents nearly all the capability +and state of the Python runtime, for its C-API and to execute Python +code. The full runtime supports multiple interpreters and includes +some state that all interpreters share. Most importantly, the state +of each interpreter is effectively isolated from the others. + +That isolation includes things like :data:`sys.modules`. By default, +interpreters mostly don't share any data (including objects) at all. +Anything that gets shared is done on a strictly opt-in basis. That +means programmers wouldn't need to worry about possible races with +*any* data in the program. They would only need to worry about data +that was explicitly shared. + +Interpreters themselves are not specific to any thread, but instead +each physical thread has (at most) one interpreter active at any given +moment. Each interpreter can be associated in this way with any number +of threads. Since each interpreter is isolated from the others, +any thread using one interpreter is thus isolated from threads +using any other interpreter. + +Using multiple interpreters is fairly straight-forward: + +1. create a new interpreter +2. switch the current thread to use that interpreter +3. call :func:`exec`, but targeting the new interpreter +4. switch back + +Note that no threads were involved. That's because running in an +interpreter happens relative to the current thread. New threads +aren't implicitly involved. + Shared resources ^^^^^^^^^^^^^^^^ @@ -459,6 +499,10 @@ especially challenging for testing and debugging. * composing a reliable sequential representation of the program? * "what happened (in order) leading up to this point?" +Besides unlocking full multi-core parallelism, the isolation between +interpreters means that, from a conceptual level, concurrency can be +simpler. + .. _python-gil: The Global Interpreter Lock (GIL) @@ -492,54 +536,15 @@ burden to the Python project and extension module maintainers. However, there is sufficient interest in unlocking full multi-core parallelism to justify the current experiment. - - - - - -Isolated Threads (CSP/Actor Model) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -There's a major alternative to free-threading, both for multi-core -parallelism and for a simpler conceptual model: use multiple interpreters. - -Python's major implementation, CPython, has for decades supported -running with multiple independent copies of the Python runtime -("interpreter") in a single process. However, these interpreters -weren't completely isolated from one another; most importantly they -shared the one :term:`!GIL`. Over several years a lot of work went -into improving the isolation between interpreters, culminating in -no longer sharing a single :term:`!GIL`. - -Besides unlocking full multi-core parallelism, the isolation between -interpreters means that, from a conceptual level, concurrency can be -simpler. An interpreter encapsulates all of Python's runtime state, -including things like :data:`sys.modules`. By default, interpreters -mostly don't share any data (including objects) at all. Anything that -gets shared is done on a strictly opt-in basis. That means programmers -don't need to worry about possible `races `_ -with *any* data in the program. They only need to worry about data -that was explicitly shared. 
-Using multiple interpreters is fairly straight-forward:
-
-1. create a new interpreter
-2. switch the current thread to use that interpreter
-3. call :func:`exec`, but targeting the new interpreter
-4. switch back
-
-Note that no threads were involved.  That's because running in an
-interpreter happens relative to the current thread.  New threads
-aren't implicitly involved.  They can be added in explicitly though.
-Why?  For multi-core parallelism.
-
-If you want multi-core parallelism, run a different interpreter in each
-thread.  Their isolation means that each can run unblocked in that
-thread.
+You can also move from free-threading to isolated threads using multiple
+interpreters.  Each interpreter has its own
+:term:`GIL <global interpreter lock>`.  Thus, if you want multi-core
+parallelism, run a different interpreter in each thread.  Their
+isolation means that each can run unblocked in that thread.
 
 .. _python-stdlib-interpreters:
 
-A Stdlib Module for Using Multiple Interpreters
+A stdlib module for using multiple interpreters
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 While use of multiple interpreters has been part of Python's C-API
@@ -552,7 +557,7 @@ Python 3.13+ on PyPI: :pypi:`interpreters-pep-734`.
 
 .. _python-interpreters-overhead:
 
-Improving Performance for Multiple Interpreters
+Improving performance for multiple interpreters
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The long effort to improve on Python's implementation of multiple
@@ -563,10 +568,17 @@ done to improve performance.  This has the most impact on:
 
 (i.e. how many can run at the same time)
 * how long it takes to create a new interpreter
 
+It also impacts how efficiently data/objects can be passed between
+interpreters, and how effectively objects can be shared.
+
 As the work on isolation wraps up, improvements will shift to focus
 on performance and memory usage.  Thus the overhead associated with
 using multiple interpreters will drastically decrease over time.
 
 Coroutines (Async/Await)
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
From f7f176994a351922aee6b3b718f34b9f341b5795 Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Wed, 4 Sep 2024 11:11:22 -0600
Subject: [PATCH 46/80] Update the section about coroutines.

---
 Doc/howto/concurrency.rst | 73 ++++++++++-----------------------------
 1 file changed, 18 insertions(+), 55 deletions(-)

diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst
index d11413713b847b..ff8ccb62991055 100644
--- a/Doc/howto/concurrency.rst
+++ b/Doc/howto/concurrency.rst
@@ -575,69 +575,32 @@ As the work on isolation wraps up, improvements will shift to focus
 on performance and memory usage.  Thus the overhead associated with
 using multiple interpreters will drastically decrease over time.
 
+Coroutines are contagious
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Coroutines can be an effective mechanism for letting a program's
+non-blocking code run while simultaneously waiting for blocking code
+to finish.  The tricky part is that the underlying machinery (the
+:ref:`event loop <asyncio-event-loop>`) relies on each coroutine
+explicitly yielding control at the appropriate moments.
+Normal functions do not follow this pattern, so they cannot take
+advantage of that cooperative scheduling to avoid blocking
+the program.  Thus coroutines and non-coroutines don't mix well.
+
+While there are tools for wrapping normal functions to act like
+coroutines, they are often converted into coroutines instead. 
At that point,
+if any non-async code relies on the function then
+either you'll need to convert the other code to a coroutine or you'll
+need to keep the original non-async implementation around along
+with the new, almost identical async one.
+
+You can see how that can proliferate, leading to possible extra
+maintenance/development costs.
 
-The use of :term:`coroutines <coroutine>` for concurrency has been
-around a long time and has grown in popularity in the software world,
-particularly with the addition of ``async/await`` syntax in
-various languages.
-
-Python has supported coroutines to some degree since the beginning.
-The best example is :pypi:`twisted`, which has provided this concurrency
-model for decades.  For most of that time :pypi:`!twisted` did it
-primarily through callbacks and a form of "promises"/"futures".
-Explicit support for coroutines in Python really started with the
-introduction of :term:`generators <generator>` in Python 2.2
-(:pep:`255`).  In Python 2.5 (:pep:`342`), :term:`!generators` were
-tweaked to explicitly support use as coroutines.  That went a step
-further in Python 3.3 with the addition of ``yield from`` (:pep:`380`)
-and the :mod:`asyncio` module (:pep:`3156`).  Finally, in Python 3.5
-(:pep:`492`), we got dedicated ``async/await`` syntax
-and :ref:`a dedicated protocol `
-for :term:`!coroutine` objects.
-
-There are three main pieces to using coroutines:
-
-* coroutines (non-blocking, yield control instead)
-* an event loop (schedules coroutines)
-* coroutine wrappers around blocking operations
-
-A :term:`coroutine function` looks *almost* the same as a regular
-function.  It is a non-blocking function that *cooperatively* yields
-control of the program to other coroutines, which in turn yield control
-back (eventually).  At those points of synchronization,
-coroutines often provide data to one another.
-
-The event loop is what keeps track of which coroutines have yielded
-control and which should get control next.
-
-Generally a coroutine needs to avoid doing anything that takes very long
-before yielding control back to the event loop.  Any blocking operation
-in a coroutine, like waiting on a socket, has to be implemented in a way
-that only waits a little while, yields, and then waits again, etc. until
-ready.  The alternative is to wrap the blocking operation/function
-in some sort of "future" coroutine that yields until the blocking
-operation completes.  The event loop can also fill that role
-to an extent.
-
-In addition to support for coroutines in the language, Python's stdlib
-provides the :mod:`asyncio` module, which includes:
-
-* an event loop
-* a number of useful coroutines
-* a variety of helpful APIs that build on coroutines and the event loop
-
-One of the main challenges with using coroutines is that they do not
-normally mix well with non-coroutines.  As a result, ``async/await``
-can be contagious, requiring surrounding code to be async.  This can
-lead to having the same thing implemented twice, once normal and once
-async, with significant code duplication.
 
From fdf46cb94fc7f93f0cd9b5b83bd6e0b9c063ca6d Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Wed, 4 Sep 2024 11:32:45 -0600
Subject: [PATCH 47/80] Update the sections about multi-processing. 
---
 Doc/howto/concurrency.rst | 64 +++++++++++++++++----------------------
 1 file changed, 27 insertions(+), 37 deletions(-)

diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst
index ff8ccb62991055..f560274cf86916 100644
--- a/Doc/howto/concurrency.rst
+++ b/Doc/howto/concurrency.rst
@@ -476,6 +476,9 @@ Note that no threads were involved.  That's because running in an
 interpreter happens relative to the current thread.  New threads
 aren't implicitly involved.
 
+Multi-processing and distributed computing provide similar isolation,
+though with some tradeoffs.
+
 Shared resources
 ^^^^^^^^^^^^^^^^
 
@@ -597,49 +600,36 @@ with the new, almost identical async one.
 You can see how that can proliferate, leading to possible extra
 maintenance/development costs.
 
+Processes consume extra resources
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-
-
-
-
-Multi-processing
-^^^^^^^^^^^^^^^^
-
-The stdlib :mod:`multiprocessing` module, which has been around many
-years, provides an API for using multiple processes for concurrency.
-Furthermore, processes are always isolated, so you have many of the
-same benefits of using multiple interpreters, including multi-core
-parallelism.
-
-There are some obstacles however.  First of all, using multiple
-processes has a higher overhead than operating in a single process,
-sometimes significantly higher.  This applies in just about every
-dimension of overhead.  Secondly, the :mod:`multiprocessing` module's
-API is substantially larger and more complex that what we use for
-threads and multiple interpreters.  Finally, there are some scaling
-issues with using multiple processes, related both to the performance
-overhead and to how the operating system assigns resources like
-file handles.
-
-The similarity with :class:`threading.Thread` is intentional.
-On top of that, the :mod:`multiprocessing` module provides an extensive
-API to address a variety of needs, including machinery for inter-process
-shared memory.  Also note that that API can be used for threads and
-(eventually) interpreters using different backends.
-
-Distributed
-^^^^^^^^^^^
-
-When it comes to concurrency at scale, through distributed concurrency,
-one of the best examples is :pypi:`dask`.
+When using multi-processing for concurrency, keep in mind that the
+operating system will assign a certain set of limited resources to each
+process.  For example, each process has its own PID and handle to the
+executable.  You can run only so many processes before you run out of
+these resources.  Concurrency in a single process doesn't have this
+problem, and a distributed program can work around it.
 
 .. _multiprocessing-distributed:
 
 Using multiprocessing for distributed computing
------------------------------------------------
-
-...
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Not only does the :mod:`multiprocessing` module support concurrency
+with multiple local processes, it can also support a distributed model
+using remote computers.  That said, consider first looking into tools
+that have been designed specifically for distributed computing,
+like :pypi:`dask`.
+
+Resilience to crashes
+^^^^^^^^^^^^^^^^^^^^^
+
+A process can crash if it does something it shouldn't, like trying to
+access memory outside what the OS has provided it.  If your program
+is running in multiple processes (incl. distributed) then you can
+more easily recover from a crash in any one process.  Recovering
+from a crash when using free-threading, multiple interpreters, or
+coroutines isn't nearly so easy. 
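+
+As a rough sketch (not an official :mod:`multiprocessing` recipe, and
+with a hypothetical ``task()`` standing in for real work), the parent
+process can detect an abnormal worker exit and carry on::
+
+    import multiprocessing
+    import os
+
+    def task():
+        # Simulate a crash, e.g. from a buggy extension module.
+        os._exit(1)
+
+    if __name__ == '__main__':
+        p = multiprocessing.Process(target=task)
+        p.start()
+        p.join()
+        # The parent survives and can recover, e.g. by retrying.
+        if p.exitcode != 0:
+            print(f'worker died with exit code {p.exitcode}; recovering')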
From 4cc62a19dabe3874ce133015cfd49b33e4ce5262 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 4 Sep 2024 11:39:19 -0600 Subject: [PATCH 48/80] Clear out the section on downsides. --- Doc/howto/concurrency.rst | 88 +++++++++++++-------------------------- 1 file changed, 30 insertions(+), 58 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index f560274cf86916..2e06702a6230ad 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -492,6 +492,8 @@ following resources: Tracing execution ^^^^^^^^^^^^^^^^^ +... + The other potential problem with using threads is that the conceptual model has no inherent synchronization, so it can be hard to follow what is going on in the program at any given moment. That is @@ -506,6 +508,16 @@ Besides unlocking full multi-core parallelism, the isolation between interpreters means that, from a conceptual level, concurrency can be simpler. +The second category of complexity is the problem of tracing the execution +of one logical thread relative to another. This is especially relevant +for error handling, when an error in the one thread is exposed in the +other. This applies equally to threads that start other threads as to +concurrency models that use callbacks. Knowing where the failing thread +was started is valuable when debugging, as is knowing where a callback +was registered. + + + .. _python-gil: The Global Interpreter Lock (GIL) @@ -631,64 +643,6 @@ more easily recover from a crash in any one process. Recovering from a crash when using free-threading, multiple interpreters, or coroutines isn't nearly so easy. - - - - - -.. _concurrency-downsides: - -What are the downsides? ------------------------ - -The main challenge when using concurrency is the (potential) extra -complexity. This complexity comes from the effect of multiple logical -threads running at the same time and interacting with each other. -In practice, this falls into two categories: data races and tracing -relative execution. Both are a form of "spooky action at a distance" [#f1]_ -(meaning something changes unexpectedly in one place due to unknown -changes somewhere else). - -The first category relates to mutable data shared between threads: -a data race is where one thread writes to memory at a time when another -thread is expecting the value to be unchanged, invalidating its logic. -Similarly, two threads could write to the same memory location at the -same time, either corrupting the data there or invalidating -the expectations of one of the threads. - -In each case, the non-deterministic scheduling of threads means it is -both hard to reproduce races and to track down where a race happened. -These qualities much these bugs especially frustrating -and worth diligently avoiding. - -Races are possible when the concurrency approach is subject -to parallel execution or to non-deterministic switching. -(This excludes coroutines, which rely on cooperative multitasking.) -When all memory is possibly shared, as is the case with free-threading, -then all memory is at risk. - -Dealing with data races is often managed using locks (AKA mutexes), -at a low level, and thread-safe types and APIs at a high level. -Depending on the programming language, the complexity is sometimes -mitigated somewhat by the compiler and runtime. There are even -libraries and frameworks that help abstract away the complexity -to an extent. On top of that, there are tools that can help identify -potential races via static analysis. 
Unfortunately, none of these aids -is foolproof and the risk of hitting a race is always looming. - -.. XXX mention reentrancy? - -The second category of complexity is the problem of tracing the execution -of one logical thread relative to another. This is especially relevant -for error handling, when an error in the one thread is exposed in the -other. This applies equally to threads that start other threads as to -concurrency models that use callbacks. Knowing where the failing thread -was started is valuable when debugging, as is knowing where a callback -was registered. - - - - high-level APIs --------------- @@ -726,6 +680,7 @@ that support these concurrency models in various contexts: - - + .. _concurrency-design: Designing A Program For Concurrency @@ -785,6 +740,14 @@ How can concurrency hurt? ... +The main challenge when using concurrency is the (potential) extra +complexity. This complexity comes from the effect of multiple logical +threads running at the same time and interacting with each other. +In practice, this falls into two categories: data races and tracing +relative execution. Both are a form of "spooky action at a distance" [#f1]_ +(meaning something changes unexpectedly in one place due to unknown +changes somewhere else). + .. _concurrency-identify-tasks: Identifying the logical tasks in your program @@ -1024,6 +987,15 @@ Python Concurrency Primitives ... +Dealing with data races is often managed using locks (AKA mutexes), +at a low level, and thread-safe types and APIs at a high level. +Depending on the programming language, the complexity is sometimes +mitigated somewhat by the compiler and runtime. There are even +libraries and frameworks that help abstract away the complexity +to an extent. On top of that, there are tools that can help identify +potential races via static analysis. Unfortunately, none of these aids +is foolproof and the risk of hitting a race is always looming. + .. _concurrency-workload-examples: From 54bacb6acc5d305ea4ca834137ca03be2044aa5c Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 4 Sep 2024 12:29:24 -0600 Subject: [PATCH 49/80] Update the section about shared resources. --- Doc/howto/concurrency.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 2e06702a6230ad..66694e651062da 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -485,9 +485,11 @@ Shared resources Aside from memory, all physical threads in a process share the following resources: +* commandline arguments ("argv") * env vars -* file descriptors -* ... +* current working directory +* signals, IPC, etc. +* open I/O resources (file descriptors, sockets, etc.) Tracing execution ^^^^^^^^^^^^^^^^^ From 49ec62f8449bc1d30fa53dde40d02d9f87b268de Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 4 Sep 2024 12:30:30 -0600 Subject: [PATCH 50/80] Normalize TODO comments. --- Doc/howto/concurrency.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 66694e651062da..c74a14f55c37ef 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -25,7 +25,7 @@ concurrency support. The overall focus is on the following: * `key concurrency primitives `_ * `high-level, app-oriented practical examples `_ -.. XXX Add a summary and section about key concurrency patterns +.. TODO Add a summary and section about key concurrency patterns .. 
note:: @@ -270,7 +270,7 @@ overhead details complexity ^^^^^^^^^^ -.. XXX "human-friendly" +.. TODO "human-friendly" .. list-table:: :header-rows: 1 @@ -494,7 +494,7 @@ following resources: Tracing execution ^^^^^^^^^^^^^^^^^ -... +.. TODO finish The other potential problem with using threads is that the conceptual model has no inherent synchronization, so it can be hard to follow From 8fd8406a54f344b9d62fdbabc1dfe421cfa462ae Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 4 Sep 2024 12:43:08 -0600 Subject: [PATCH 51/80] Move the explanations to the right places. --- Doc/howto/concurrency.rst | 233 ++++++++++++++++++++------------------ 1 file changed, 121 insertions(+), 112 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index c74a14f55c37ef..4eb0299769dcd1 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -491,6 +491,8 @@ following resources: * signals, IPC, etc. * open I/O resources (file descriptors, sockets, etc.) +When relevant, these must be managed in a thread-safe way. + Tracing execution ^^^^^^^^^^^^^^^^^ @@ -715,9 +717,11 @@ it's crucial that you understand the problem well. How can concurrency help? ------------------------- +.. TODO finish + Here are the benefits concurrency can bring to the table: -* +* ... Primarily, concurrency can be helpful by making your program faster @@ -740,7 +744,9 @@ Other possible benefits: How can concurrency hurt? ------------------------- -... +.. TODO finish + +TBD The main challenge when using concurrency is the (potential) extra complexity. This complexity comes from the effect of multiple logical @@ -753,100 +759,11 @@ changes somewhere else). .. _concurrency-identify-tasks: Identifying the logical tasks in your program ------------------------------------------------- - -... - -.. _concurrency-characteristics: - -The concurrency characteristics of your program ------------------------------------------------- - -... - -.. _concurrency-pick-a-model: - -Picking a concurrency model ---------------------------- - -... - -As mentioned earlier, each concurrency model has its own set of tradeoffs. -Free-threading probably has the most notoriety and the most examples, -but is also has the most pitfalls (see `Critical caveats`_ above). -Isolated threads have few of those pitfalls but are less familiar -and at least a little less efficient. -Multiprocessing and distributed are likewise isolated, but less -efficient, which can have a larger negative impact at smaller scales. -Async can be straightforward, but may cascade throughout a code base -and doesn't necessarily give you parallelism. - -free-threading: - -* main value: efficient multi-core -* main costs: races & conceptual overhead - -* minimal conceptual indirection: closely tied to low-level physical threads -* the most direct route to taking advantage of multi-core parallelism - - -A high-level look: - -.. list-table:: - :header-rows: 1 - :class: borderless vert-aligned - :align: left - - * - model - - pros - - cons - * - free threading - - * very light-weight and efficient - * wide-spread - * can enable multi-core parallelism (`caveat: GIL `_) - - * all memory is shared, subject to races - * some IO may have races (e.g. 
writing to stdout)
-       * can be hard for humans to follow what's happening in different
-         threads at any given point
-   * - multiple interpreters (isolated threads)
-     - * isolation eliminates nearly all races, by default
-         (sharing is strictly opt-in)
-       * synchronization is built in to cross-interpreter interaction
-       * enables full multi-core parallelism of all Python code
-     - * unfamiliar to many
-       * less efficient than threads
-       * (currently) limited in what data can be shared between
-         interpreters
-   * - coroutines (async/await)
-     - * not subject to races
-       * increasingly familiar to many; popular in newer languages
-       * has a long history in Python (e.g. ``twisted``)
-     - * async and non-async functions don't mix well,
-         potentially leading to duplication of code
-       * switching to async can require substantial cascading code churn
-       * callbacks can make it difficult to follow program logic,
-         making debugging harder
-       * does not enable multi-core parallelism
-   * - multiprocessing
-     - * isolated (no races)
-       * enables full multi-core parallelism of all Python code
-     - * substantially less efficient than using a single process
-       * can lead to exhaustion of system resources
-         (e.g. file handles, PIDs)
-       * API can be hard to use
-   * - distributed
-     - * isolated (no races)
-       * fully parallel
-       * facilitates massive scaling
-     - * not necessarily a good fit for small-scale applications
-       * often requires configuration
+---------------------------------------------
+
+.. TODO finish
+
+TBD
 
 At its most fundamental, concurrency means doing multiple things at once,
 from a strictly *logical* viewpoint.
@@ -867,26 +784,14 @@ One important observation is that most concurrent programs
 can be represented instead as a single task, with the code of the
 concurrent tasks merged into a single sequence.
 
+.. _concurrency-characteristics:
 
-What problems can concurrency help solve?
------------------------------------------
-
-
-synchronization
----------------
-
-Additionally, concurrency often involves some degree of synchronization
-between the logical threads.  At the most basic conceptual level:
-one thread may wait for another to finish.
-
-shared resources
-----------------
-
-Aside from code running at the same time, concurrency typically
-also involves some amount of resources shared between the concurrent
-tasks.  That may include memory, files, and sockets.
+The concurrency characteristics of your program
+-----------------------------------------------
 
+.. TODO finish
 
+TBD
 
 For a given workload, here are some characteristics that will help you
 understand the problem and, potentially, which concurrency model would
@@ -981,13 +886,104 @@ concurrency can be helpful:
 * scaling
 * handle asynchronous events
 
+.. _concurrency-pick-a-model:
+
+Picking a concurrency model
+---------------------------
+
+.. TODO finish
+
+TBD
+
+As mentioned earlier, each concurrency model has its own set of tradeoffs.
+Free-threading probably has the most notoriety and the most examples,
+but also has the most pitfalls (see `Critical caveats`_ above).
+Isolated threads have few of those pitfalls but are less familiar
+and at least a little less efficient.
+Multiprocessing and distributed are likewise isolated, but less
+efficient, which can have a larger negative impact at smaller scales.
+Async can be straightforward, but may cascade throughout a code base
+and doesn't necessarily give you parallelism. 
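+
+One mitigating factor: if you write against a high-level API like
+:mod:`concurrent.futures`, much of your code can stay the same while
+you experiment with different models.  As a small sketch (with a
+stand-in ``task()`` function), switching from threads to processes
+is a matter of swapping the executor::
+
+    from concurrent.futures import ThreadPoolExecutor as Executor
+    # Swap in ProcessPoolExecutor (or, eventually, an
+    # interpreter-based executor) without touching the rest:
+    # from concurrent.futures import ProcessPoolExecutor as Executor
+
+    def task(n):
+        return n * 2
+
+    if __name__ == '__main__':
+        with Executor() as e:
+            results = list(e.map(task, range(10)))
+        assert results == [n * 2 for n in range(10)]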
+ +free-threading: + +* main value: efficient multi-core +* main costs: races & conceptual overhead + +* minimal conceptual indirection: closely tied to low-level physical threads +* the most direct route to taking advantage of multi-core parallelism + + +A high-level look: + +.. list-table:: + :header-rows: 1 + :class: borderless vert-aligned + :align: left + + * - model + - pros + - cons + * - free threading + - * very light-weight and efficient + * wide-spread + * can enable multi-core parallelism (`caveat: GIL `_) + - * all memory is shared, subject to races + * some IO may have races (e.g. writing to stdout) + * can be hard for humans to follow what's happening in different + threads at any given point + * - multiple interpreters (isolated threads) + - * isolation eliminates nearly all races, by default + (sharing is strictly opt-in) + * synchronization is built in to cross-interpreter interaction + * enables full multi-core parallelism of all Python code + - * unfamiliar to many + * less efficient than threads + * (currently) limited in what data can be shared between + interpreters + * - coroutines (async/await) + - * not subject to races + * increasingly familiar to many; popular in newer languages + * has a long history in Python (e.g. ``twisted``) + - * async and non-async functions don't mix well, + potentially leading to duplication of code + * switching to async can require substantial cascading code churn + * callbacks can make it difficult to follow program logic, + making debugging harder + * does not enable multi-core parallelism + * - multiprocessing + - * isolated (no races) + * enables full multi-core parallelism of all Python code + - * substantially less efficient than using a single process + * can lead to exhaustion of system resources + (e.g. file handles, PIDs) + * API can be hard to use + * - distributed + - * isolated (no races) + * fully parallel + * facilitates massive scaling + - * not necessarily a good fit for small-scale applications + * often requires configuration + +Other considerations +-------------------- + +.. TODO finish + +TBD + +* are there libraries that can take care of the concurrency parts? +* ... + .. _concurrency-primitives: Python Concurrency Primitives ============================= -... +.. TODO finish + +TBD Dealing with data races is often managed using locks (AKA mutexes), at a low level, and thread-safe types and APIs at a high level. @@ -998,6 +994,19 @@ to an extent. On top of that, there are tools that can help identify potential races via static analysis. Unfortunately, none of these aids is foolproof and the risk of hitting a race is always looming. +synchronization +--------------- + +Additionally, concurrency often involves some degree of synchronization +between the logical threads. At the most basic conceptual level: +one thread may wait for another to finish. + +shared resources +---------------- + +Aside from code running at the same time, concurrency typically +also involves some amount of resources shared between the concurrent +tasks. That may include memory, files, and sockets. .. _concurrency-workload-examples: From fdcc12800df3e7a7a302aee5496cc4fc361d50e8 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 4 Sep 2024 13:05:57 -0600 Subject: [PATCH 52/80] Add sub-sections. 
--- Doc/howto/concurrency.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index 4eb0299769dcd1..2c297a96217994 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -51,6 +51,9 @@ concurrency support. The overall focus is on the following: community-maintained PyPI packages. One well-known example is :pypi:`dask`, which supports "distributed" computing. +Glossary +-------- + We'll be using the following terms and ideas throughout: task (logical thread) @@ -76,6 +79,9 @@ parallelism (multi-core) running a program's multiple logical threads on multiple physical threads (CPU cores) +Quick reference +--------------- + .. raw:: html -For convenience, here are the concurrency primitives we'll cover later: +For convenience, here is a summary of what we'll cover later. + +**Concurrency Primitives** .. list-table:: :header-rows: 1 @@ -110,7 +114,7 @@ For convenience, here are the concurrency primitives we'll cover later: - ... - ... -Likewise, the high-level examples: +**High-level App Examples** .. list-table:: :header-rows: 1 @@ -140,8 +144,7 @@ Likewise, the high-level examples: - ... - ... -To help further compare the models, there are side-by-side examples -of each of those "apps": +Each has side-by-side implementations for the different models: .. list-table:: :header-rows: 1 @@ -158,6 +161,12 @@ of each of those "apps": - `by concurrency models `_ +.. raw:: html + +
+ +---- + .. _concurrency-models: Python Concurrency Models @@ -176,13 +185,13 @@ Python supports directly: - description * - free threading - :mod:`threading` - - using multiple physical threads in the same process, - with no isolation between them + - | using multiple physical threads in the same process, + | with no isolation between them * - | isolated threads | (multiple interpreters) - `interpreters `_ - - threads, often physical, with strict isolation - between them (e.g. CSP and actor model) + - | threads, often physical, with strict isolation between them + | (e.g. CSP and actor model) * - coroutines (async/await) - :mod:`asyncio` - switching between logical threads is explicitly controlled by each @@ -190,7 +199,8 @@ Python supports directly: - :mod:`multiprocessing` - using multiple isolated processes * - distributed - - `multiprocessing `_ + - | `multiprocessing `_ + | (limited) - multiprocessing across multiple computers There are tradeoffs to each, whether in performance or complexity. @@ -713,6 +723,12 @@ that support these concurrency models in various contexts: - +.. raw:: html + +
+ +---- + .. _concurrency-design: Designing A Program For Concurrency @@ -1011,6 +1027,12 @@ TBD * often requires configuration +.. raw:: html + +
+ +---- + .. _concurrency-primitives: Python Concurrency Primitives @@ -1064,6 +1086,14 @@ TBD .. TODO finish +.. raw:: html + +
+ +---- + +.. XXX Move this section to a separate doc? + .. _concurrency-workload-examples: Python Concurrency Workload Examples @@ -1218,7 +1248,7 @@ We'll start with the high-level code corresponding to the application's five top-level tasks we identified earlier. Most of the high-level code has nothing to do with concurrency. -The parts that do, however marginally, are highlighted. +The part that does, ``search()``, is highlighted. .. raw:: html @@ -1228,8 +1258,9 @@ The parts that do, however marginally, are highlighted. .. literalinclude:: ../includes/concurrency/grep-parts.py :start-after: [start-high-level] :end-before: [end-high-level] + :dedent: :linenos: - :emphasize-lines: 7,13,16,22,35,38,42,44,47,53,66,69 + :emphasize-lines: 7 .. raw:: html @@ -1237,6 +1268,24 @@ The parts that do, however marginally, are highlighted. The ``search()`` function that gets called returns an iterator (or async iterator) that yields the matches, which get printed. +Here's the high-level code again, but with highlighting on each line +that uses the iterator. + +.. raw:: html + +
+ (expand) + +.. literalinclude:: ../includes/concurrency/grep-parts.py + :start-after: [start-high-level] + :end-before: [end-high-level] + :dedent: + :linenos: + :emphasize-lines: 13,16,22,35,38,42,44,47,53,66,69 + +.. raw:: html + +
Here's the search function for a non-concurrent implementation: @@ -1246,8 +1295,10 @@ Here's the search function for a non-concurrent implementation: :linenos: ``iter_lines()`` is a straight-forward helper that opens the file -and yields each line. ``search_lines()`` is a sequential-search -helper used by all the example implementations here: +and yields each line. + +``search_lines()`` is a sequential-search helper used by all the +example implementations here: .. literalinclude:: ../includes/concurrency/grep-parts.py :start-after: [start-search-lines] @@ -1266,27 +1317,45 @@ the same for all the concurrency models. :end-before: [end-impl-threads] :linenos: -The ``search()`` function still yields all the matches in the right -order. Concurrency may be happening as long as that iterator hasn't -been exhausted. That means it is happening more or less the entire time -we loop over the matches to print them in ``main()`` (in the high-level -code above). +We loop over the filenames and start a thread for each one. Each one +sends the matches it finds back using a queue. + +We want to start yielding matches as soon as possible, so we also use +a background thread to run the code that loops over the filenames. + +We use a queue of queues (``matches_by_file``) to make sure +we get results back in the right order, regardless of when the worker +threads provide them. + +The operating system will only let us have so many files open at once, +so we limit how many workers are running. (``MAX_FILES``) + +If the workers find matches substantially faster than we can use them +then we may end up using more memory than we need to. To avoid any +backlog, we limit how many matches can be queued up for any given file. +(``MAX_MATCHES``) One notable point is that the actual files are not opened until we need to iterate over the lines. For the most part, this is so we can avoid dealing with passing an open file to a concurrency worker. Instead we pass the filename, which is much simpler. -Common pattern -^^^^^^^^^^^^^^ - -TBD +Finally, we have to manage the workers manually. If we used +`concurrent.futures`_, it would take care of that for us. .. TODO finish -Here are some things we don't do but would be worth doing: +Here are some things we don't do but *might* be worth doing: * stop iteration when requested (or for ``ctrl-C``) +* split up each file between multiple workers +* ... + +Recall that the ``search()`` function returns an iterator that yields +all the matches. Concurrency may be happening as long as that iterator +hasn't been exhausted. That means it is happening more or less the +entire time we loop over the matches to print them in ``main()`` +(in the high-level code above). .. _concurrency-grep-side-by-side: @@ -1406,10 +1475,6 @@ asyncio: * ... -concurrent.futures: - -* ... - concurrent.futures ^^^^^^^^^^^^^^^^^^ diff --git a/Doc/includes/concurrency/grep-parts.py b/Doc/includes/concurrency/grep-parts.py index ec8505644c5b71..9cab871c261f21 100644 --- a/Doc/includes/concurrency/grep-parts.py +++ b/Doc/includes/concurrency/grep-parts.py @@ -124,7 +124,6 @@ def resolve_filenames(filenames, recursive=False): # [start-impl-sequential] def search_sequential(filenames, regex, opts): for filename in filenames: - # iter_lines() opens the file too. 
        lines = iter_lines(filename)
         yield from search_lines(lines, regex, opts, filename)
 # [end-impl-sequential]
@@ -487,7 +486,7 @@ def main(regex=regex, filenames=filenames):
 
     # step 4
     if hasattr(type(matches), '__aiter__'):
-        async def search_and_show(matches=matches):
+        async def iter_and_show(matches=matches):
             matches = type(matches).__aiter__(matches)
 
             # Handle the first match.

From 10a65718e3b1845211ace98355afddf0a3673662 Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Tue, 24 Sep 2024 13:51:57 -0600
Subject: [PATCH 76/80] Fix typos and wording.

---
 Doc/howto/concurrency.rst | 44 +++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst
index 47b981724dc675..b4290a0628619d 100644
--- a/Doc/howto/concurrency.rst
+++ b/Doc/howto/concurrency.rst
@@ -496,12 +496,11 @@ short turns, so none have to wait too long, but it still prevents
 any actual parallelism of CPU-bound code.
 
 That said, the Python runtime (and extension modules) can release the
-:term:`!GIL` when the thread is going to be doing something unrelated
-to Python, particularly something slow or long,
-like a blocking IO operation.
+:term:`!GIL` when the thread is doing slow or long-running work
+unrelated to Python, like a blocking IO operation.
 
 There is also an ongoing effort to eliminate the :term:`!GIL`:
-:pep:`630`. Any attempt to remove the :term:`!GIL` necessarily involves
+:pep:`703`. Any attempt to remove the :term:`!GIL` necessarily involves
 some slowdown to single-threaded performance and extra maintenance
 burden to the Python project and extension module maintainers.
 However, there is sufficient interest in unlocking full multi-core
@@ -549,7 +548,7 @@ Using multiple interpreters is fairly straight-forward:
 3. call :func:`exec`, but targeting the new interpreter
 4. switch back
 
-Note that no threads were involved. That's because running in an
+Note that no threads were involved; running in an
 interpreter happens relative to the current thread. New threads
 aren't implicitly involved.
 
@@ -574,9 +573,9 @@ Python 3.13+ on PyPI: :pypi:`interpreters-pep-734`.
 Improving performance for multiple interpreters
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-The long effort to improve on Python's implementation of multiple
-interpreters focused on isolation and stability. There was very little
-done to improve performance. This has the most impact on:
+The long-running effort to improve on Python's implementation of multiple
+interpreters focused on isolation and stability; very little was done
+to improve performance. This has the most impact on:
 
 * how much memory each interpreter uses
   (i.e. how many can run at the same time)
@@ -586,7 +585,7 @@ It also impacts how efficiently data/objects can be passed between
 interpreters, and how effectively objects can be shared.
 
 As the work on isolation wraps up, improvements will shift to focus
-on performance and memory usage. Thus the overhead associated with
+on performance and memory usage. Thus, the overhead of
 using multiple interpreters will drastically decrease over time.
 
 Shared resources
@@ -595,7 +594,7 @@ Shared resources
 Aside from memory, all physical threads in a process share the
 following resources:
 
-* commandline arguments ("argv")
+* command line arguments ("argv")
 * env vars
 * current working directory
 * signals, IPC, etc.
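
To tie the pieces of this section together, here is a minimal sketch
of the multiple-interpreters steps described above. It uses the same
internal module the side-by-side examples import
(``test.support.interpreters``), so treat it as illustrative only;
that module is not a public API::

   import test.support.interpreters as interpreters

   interp = interpreters.create()
   try:
       # exec() runs the code in the new interpreter, relative
       # to the current thread; no new thread is created.
       interp.exec('print("running in a subinterpreter")')
   finally:
       interp.close()

The :pypi:`interpreters-pep-734` backport mentioned above exposes
roughly the same API on Python 3.13+.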
@@ -612,8 +611,8 @@ TBD The other potential problem with using threads is that the conceptual model has no inherent synchronization, so it can be hard to follow - what is going on in the program at any given moment. That is - especially challenging for testing and debugging. + what is going on in the program at any given moment. That + would especially impact your efforts at testing and debugging. * "callback hell" * "where was this thread/coroutine started?" @@ -643,7 +642,7 @@ explicitly yielding control at the appropriate moments. Normal functions do not follow this pattern, so they cannot take advantage of that cooperative scheduling to avoid blocking -the program. Thus coroutines and non-coroutines don't mix well. +the program. Thus, coroutines and non-coroutines don't mix well. While there are tools for wrapping normal functions to act like coroutines, they are often converted into coroutines instead. At that point, if any non-async code relies on the function then @@ -827,7 +826,7 @@ TBD "thread" of execution. Sometimes it makes sense to break up that sequence into smaller pieces, - where some of them can run independently of others. Thus the program + where some of them can run independently of others. Thus, the program then involves multiple logical threads. This is also called "multitasking" and each logical thread a "task". @@ -958,13 +957,14 @@ TBD As mentioned earlier, each concurrency model has its own set of tradeoffs. Free-threading probably has the most notoriety and the most examples, - but is also has the most pitfalls (see `Critical caveats`_ above). + but it also has the most pitfalls (see `Critical caveats`_ above). Isolated threads have few of those pitfalls but are less familiar and at least a little less efficient. - Multiprocessing and distributed are likewise isolated, but less - efficient, which can have a larger negative impact at smaller scales. + Multiprocessing and distributed computing are likewise isolated, + but less efficient, which can have a larger negative impact + at smaller scales. Async can be straightforward, but may cascade throughout a code base - and doesn't necessarily give you parallelism. + and doesn't provide parallelism. free-threading: @@ -1045,7 +1045,7 @@ TBD Dealing with data races is often managed using locks (AKA mutexes), at a low level, and thread-safe types and APIs at a high level. Depending on the programming language, the complexity is sometimes - mitigated somewhat by the compiler and runtime. There are even + mitigated by the compiler and runtime. There are even libraries and frameworks that help abstract away the complexity to an extent. On top of that, there are tools that can help identify potential races via static analysis. Unfortunately, none of these aids @@ -1099,9 +1099,9 @@ TBD Python Concurrency Workload Examples ==================================== -Below we have a series of examples of how to implement the most +Below, we have a series of examples of how to implement the most common Python workloads that take advantage of concurrency. -For each workload you will find an implementation for each of the +For each workload, you will find an implementation for each of the concurrency models. The implementations are meant to accurately demonstrate how best @@ -1163,7 +1163,7 @@ Also see: .. note:: - Each example is implemented as a basic commandline tool, but can be + Each example is implemented as a basic command line tool, but can be easily adapted to run as a web service. 
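
On the point above about async and non-async code mixing poorly:
the usual bridge in the blocking direction is to hand the blocking
call off to a worker thread. Here is a minimal sketch using
:func:`asyncio.to_thread`::

   import asyncio
   import time

   def blocking_work():
       # An ordinary function; calling it directly from a
       # coroutine would stall the whole event loop.
       time.sleep(0.5)
       return 'done'

   async def main():
       # to_thread() runs the blocking call in a worker thread,
       # so other coroutines keep running in the meantime.
       result = await asyncio.to_thread(blocking_work)
       print(result)

   asyncio.run(main())

This is the approach the ``read_line_async()`` XXX comment in the
asyncio example alludes to.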
Workload: grep

From 793bfa06b696c0d2685a36546d4160b86bcf3a5f Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Tue, 24 Sep 2024 17:41:02 -0600
Subject: [PATCH 77/80] Limit side-by-side example lines to 60 characters.

---
 Doc/includes/concurrency/grep-asyncio.py      | 47 +++++++++--------
 Doc/includes/concurrency/grep-interpreters.py | 51 +++++++++++--------
 .../concurrency/grep-multiprocessing-cf.py    | 44 +++++++++-------
 .../concurrency/grep-multiprocessing.py       | 47 +++++++++--------
 Doc/includes/concurrency/grep-sequential.py   | 42 ++++++++-------
 Doc/includes/concurrency/grep-threads-cf.py   | 45 +++++++++-------
 Doc/includes/concurrency/grep-threads.py      | 45 +++++++++-------
 7 files changed, 184 insertions(+), 137 deletions(-)

diff --git a/Doc/includes/concurrency/grep-asyncio.py b/Doc/includes/concurrency/grep-asyncio.py
index 1f915981a02179..59accf564773e9 100644
--- a/Doc/includes/concurrency/grep-asyncio.py
+++ b/Doc/includes/concurrency/grep-asyncio.py
@@ -13,14 +13,15 @@ async def do_background():
     MAX_FILES = 10
     MAX_MATCHES = 100
 
-    # Make sure we don't have too many coroutines at once,
+    # Make sure we don't have too many coros at once,
     # i.e. too many files open at once.
     counter = asyncio.Semaphore(MAX_FILES)
 
     async def search_file(filename, matches):
         # aiter_lines() opens the file too.
         lines = iter_lines(filename)
-        async for match in search_lines(lines, regex, opts, filename):
+        async for match in search_lines(
+            lines, regex, opts, filename):
             await matches.put(match)
         await matches.put(None)
         # Let a new coroutine start.
@@ -71,7 +72,8 @@ async def iter_lines(filename):
 
 async def read_line_async(infile):
     # XXX Do this async!
-    # maybe make use of asyncio.to_thread() or loop.run_in_executor()?
+    # maybe make use of asyncio.to_thread()
+    # or loop.run_in_executor()?
    return infile.readline()
 
 
@@ -121,23 +123,28 @@ def resolve_filenames(filenames, recursive=False):
 if __name__ == '__main__':
     # Parse the args.
import argparse - parser = argparse.ArgumentParser(prog='grep') - - parser.add_argument('-r', '--recursive', action='store_true') - parser.add_argument('-L', '--files-without-match', dest='filesonly', - action='store_const', const='invert') - parser.add_argument('-l', '--files-with-matches', dest='filesonly', - action='store_const', const='match') - parser.add_argument('-q', '--quiet', action='store_true') - parser.set_defaults(invert=False) - - regexopts = parser.add_mutually_exclusive_group(required=True) - regexopts.add_argument('-e', '--regexp', dest='regex', metavar='REGEX') - regexopts.add_argument('regex', nargs='?', metavar='REGEX') - - parser.add_argument('files', nargs='+', metavar='FILE') - - opts = parser.parse_args() + ap = argparse.ArgumentParser(prog='grep') + + ap.add_argument('-r', '--recursive', + action='store_true') + ap.add_argument('-L', '--files-without-match', + dest='filesonly', + action='store_const', const='invert') + ap.add_argument('-l', '--files-with-matches', + dest='filesonly', + action='store_const', const='match') + ap.add_argument('-q', '--quiet', action='store_true') + ap.set_defaults(invert=False) + + reopts = ap.add_mutually_exclusive_group(required=True) + reopts.add_argument('-e', '--regexp', dest='regex', + metavar='REGEX') + reopts.add_argument('regex', nargs='?', + metavar='REGEX') + + ap.add_argument('files', nargs='+', metavar='FILE') + + opts = ap.parse_args() ns = vars(opts) regex = ns.pop('regex') diff --git a/Doc/includes/concurrency/grep-interpreters.py b/Doc/includes/concurrency/grep-interpreters.py index 93dfe64e8059a5..d5910788dbbf3b 100644 --- a/Doc/includes/concurrency/grep-interpreters.py +++ b/Doc/includes/concurrency/grep-interpreters.py @@ -4,7 +4,7 @@ import sys import test.support.interpreters as interpreters -import test.support.interpreters.queues +import test.support.interpreters.queues as interp_queues import types import queue import threading @@ -16,6 +16,7 @@ def search(filenames, regex, opts): def do_background(): MAX_FILES = 10 MAX_MATCHES = 100 + new_queue = interpreters.queues.create def new_interpreter(): interp = interpreters.create() @@ -47,15 +48,15 @@ def next_worker(): return ready_workers.get() # blocking def do_work(filename, matches, interp): - #interp.call(search_file, (regex, opts, filename, matches)) interp.prepare_main(matches=matches) - interp.exec(f'search_file({filename!r}, matches)') + interp.exec( + f'search_file({filename!r}, matches)') # Let a new thread start. ready_work.put(interp) for filename in filenames: # Prepare for the file. - matches = interpreters.queues.create(MAX_MATCHES) + matches = interp_queues.create(MAX_MATCHES) matches_by_file.put(matches) interp = next_worker() @@ -88,7 +89,8 @@ def prep_interpreter(regex_pat, regex_flags, opts): def search_file(filename, matches): lines = iter_lines(filename) - for match in search_lines(lines, regex, opts, filename): + for match in search_lines( + lines, regex, opts, filename): matches.put(match) # blocking matches.put(None) # blocking return search_file @@ -148,23 +150,28 @@ def resolve_filenames(filenames, recursive=False): if __name__ == '__main__': # Parse the args. 
import argparse - parser = argparse.ArgumentParser(prog='grep') - - parser.add_argument('-r', '--recursive', action='store_true') - parser.add_argument('-L', '--files-without-match', dest='filesonly', - action='store_const', const='invert') - parser.add_argument('-l', '--files-with-matches', dest='filesonly', - action='store_const', const='match') - parser.add_argument('-q', '--quiet', action='store_true') - parser.set_defaults(invert=False) - - regexopts = parser.add_mutually_exclusive_group(required=True) - regexopts.add_argument('-e', '--regexp', dest='regex', metavar='REGEX') - regexopts.add_argument('regex', nargs='?', metavar='REGEX') - - parser.add_argument('files', nargs='+', metavar='FILE') - - opts = parser.parse_args() + ap = argparse.ArgumentParser(prog='grep') + + ap.add_argument('-r', '--recursive', + action='store_true') + ap.add_argument('-L', '--files-without-match', + dest='filesonly', + action='store_const', const='invert') + ap.add_argument('-l', '--files-with-matches', + dest='filesonly', + action='store_const', const='match') + ap.add_argument('-q', '--quiet', action='store_true') + ap.set_defaults(invert=False) + + reopts = ap.add_mutually_exclusive_group(required=True) + reopts.add_argument('-e', '--regexp', dest='regex', + metavar='REGEX') + reopts.add_argument('regex', nargs='?', + metavar='REGEX') + + ap.add_argument('files', nargs='+', metavar='FILE') + + opts = ap.parse_args() ns = vars(opts) regex = ns.pop('regex') diff --git a/Doc/includes/concurrency/grep-multiprocessing-cf.py b/Doc/includes/concurrency/grep-multiprocessing-cf.py index f59e1437381199..a41a30dfba6742 100644 --- a/Doc/includes/concurrency/grep-multiprocessing-cf.py +++ b/Doc/includes/concurrency/grep-multiprocessing-cf.py @@ -4,7 +4,7 @@ import sys from concurrent.futures import ProcessPoolExecutor -import multiprocessing +import multiprocessing import queue import threading @@ -23,7 +23,8 @@ def do_background(): matches_by_file.put(matches) # Start a thread to process the file. - workers.submit(search_file, filename, matches) + workers.submit( + search_file, filename, matches) matches_by_file.put(None) background = threading.Thread(target=do_background) @@ -104,23 +105,28 @@ def resolve_filenames(filenames, recursive=False): # Parse the args. 
import argparse - parser = argparse.ArgumentParser(prog='grep') - - parser.add_argument('-r', '--recursive', action='store_true') - parser.add_argument('-L', '--files-without-match', dest='filesonly', - action='store_const', const='invert') - parser.add_argument('-l', '--files-with-matches', dest='filesonly', - action='store_const', const='match') - parser.add_argument('-q', '--quiet', action='store_true') - parser.set_defaults(invert=False) - - regexopts = parser.add_mutually_exclusive_group(required=True) - regexopts.add_argument('-e', '--regexp', dest='regex', metavar='REGEX') - regexopts.add_argument('regex', nargs='?', metavar='REGEX') - - parser.add_argument('files', nargs='+', metavar='FILE') - - opts = parser.parse_args() + ap = argparse.ArgumentParser(prog='grep') + + ap.add_argument('-r', '--recursive', + action='store_true') + ap.add_argument('-L', '--files-without-match', + dest='filesonly', + action='store_const', const='invert') + ap.add_argument('-l', '--files-with-matches', + dest='filesonly', + action='store_const', const='match') + ap.add_argument('-q', '--quiet', action='store_true') + ap.set_defaults(invert=False) + + reopts = ap.add_mutually_exclusive_group(required=True) + reopts.add_argument('-e', '--regexp', dest='regex', + metavar='REGEX') + reopts.add_argument('regex', nargs='?', + metavar='REGEX') + + ap.add_argument('files', nargs='+', metavar='FILE') + + opts = ap.parse_args() ns = vars(opts) regex = ns.pop('regex') diff --git a/Doc/includes/concurrency/grep-multiprocessing.py b/Doc/includes/concurrency/grep-multiprocessing.py index ad8c3d1386d639..24f383b20547ce 100644 --- a/Doc/includes/concurrency/grep-multiprocessing.py +++ b/Doc/includes/concurrency/grep-multiprocessing.py @@ -15,7 +15,7 @@ def do_background(): MAX_FILES = 10 MAX_MATCHES = 100 - # Make sure we don't have too many processes at once, + # Make sure we don't have too many procs at once, # i.e. too many files open at once. counter = threading.Semaphore(MAX_FILES) finished = multiprocessing.Queue() @@ -47,7 +47,8 @@ def monitor_tasks(): # Start a subprocess to process the file. proc = multiprocessing.Process( target=search_file, - args=(filename, matches, regex, opts, index, finished), + args=(filename, matches, regex, opts, + index, finished), ) counter.acquire(blocking=True) active[index] = proc @@ -74,7 +75,8 @@ def monitor_tasks(): background.join() -def search_file(filename, matches, regex, opts, index, finished): +def search_file(filename, matches, regex, opts, + index, finished): lines = iter_lines(filename) for match in search_lines(lines, regex, opts, filename): matches.put(match) # blocking @@ -139,23 +141,28 @@ def resolve_filenames(filenames, recursive=False): # Parse the args. 
import argparse - parser = argparse.ArgumentParser(prog='grep') - - parser.add_argument('-r', '--recursive', action='store_true') - parser.add_argument('-L', '--files-without-match', dest='filesonly', - action='store_const', const='invert') - parser.add_argument('-l', '--files-with-matches', dest='filesonly', - action='store_const', const='match') - parser.add_argument('-q', '--quiet', action='store_true') - parser.set_defaults(invert=False) - - regexopts = parser.add_mutually_exclusive_group(required=True) - regexopts.add_argument('-e', '--regexp', dest='regex', metavar='REGEX') - regexopts.add_argument('regex', nargs='?', metavar='REGEX') - - parser.add_argument('files', nargs='+', metavar='FILE') - - opts = parser.parse_args() + ap = argparse.ArgumentParser(prog='grep') + + ap.add_argument('-r', '--recursive', + action='store_true') + ap.add_argument('-L', '--files-without-match', + dest='filesonly', + action='store_const', const='invert') + ap.add_argument('-l', '--files-with-matches', + dest='filesonly', + action='store_const', const='match') + ap.add_argument('-q', '--quiet', action='store_true') + ap.set_defaults(invert=False) + + reopts = ap.add_mutually_exclusive_group(required=True) + reopts.add_argument('-e', '--regexp', dest='regex', + metavar='REGEX') + reopts.add_argument('regex', nargs='?', + metavar='REGEX') + + ap.add_argument('files', nargs='+', metavar='FILE') + + opts = ap.parse_args() ns = vars(opts) regex = ns.pop('regex') diff --git a/Doc/includes/concurrency/grep-sequential.py b/Doc/includes/concurrency/grep-sequential.py index cb5a83e4b45c34..31ef2a11bee41b 100644 --- a/Doc/includes/concurrency/grep-sequential.py +++ b/Doc/includes/concurrency/grep-sequential.py @@ -8,7 +8,8 @@ def search(filenames, regex, opts): for filename in filenames: # iter_lines() opens the file too. lines = iter_lines(filename) - yield from search_lines(lines, regex, opts, filename) + yield from search_lines( + lines, regex, opts, filename) def iter_lines(filename): @@ -65,23 +66,28 @@ def resolve_filenames(filenames, recursive=False): if __name__ == '__main__': # Parse the args. 
import argparse - parser = argparse.ArgumentParser(prog='grep') - - parser.add_argument('-r', '--recursive', action='store_true') - parser.add_argument('-L', '--files-without-match', dest='filesonly', - action='store_const', const='invert') - parser.add_argument('-l', '--files-with-matches', dest='filesonly', - action='store_const', const='match') - parser.add_argument('-q', '--quiet', action='store_true') - parser.set_defaults(invert=False) - - regexopts = parser.add_mutually_exclusive_group(required=True) - regexopts.add_argument('-e', '--regexp', dest='regex', metavar='REGEX') - regexopts.add_argument('regex', nargs='?', metavar='REGEX') - - parser.add_argument('files', nargs='+', metavar='FILE') - - opts = parser.parse_args() + ap = argparse.ArgumentParser(prog='grep') + + ap.add_argument('-r', '--recursive', + action='store_true') + ap.add_argument('-L', '--files-without-match', + dest='filesonly', + action='store_const', const='invert') + ap.add_argument('-l', '--files-with-matches', + dest='filesonly', + action='store_const', const='match') + ap.add_argument('-q', '--quiet', action='store_true') + ap.set_defaults(invert=False) + + reopts = ap.add_mutually_exclusive_group(required=True) + reopts.add_argument('-e', '--regexp', dest='regex', + metavar='REGEX') + reopts.add_argument('regex', nargs='?', + metavar='REGEX') + + ap.add_argument('files', nargs='+', metavar='FILE') + + opts = ap.parse_args() ns = vars(opts) regex = ns.pop('regex') diff --git a/Doc/includes/concurrency/grep-threads-cf.py b/Doc/includes/concurrency/grep-threads-cf.py index 09a53add80e03c..682d9004a9c534 100644 --- a/Doc/includes/concurrency/grep-threads-cf.py +++ b/Doc/includes/concurrency/grep-threads-cf.py @@ -17,7 +17,8 @@ def do_background(): def search_file(filename, matches): lines = iter_lines(filename) - for match in search_lines(lines, regex, opts, filename): + for match in search_lines( + lines, regex, opts, filename): matches.put(match) # blocking matches.put(None) # blocking @@ -28,7 +29,8 @@ def search_file(filename, matches): matches_by_file.put(matches) # Start a thread to process the file. - workers.submit(search_file, filename, matches) + workers.submit( + search_file, filename, matches) matches_by_file.put(None) background = threading.Thread(target=do_background) @@ -100,23 +102,28 @@ def resolve_filenames(filenames, recursive=False): if __name__ == '__main__': # Parse the args. 
import argparse - parser = argparse.ArgumentParser(prog='grep') - - parser.add_argument('-r', '--recursive', action='store_true') - parser.add_argument('-L', '--files-without-match', dest='filesonly', - action='store_const', const='invert') - parser.add_argument('-l', '--files-with-matches', dest='filesonly', - action='store_const', const='match') - parser.add_argument('-q', '--quiet', action='store_true') - parser.set_defaults(invert=False) - - regexopts = parser.add_mutually_exclusive_group(required=True) - regexopts.add_argument('-e', '--regexp', dest='regex', metavar='REGEX') - regexopts.add_argument('regex', nargs='?', metavar='REGEX') - - parser.add_argument('files', nargs='+', metavar='FILE') - - opts = parser.parse_args() + ap = argparse.ArgumentParser(prog='grep') + + ap.add_argument('-r', '--recursive', + action='store_true') + ap.add_argument('-L', '--files-without-match', + dest='filesonly', + action='store_const', const='invert') + ap.add_argument('-l', '--files-with-matches', + dest='filesonly', + action='store_const', const='match') + ap.add_argument('-q', '--quiet', action='store_true') + ap.set_defaults(invert=False) + + reopts = ap.add_mutually_exclusive_group(required=True) + reopts.add_argument('-e', '--regexp', dest='regex', + metavar='REGEX') + reopts.add_argument('regex', nargs='?', + metavar='REGEX') + + ap.add_argument('files', nargs='+', metavar='FILE') + + opts = ap.parse_args() ns = vars(opts) regex = ns.pop('regex') diff --git a/Doc/includes/concurrency/grep-threads.py b/Doc/includes/concurrency/grep-threads.py index 11b33edffd807a..0a6d793962722a 100644 --- a/Doc/includes/concurrency/grep-threads.py +++ b/Doc/includes/concurrency/grep-threads.py @@ -20,7 +20,8 @@ def do_background(): def search_file(filename, matches): lines = iter_lines(filename) - for match in search_lines(lines, regex, opts, filename): + for match in search_lines( + lines, regex, opts, filename): matches.put(match) # blocking matches.put(None) # blocking # Let a new thread start. @@ -32,7 +33,8 @@ def search_file(filename, matches): matches_by_file.put(matches) # Start a thread to process the file. - t = threading.Thread(target=search_file, args=(filename, matches)) + t = threading.Thread(target=search_file, + args=(filename, matches)) counter.acquire() t.start() matches_by_file.put(None) @@ -106,23 +108,28 @@ def resolve_filenames(filenames, recursive=False): if __name__ == '__main__': # Parse the args. 
import argparse - parser = argparse.ArgumentParser(prog='grep') - - parser.add_argument('-r', '--recursive', action='store_true') - parser.add_argument('-L', '--files-without-match', dest='filesonly', - action='store_const', const='invert') - parser.add_argument('-l', '--files-with-matches', dest='filesonly', - action='store_const', const='match') - parser.add_argument('-q', '--quiet', action='store_true') - parser.set_defaults(invert=False) - - regexopts = parser.add_mutually_exclusive_group(required=True) - regexopts.add_argument('-e', '--regexp', dest='regex', metavar='REGEX') - regexopts.add_argument('regex', nargs='?', metavar='REGEX') - - parser.add_argument('files', nargs='+', metavar='FILE') - - opts = parser.parse_args() + ap = argparse.ArgumentParser(prog='grep') + + ap.add_argument('-r', '--recursive', + action='store_true') + ap.add_argument('-L', '--files-without-match', + dest='filesonly', + action='store_const', const='invert') + ap.add_argument('-l', '--files-with-matches', + dest='filesonly', + action='store_const', const='match') + ap.add_argument('-q', '--quiet', action='store_true') + ap.set_defaults(invert=False) + + reopts = ap.add_mutually_exclusive_group(required=True) + reopts.add_argument('-e', '--regexp', dest='regex', + metavar='REGEX') + reopts.add_argument('regex', nargs='?', + metavar='REGEX') + + ap.add_argument('files', nargs='+', metavar='FILE') + + opts = ap.parse_args() ns = vars(opts) regex = ns.pop('regex') From 3e19d7ed8a0f4a4ea82f142a267bd30f04024c50 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 24 Sep 2024 17:51:39 -0600 Subject: [PATCH 78/80] Fix the examples. --- Doc/includes/concurrency/grep-interpreters.py | 3 +-- Doc/includes/concurrency/grep-parts.py | 1 - Doc/includes/concurrency/run-examples.py | 2 ++ 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/includes/concurrency/grep-interpreters.py b/Doc/includes/concurrency/grep-interpreters.py index d5910788dbbf3b..4bb0b7d865aed7 100644 --- a/Doc/includes/concurrency/grep-interpreters.py +++ b/Doc/includes/concurrency/grep-interpreters.py @@ -28,7 +28,6 @@ def new_interpreter(): prep_interpreter = ns['prep_interpreter'] del ns, text - import grep._implementations search_file = prep_interpreter( {regex.pattern!r}, {regex.flags}, @@ -52,7 +51,7 @@ def do_work(filename, matches, interp): interp.exec( f'search_file({filename!r}, matches)') # Let a new thread start. - ready_work.put(interp) + ready_workers.put(interp) for filename in filenames: # Prepare for the file. diff --git a/Doc/includes/concurrency/grep-parts.py b/Doc/includes/concurrency/grep-parts.py index 9cab871c261f21..a3382609e84cbd 100644 --- a/Doc/includes/concurrency/grep-parts.py +++ b/Doc/includes/concurrency/grep-parts.py @@ -233,7 +233,6 @@ def new_interpreter(): prep_interpreter = ns['prep_interpreter'] del ns, text - import grep._implementations search_file = prep_interpreter( {regex.pattern!r}, {regex.flags}, diff --git a/Doc/includes/concurrency/run-examples.py b/Doc/includes/concurrency/run-examples.py index b2dd8e4b3422b5..85d754c3b5d5ff 100644 --- a/Doc/includes/concurrency/run-examples.py +++ b/Doc/includes/concurrency/run-examples.py @@ -557,6 +557,8 @@ def executable(self): @property def executable_argv(self): + if self._executable_argv is None: + return () return self._executable_argv @property From f826c9c673e6a4bc4ee7389498cbb2d57cbcf6b1 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 24 Sep 2024 17:57:02 -0600 Subject: [PATCH 79/80] Fix a ref. 
--- Doc/howto/concurrency.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/howto/concurrency.rst b/Doc/howto/concurrency.rst index b4290a0628619d..d7464cdc5c18ff 100644 --- a/Doc/howto/concurrency.rst +++ b/Doc/howto/concurrency.rst @@ -1496,8 +1496,8 @@ you can also use :mod:`concurrent.futures`:
-For processes`, use :class:`ProcessPoolExecutor`.
-For interpreters, use :class:`InterpreterPoolExecutor`.
+For processes, use :class:`concurrent.futures.ProcessPoolExecutor`.
+For interpreters, use :class:`!InterpreterPoolExecutor`.
 
 In both cases you must use the proper queue type and there
 are a few other minor differences.

From 049f1dea8284e815d284cef43fc8c6d972031d8c Mon Sep 17 00:00:00 2001
From: Eric Snow
Date: Wed, 25 Sep 2024 11:05:15 -0600
Subject: [PATCH 80/80] lint

---
 Doc/includes/concurrency/grep-multiprocessing-cf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Doc/includes/concurrency/grep-multiprocessing-cf.py b/Doc/includes/concurrency/grep-multiprocessing-cf.py
index a41a30dfba6742..3b784ed96d7195 100644
--- a/Doc/includes/concurrency/grep-multiprocessing-cf.py
+++ b/Doc/includes/concurrency/grep-multiprocessing-cf.py
@@ -4,7 +4,7 @@
 import sys
 
 from concurrent.futures import ProcessPoolExecutor
-import multiprocessing 
+import multiprocessing
 import queue
 import threading
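
For reference, the executor pattern referred to in the hunk above
looks roughly like the following sketch. It is illustrative only;
the filenames and the worker function are hypothetical, and
:class:`concurrent.futures.ThreadPoolExecutor` stands in for
whichever executor fits the chosen concurrency model::

   from concurrent.futures import ThreadPoolExecutor

   def search_file(filename, pattern):
       # Stand-in worker; the real examples stream matches back
       # through a queue instead of returning a list.
       with open(filename) as infile:
           return [line for line in infile if pattern in line]

   filenames = ['a.txt', 'b.txt']  # hypothetical inputs
   with ThreadPoolExecutor(max_workers=10) as workers:
       futures = [workers.submit(search_file, fn, 'import')
                  for fn in filenames]
       for future in futures:
           for match in future.result():
               print(match, end='')

The executor owns the worker pool, which is exactly the bookkeeping
that the hand-rolled thread and interpreter examples above manage
manually.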