From 5e965ab639ec222bb2ecf21728d40e1a29b80197 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Sun, 4 May 2025 12:40:12 -0700
Subject: [PATCH 1/6] Check resident set size (RSS) before GC trigger.

---
 Include/internal/pycore_interp_structs.h |  10 ++
 Python/gc_free_threading.c               | 215 ++++++++++++++++++++++-
 2 files changed, 220 insertions(+), 5 deletions(-)

diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h
index 45d878af967b86..d418169b29b542 100644
--- a/Include/internal/pycore_interp_structs.h
+++ b/Include/internal/pycore_interp_structs.h
@@ -245,6 +245,16 @@ struct _gc_runtime_state {
 
     /* True if gc.freeze() has been used. */
     int freeze_active;
+
+    /* Resident set size (RSS) of the process after last GC. */
+    Py_ssize_t last_rss;
+
+    /* This accumulates the new object count whenever collection is deferred
+       due to the RSS increase condition not being met.  Reset on collection. */
+    Py_ssize_t deferred_count;
+
+    /* Mutex held for gc_should_collect_rss(). */
+    PyMutex mutex;
 #endif
 };
 
diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index 2db75e0fd416f9..2861f838b4f286 100644
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@@ -17,6 +17,35 @@
 
 #include "pydtrace.h"
 
+// Platform-specific includes for get_current_rss().
+#ifdef _WIN32
+    #include <windows.h>
+    #include <psapi.h> // For GetProcessMemoryInfo
+#elif defined(__linux__)
+    #include <unistd.h> // For sysconf, getpid
+    //#include <errno.h>
+    // #include <fcntl.h> // Only if using open/read directly
+#elif defined(__APPLE__)
+    // macOS (Darwin)
+    #include <mach/mach.h>
+    #include <unistd.h> // For sysconf, getpid
+#elif defined(__FreeBSD__)
+    // FreeBSD
+    #include <sys/types.h>
+    #include <sys/sysctl.h>
+    #include <sys/user.h> // Requires sys/user.h for kinfo_proc definition
+    #include <kvm.h>
+    #include <unistd.h> // For sysconf, getpid
+    #include <fcntl.h> // For O_RDONLY
+    #include <limits.h> // For _POSIX2_LINE_MAX
+#elif defined(__OpenBSD__)
+    // OpenBSD
+    #include <sys/types.h>
+    #include <sys/sysctl.h>
+    #include <sys/user.h> // For kinfo_proc
+    #include <unistd.h> // For sysconf, getpid
+    //#include <errno.h>
+#endif
 
 // enable the "mark alive" pass of GC
 #define GC_ENABLE_MARK_ALIVE 1
@@ -1878,6 +1907,172 @@ cleanup_worklist(struct worklist *worklist)
     }
 }
 
+// Return the current resident set size (RSS) of the process, in units of KB.
+// Returns -1 if this operation is not supported or on failure.
+static Py_ssize_t
+get_current_rss(void)
+{
+#ifdef _WIN32
+    // Windows implementation using GetProcessMemoryInfo
+    PROCESS_MEMORY_COUNTERS pmc;
+    HANDLE hProcess = GetCurrentProcess();
+    if (NULL == hProcess) {
+        // Should not happen for the current process
+        return -1;
+    }
+
+    // GetProcessMemoryInfo returns non-zero on success
+    if (GetProcessMemoryInfo(hProcess, &pmc, sizeof(pmc))) {
+        // pmc.WorkingSetSize is in bytes. Convert to KB.
+        return (Py_ssize_t)(pmc.WorkingSetSize / 1024);
+    } else {
+        CloseHandle(hProcess);
+        return -1;
+    }
+
+#elif __linux__
+    // Linux implementation using /proc/self/statm
+    long page_size_bytes = sysconf(_SC_PAGE_SIZE);
+    if (page_size_bytes <= 0) {
+        return -1;
+    }
+
+    FILE *fp = fopen("/proc/self/statm", "r");
+    if (fp == NULL) {
+        return -1;
+    }
+
+    // Second number is resident size in pages
+    long rss_pages;
+    if (fscanf(fp, "%*d %ld", &rss_pages) != 1) {
+        fclose(fp);
+        return -1;
+    }
+    fclose(fp);
+
+    // Convert unit to KB
+    return (Py_ssize_t)rss_pages * (page_size_bytes / 1024);
+
+#elif defined(__APPLE__)
+    // --- MacOS (Darwin) ---
+    mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;
+    mach_task_basic_info_data_t info;
+    kern_return_t kerr;
+
+    kerr = task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &count);
+    if (kerr != KERN_SUCCESS) {
+        return -1;
+    }
+    // info.resident_size is in bytes. Convert to KB.
+    return (Py_ssize_t)(info.resident_size / 1024);
+
+#elif defined(__FreeBSD__)
+    long page_size_kb = sysconf(_SC_PAGESIZE) / 1024;
+    if (page_size_kb <= 0) {
+        return -1;
+    }
+
+    // Using /dev/null for vmcore avoids needing a dump file.
+    // NULL for kernel file uses running kernel.
+    char errbuf[_POSIX2_LINE_MAX]; // For kvm error messages
+    kvm_t *kd = kvm_openfiles(NULL, "/dev/null", NULL, O_RDONLY, errbuf);
+    if (kd == NULL) {
+        return -1;
+    }
+
+    // KERN_PROC_PID filters for the specific process ID
+    // n_procs will contain the number of processes returned (should be 1 or 0)
+    pid_t pid = getpid();
+    int n_procs;
+    struct kinfo_proc *kp = kvm_getprocs(kd, KERN_PROC_PID, pid, &n_procs);
+    if (kp == NULL) {
+        kvm_close(kd);
+        return -1;
+    }
+
+    Py_ssize_t rss_kb = -1;
+    if (n_procs > 0) {
+        // kp[0] contains the info for our process
+        // ki_rssize is in pages. Convert to KB.
+        rss_kb = (Py_ssize_t)kp->ki_rssize * page_size_kb;
+    } else {
+        // Process with PID not found, shouldn't happen for self.
+        rss_kb = -1;
+    }
+
+    kvm_close(kd);
+    return rss_kb;
+
+#elif defined(__OpenBSD__)
+    long page_size_kb = sysconf(_SC_PAGESIZE) / 1024;
+    if (page_size_kb <= 0) {
+        return -1;
+    }
+
+    struct kinfo_proc kp;
+    pid_t pid = getpid();
+    int mib[6];
+    size_t len = sizeof(kp);
+
+    mib[0] = CTL_KERN;
+    mib[1] = KERN_PROC;
+    mib[2] = KERN_PROC_PID;
+    mib[3] = pid;
+    mib[4] = sizeof(struct kinfo_proc); // size of the structure we want
+    mib[5] = 1;                         // want 1 structure back
+    if (sysctl(mib, 6, &kp, &len, NULL, 0) == -1) {
+         return -1;
+    }
+
+    if (len > 0) {
+        // p_vm_rssize is in pages on OpenBSD. Convert to KB.
+        return (Py_ssize_t)kp.p_vm_rssize * page_size_kb;
+    } else {
+        // Process info not returned
+        return -1;
+    }
+#else
+    // Unsupported platform
+    return -1;
+#endif
+}
+
+static bool
+gc_should_collect_rss(GCState *gcstate)
+{
+    Py_ssize_t rss = get_current_rss();
+    if (rss < 0) {
+        // Reading RSS is not supported or failed.
+        return true;
+    }
+    int threshold = gcstate->young.threshold;
+    if (gcstate->deferred_count > threshold * 40) {
+        // Too many new container objects since last GC, even though RSS
+        // might not have increased much.  This is intended to avoid resource
+        // exhaustion if some objects consume resources but don't result in a
+        // RSS increase.  We use 40x as the factor here because older versions
+        // of Python would do full collections after roughly every 70,000 new
+        // container objects.
+        return true;
+    }
+    Py_ssize_t last_rss = gcstate->last_rss;
+    Py_ssize_t rss_threshold = Py_MAX(last_rss / 10, 128);
+    if ((rss - last_rss) > rss_threshold) {
+        // The RSS has increased too much, do a collection.
+        return true;
+    }
+    else {
+        // The RSS has not increased enough, defer the collection and clear
+        // the young object count so we don't check RSS again on the next call
+        // to gc_should_collect().
+        Py_BEGIN_CRITICAL_SECTION_MUT(&gcstate->mutex);
+        gcstate->deferred_count += gcstate->young.count;
+        gcstate->young.count = 0;
+        Py_END_CRITICAL_SECTION();
+        return false;
+    }
+}
+
 static bool
 gc_should_collect(GCState *gcstate)
 {
@@ -1887,11 +2082,17 @@ gc_should_collect(GCState *gcstate)
     if (count <= threshold || threshold == 0 || !gc_enabled) {
         return false;
     }
-    // Avoid quadratic behavior by scaling threshold to the number of live
-    // objects. A few tests rely on immediate scheduling of the GC so we ignore
-    // the scaled threshold if generations[1].threshold is set to zero.
-    return (count > gcstate->long_lived_total / 4 ||
-            gcstate->old[0].threshold == 0);
+    if (gcstate->old[0].threshold == 0) {
+        // A few tests rely on immediate scheduling of the GC so we ignore the
+        // extra conditions if generations[1].threshold is set to zero.
+        return true;
+    }
+    if (count < gcstate->long_lived_total / 4) {
+        // Avoid quadratic behavior by scaling threshold to the number of live
+        // objects.
+        return false;
+    }
+    return gc_should_collect_rss(gcstate);
 }
 
 static void
@@ -1940,6 +2141,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
     }
 
     state->gcstate->young.count = 0;
+    state->gcstate->deferred_count = 0;
     for (int i = 1; i <= generation; ++i) {
         state->gcstate->old[i-1].count = 0;
     }
@@ -2033,6 +2235,9 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
     // to be freed.
     delete_garbage(state);
 
+    // Store the current RSS, possibly smaller now that we deleted garbage.
+    state->gcstate->last_rss = get_current_rss();
+
     // Append objects with legacy finalizers to the "gc.garbage" list.
     handle_legacy_finalizers(state);
 }

From f1834dd274dff2547016fb405eeccbb3c9cbbd91 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Sun, 4 May 2025 14:46:24 -0700
Subject: [PATCH 2/6] Update GC docs.

---
 Doc/library/gc.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst
index 480a9dec7f133b..7e70f90893b517 100644
--- a/Doc/library/gc.rst
+++ b/Doc/library/gc.rst
@@ -128,6 +128,11 @@ The :mod:`gc` module provides the following functions:
    starts. For each collection, all the objects in the young generation and some
    fraction of the old generation is collected.
 
+   In the free-threaded build, the increase in process resident set size (RSS)
+   is also checked before running the collector.  If the RSS has not increased
+   by 10% since the last collection and the net number of object allocations
+   has not exceeded 40 times *threshold0*, the collection is not run.
+
    The fraction of the old generation that is collected is **inversely** proportional
    to *threshold1*. The larger *threshold1* is, the slower objects in the old generation
    are collected.

From a477c0a7f877bfff1bb2cf8024ce13877304246b Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Sun, 4 May 2025 14:47:28 -0700
Subject: [PATCH 3/6] Add NEWS.

---
 .../2025-05-04-14-47-26.gh-issue-132917.DrEU1y.rst             | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-14-47-26.gh-issue-132917.DrEU1y.rst

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-14-47-26.gh-issue-132917.DrEU1y.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-14-47-26.gh-issue-132917.DrEU1y.rst
new file mode 100644
index 00000000000000..f22950cab22d68
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-05-04-14-47-26.gh-issue-132917.DrEU1y.rst
@@ -0,0 +1,3 @@
+For the free-threaded build, check the process resident set size (RSS)
+increase before triggering a full automatic garbage collection.  If the RSS
+has not increased by 10% since the last collection, the collection is deferred.

From 1091342a526a8b83d14106f0f0cb159b7f398179 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Sun, 4 May 2025 15:01:06 -0700
Subject: [PATCH 4/6] Cleanup includes.

---
 Python/gc_free_threading.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index 2861f838b4f286..8a02c122933c29 100644
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@@ -23,14 +23,10 @@
     #include <psapi.h> // For GetProcessMemoryInfo
 #elif defined(__linux__)
     #include <unistd.h> // For sysconf, getpid
-    //#include <errno.h>
-    // #include <fcntl.h> // Only if using open/read directly
 #elif defined(__APPLE__)
-    // macOS (Darwin)
     #include <mach/mach.h>
     #include <unistd.h> // For sysconf, getpid
 #elif defined(__FreeBSD__)
-    // FreeBSD
     #include <sys/types.h>
     #include <sys/sysctl.h>
     #include <sys/user.h> // Requires sys/user.h for kinfo_proc definition
@@ -39,12 +35,10 @@
     #include <fcntl.h> // For O_RDONLY
     #include <limits.h> // For _POSIX2_LINE_MAX
 #elif defined(__OpenBSD__)
-    // OpenBSD
     #include <sys/types.h>
     #include <sys/sysctl.h>
     #include <sys/user.h> // For kinfo_proc
     #include <unistd.h> // For sysconf, getpid
-    //#include <errno.h>
 #endif
 
 // enable the "mark alive" pass of GC

From 452cd0851260b01030e6e72418c72b576f0a0fa2 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Sun, 4 May 2025 15:35:40 -0700
Subject: [PATCH 5/6] Use atomic load for deferred_count.

---
 Python/gc_free_threading.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index 8a02c122933c29..b36a0f8289a721 100644
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@@ -2040,7 +2040,8 @@ gc_should_collect_rss(GCState *gcstate)
         return true;
     }
     int threshold = gcstate->young.threshold;
-    if (gcstate->deferred_count > threshold * 40) {
+    Py_ssize_t deferred = _Py_atomic_load_ssize_relaxed(&gcstate->deferred_count);
+    if (deferred > threshold * 40) {
         // Too many new container objects since last GC, even though RSS
         // might not have increased much.  This is intended to avoid resource
         // exhaustion if some objects consume resources but don't result in a

From ff2882ec3b839f23735e491ffb8231585dfef85d Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Mon, 5 May 2025 09:50:40 -0700
Subject: [PATCH 6/6] Address review feedback.

---
 Python/gc_free_threading.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c
index b36a0f8289a721..1e769ca48a8744 100644
--- a/Python/gc_free_threading.c
+++ b/Python/gc_free_threading.c
@@ -1919,8 +1919,8 @@ get_current_rss(void)
     if (GetProcessMemoryInfo(hProcess, &pmc, sizeof(pmc))) {
         // pmc.WorkingSetSize is in bytes. Convert to KB.
         return (Py_ssize_t)(pmc.WorkingSetSize / 1024);
-    } else {
-        CloseHandle(hProcess);
+    }
+    else {
         return -1;
     }
 
@@ -1944,6 +1944,11 @@ get_current_rss(void)
     }
     fclose(fp);
 
+    // Sanity check
+    if (rss_pages < 0 || rss_pages > 1000000000) {
+        return -1;
+    }
+
     // Convert unit to KB
     return (Py_ssize_t)rss_pages * (page_size_bytes / 1024);
 
@@ -1989,7 +1994,8 @@ get_current_rss(void)
         // kp[0] contains the info for our process
         // ki_rssize is in pages. Convert to KB.
         rss_kb = (Py_ssize_t)kp->ki_rssize * page_size_kb;
-    } else {
+    }
+    else {
         // Process with PID not found, shouldn't happen for self.
         rss_kb = -1;
     }
@@ -2021,7 +2027,8 @@ get_current_rss(void)
     if (len > 0) {
         // p_vm_rssize is in pages on OpenBSD. Convert to KB.
         return (Py_ssize_t)kp.p_vm_rssize * page_size_kb;
-    } else {
+    }
+    else {
         // Process info not returned
         return -1;
     }
@@ -2060,10 +2067,10 @@ gc_should_collect_rss(GCState *gcstate)
         // The RSS has not increased enough, defer the collection and clear
         // the young object count so we don't check RSS again on the next call
         // to gc_should_collect().
-        Py_BEGIN_CRITICAL_SECTION_MUT(&gcstate->mutex);
+        PyMutex_Lock(&gcstate->mutex);
         gcstate->deferred_count += gcstate->young.count;
         gcstate->young.count = 0;
-        Py_END_CRITICAL_SECTION();
+        PyMutex_Unlock(&gcstate->mutex);
         return false;
     }
 }