Skip to content

Instantly share code, notes, and snippets.

@nascheme
Created April 23, 2026 06:18
Show Gist options
  • Select an option

  • Save nascheme/21ff658c6fb6294591cfae1047cb1558 to your computer and use it in GitHub Desktop.

Select an option

Save nascheme/21ff658c6fb6294591cfae1047cb1558 to your computer and use it in GitHub Desktop.
Proposed change to Python 3.14 incremental gc
diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h
index 6b3d5711b92..aadc577306f 100644
--- a/Include/internal/pycore_interp_structs.h
+++ b/Include/internal/pycore_interp_structs.h
@@ -229,7 +229,8 @@ struct _gc_runtime_state {
PyObject *callbacks;
Py_ssize_t heap_size;
- Py_ssize_t work_to_do;
+ /* Total number of young objects since the last complete collection */
+ Py_ssize_t young_pending;
/* Which of the old spaces is the visited space */
int visited_space;
int phase;
diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h
index b182f7825a2..0ce6987b0ec 100644
--- a/Include/internal/pycore_runtime_init.h
+++ b/Include/internal/pycore_runtime_init.h
@@ -139,7 +139,7 @@ extern PyTypeObject _PyExc_MemoryError;
{ .threshold = 10, }, \
{ .threshold = 0, }, \
}, \
- .work_to_do = -5000, \
+ .young_pending = 0, \
.phase = GC_PHASE_MARK, \
}, \
.qsbr = { \
@@ -233,4 +233,4 @@ extern PyTypeObject _PyExc_MemoryError;
#ifdef __cplusplus
}
#endif
-#endif /* !Py_INTERNAL_RUNTIME_INIT_H */
\ No newline at end of file
+#endif /* !Py_INTERNAL_RUNTIME_INIT_H */
diff --git a/Python/gc.c b/Python/gc.c
index d067a6144b0..19019a2d4b6 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -1486,7 +1486,7 @@ completed_scavenge(GCState *gcstate)
gc_list_set_space(&gcstate->old[not_visited].head, not_visited);
}
assert(gc_list_is_empty(&gcstate->old[visited].head));
- gcstate->work_to_do = 0;
+ gcstate->young_pending = 0;
gcstate->phase = GC_PHASE_MARK;
}
@@ -1618,7 +1618,6 @@ mark_at_start(PyThreadState *tstate)
PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space);
objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true);
- gcstate->work_to_do -= objects_marked;
gcstate->phase = GC_PHASE_COLLECT;
validate_spaces(gcstate);
return objects_marked;
@@ -1627,30 +1626,42 @@ mark_at_start(PyThreadState *tstate)
static intptr_t
assess_work_to_do(GCState *gcstate)
{
- /* The amount of work we want to do depends on three things.
- * 1. The number of new objects created
- * 2. The growth in heap size since the last collection
- * 3. The heap size (up to the number of new objects, to avoid quadratic effects)
- *
- * For a steady state heap, the amount of work to do is three times the number
- * of new objects added to the heap. This ensures that we stay ahead in the
- * worst case of all new objects being garbage.
- *
- * This could be improved by tracking survival rates, but it is still a
- * large improvement on the non-marking approach.
- */
- intptr_t scale_factor = gcstate->old[0].threshold;
- if (scale_factor < 2) {
- scale_factor = 2;
- }
intptr_t new_objects = gcstate->young.count;
- intptr_t max_heap_fraction = new_objects*2;
- intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
- if (heap_fraction > max_heap_fraction) {
- heap_fraction = max_heap_fraction;
+ // This is the minimum number of objects we will take from the pending
+ // (not_visited) set when building the increment set. It needs to be
+ // large enough such that, in general, the pending set is empty when
+ // the other conditions in ready_to_mark() are true. It is okay to be
+ // conservative here (too large) as long as we avoid long pauses in
+ // the GC increments.
+ intptr_t pending_count = gcstate->young.threshold;
+ if (pending_count < new_objects) {
+ pending_count = new_objects;
}
+ // For the non-incremental GC (in Python <= 3.13) we counted the young
+ // objects that survive a gen0 collection. That's not easy to do here
+ // since the increment includes not only the young objects but also some
+ // from the old generation. Doing the simpler thing and just counting
+ // all young objects means we might finish a full collector more quickly
+ // compared to if we only counted survivors.
+ gcstate->young_pending += new_objects;
gcstate->young.count = 0;
- return new_objects + heap_fraction;
+ return new_objects + pending_count;
+}
+
+static bool
+ready_to_mark(GCState *gcstate)
+{
+ PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head;
+ if (!gc_list_is_empty(pending)) {
+ return false;
+ }
+ if (gcstate->young_pending < gcstate->young.threshold) {
+ return false;
+ }
+ if (gcstate->young_pending < gcstate->heap_size / 4) {
+ return false;
+ }
+ return true;
}
static void
@@ -1658,15 +1669,10 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
{
GC_STAT_ADD(1, collections, 1);
GCState *gcstate = &tstate->interp->gc;
- gcstate->work_to_do += assess_work_to_do(gcstate);
- if (gcstate->work_to_do < 0) {
- return;
- }
untrack_tuples(&gcstate->young.head);
if (gcstate->phase == GC_PHASE_MARK) {
Py_ssize_t objects_marked = mark_at_start(tstate);
GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
- gcstate->work_to_do -= objects_marked;
stats->candidates += objects_marked;
validate_spaces(gcstate);
return;
@@ -1675,18 +1681,14 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
PyGC_Head increment;
gc_list_init(&increment);
- int scale_factor = gcstate->old[0].threshold;
- if (scale_factor < 2) {
- scale_factor = 2;
- }
intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false);
GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
- gcstate->work_to_do -= objects_marked;
+ int work_to_do = assess_work_to_do(gcstate);
gc_list_set_space(&gcstate->young.head, gcstate->visited_space);
gc_list_merge(&gcstate->young.head, &increment);
gc_list_validate_space(&increment, gcstate->visited_space);
Py_ssize_t increment_size = gc_list_size(&increment);
- while (increment_size < gcstate->work_to_do) {
+ while (increment_size < work_to_do) {
if (gc_list_is_empty(not_visited)) {
break;
}
@@ -1705,9 +1707,8 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
gc_collect_region(tstate, &increment, &survivors, stats);
gc_list_merge(&survivors, visited);
assert(gc_list_is_empty(&increment));
- gcstate->work_to_do -= increment_size;
- if (gc_list_is_empty(not_visited)) {
+ if (ready_to_mark(gcstate)) {
completed_scavenge(gcstate);
}
validate_spaces(gcstate);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment