Created
April 23, 2026 06:18
-
-
Save nascheme/21ff658c6fb6294591cfae1047cb1558 to your computer and use it in GitHub Desktop.
Proposed change to Python 3.14 incremental gc
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h | |
| index 6b3d5711b92..aadc577306f 100644 | |
| --- a/Include/internal/pycore_interp_structs.h | |
| +++ b/Include/internal/pycore_interp_structs.h | |
| @@ -229,7 +229,8 @@ struct _gc_runtime_state { | |
| PyObject *callbacks; | |
| Py_ssize_t heap_size; | |
| - Py_ssize_t work_to_do; | |
| + /* Total number of young objects since the last complete collection */ | |
| + Py_ssize_t young_pending; | |
| /* Which of the old spaces is the visited space */ | |
| int visited_space; | |
| int phase; | |
| diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h | |
| index b182f7825a2..0ce6987b0ec 100644 | |
| --- a/Include/internal/pycore_runtime_init.h | |
| +++ b/Include/internal/pycore_runtime_init.h | |
| @@ -139,7 +139,7 @@ extern PyTypeObject _PyExc_MemoryError; | |
| { .threshold = 10, }, \ | |
| { .threshold = 0, }, \ | |
| }, \ | |
| - .work_to_do = -5000, \ | |
| + .young_pending = 0, \ | |
| .phase = GC_PHASE_MARK, \ | |
| }, \ | |
| .qsbr = { \ | |
| @@ -233,4 +233,4 @@ extern PyTypeObject _PyExc_MemoryError; | |
| #ifdef __cplusplus | |
| } | |
| #endif | |
| -#endif /* !Py_INTERNAL_RUNTIME_INIT_H */ | |
| \ No newline at end of file | |
| +#endif /* !Py_INTERNAL_RUNTIME_INIT_H */ | |
| diff --git a/Python/gc.c b/Python/gc.c | |
| index d067a6144b0..19019a2d4b6 100644 | |
| --- a/Python/gc.c | |
| +++ b/Python/gc.c | |
| @@ -1486,7 +1486,7 @@ completed_scavenge(GCState *gcstate) | |
| gc_list_set_space(&gcstate->old[not_visited].head, not_visited); | |
| } | |
| assert(gc_list_is_empty(&gcstate->old[visited].head)); | |
| - gcstate->work_to_do = 0; | |
| + gcstate->young_pending = 0; | |
| gcstate->phase = GC_PHASE_MARK; | |
| } | |
| @@ -1618,7 +1618,6 @@ mark_at_start(PyThreadState *tstate) | |
| PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; | |
| Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space); | |
| objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true); | |
| - gcstate->work_to_do -= objects_marked; | |
| gcstate->phase = GC_PHASE_COLLECT; | |
| validate_spaces(gcstate); | |
| return objects_marked; | |
| @@ -1627,30 +1626,42 @@ mark_at_start(PyThreadState *tstate) | |
| static intptr_t | |
| assess_work_to_do(GCState *gcstate) | |
| { | |
| - /* The amount of work we want to do depends on three things. | |
| - * 1. The number of new objects created | |
| - * 2. The growth in heap size since the last collection | |
| - * 3. The heap size (up to the number of new objects, to avoid quadratic effects) | |
| - * | |
| - * For a steady state heap, the amount of work to do is three times the number | |
| - * of new objects added to the heap. This ensures that we stay ahead in the | |
| - * worst case of all new objects being garbage. | |
| - * | |
| - * This could be improved by tracking survival rates, but it is still a | |
| - * large improvement on the non-marking approach. | |
| - */ | |
| - intptr_t scale_factor = gcstate->old[0].threshold; | |
| - if (scale_factor < 2) { | |
| - scale_factor = 2; | |
| - } | |
| intptr_t new_objects = gcstate->young.count; | |
| - intptr_t max_heap_fraction = new_objects*2; | |
| - intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; | |
| - if (heap_fraction > max_heap_fraction) { | |
| - heap_fraction = max_heap_fraction; | |
| + // This is the minimum number of objects we will take from the pending | |
| + // (not_visited) set when building the increment set. It needs to be | |
| + // large enough such that, in general, the pending set is empty when | |
| + // the other conditions in ready_to_mark() are true. It is okay to be | |
| + // conservative here (too large) as long as we avoid long pauses in | |
| + // the GC increments. | |
| + intptr_t pending_count = gcstate->young.threshold; | |
| + if (pending_count < new_objects) { | |
| + pending_count = new_objects; | |
| } | |
| + // For the non-incremental GC (in Python <= 3.13) we counted the young | |
| + // objects that survive a gen0 collection. That's not easy to do here | |
| + // since the increment includes not only the young objects but also some | |
| + // from the old generation. Doing the simpler thing and just counting | |
| + // all young objects means we might finish a full collector more quickly | |
| + // compared to if we only counted survivors. | |
| + gcstate->young_pending += new_objects; | |
| gcstate->young.count = 0; | |
| - return new_objects + heap_fraction; | |
| + return new_objects + pending_count; | |
| +} | |
| + | |
| +static bool | |
| +ready_to_mark(GCState *gcstate) | |
| +{ | |
| + PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head; | |
| + if (!gc_list_is_empty(pending)) { | |
| + return false; | |
| + } | |
| + if (gcstate->young_pending < gcstate->young.threshold) { | |
| + return false; | |
| + } | |
| + if (gcstate->young_pending < gcstate->heap_size / 4) { | |
| + return false; | |
| + } | |
| + return true; | |
| } | |
| static void | |
| @@ -1658,15 +1669,10 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) | |
| { | |
| GC_STAT_ADD(1, collections, 1); | |
| GCState *gcstate = &tstate->interp->gc; | |
| - gcstate->work_to_do += assess_work_to_do(gcstate); | |
| - if (gcstate->work_to_do < 0) { | |
| - return; | |
| - } | |
| untrack_tuples(&gcstate->young.head); | |
| if (gcstate->phase == GC_PHASE_MARK) { | |
| Py_ssize_t objects_marked = mark_at_start(tstate); | |
| GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); | |
| - gcstate->work_to_do -= objects_marked; | |
| stats->candidates += objects_marked; | |
| validate_spaces(gcstate); | |
| return; | |
| @@ -1675,18 +1681,14 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) | |
| PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; | |
| PyGC_Head increment; | |
| gc_list_init(&increment); | |
| - int scale_factor = gcstate->old[0].threshold; | |
| - if (scale_factor < 2) { | |
| - scale_factor = 2; | |
| - } | |
| intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false); | |
| GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); | |
| - gcstate->work_to_do -= objects_marked; | |
| + int work_to_do = assess_work_to_do(gcstate); | |
| gc_list_set_space(&gcstate->young.head, gcstate->visited_space); | |
| gc_list_merge(&gcstate->young.head, &increment); | |
| gc_list_validate_space(&increment, gcstate->visited_space); | |
| Py_ssize_t increment_size = gc_list_size(&increment); | |
| - while (increment_size < gcstate->work_to_do) { | |
| + while (increment_size < work_to_do) { | |
| if (gc_list_is_empty(not_visited)) { | |
| break; | |
| } | |
| @@ -1705,9 +1707,8 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) | |
| gc_collect_region(tstate, &increment, &survivors, stats); | |
| gc_list_merge(&survivors, visited); | |
| assert(gc_list_is_empty(&increment)); | |
| - gcstate->work_to_do -= increment_size; | |
| - if (gc_list_is_empty(not_visited)) { | |
| + if (ready_to_mark(gcstate)) { | |
| completed_scavenge(gcstate); | |
| } | |
| validate_spaces(gcstate); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment