From b25076b9e734e1ca7ea5ae8951f62d38597ce2d7 Mon Sep 17 00:00:00 2001 From: Dani Pinyol Date: Sun, 12 Apr 2026 22:22:27 +0200 Subject: [PATCH 1/2] fix: rare crash due to race condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vectors PYJLVALUES and PYJLFREEVALUES are modified without any protection against GC-triggered re-entrancy. _pyjl_dealloc(o1) or PyJuliaValue_SetValue └→ push!(PYJLFREEVALUES, idx) # begins resize (sets internal flag) └→ vector must grow → allocates → triggers Julia GC └→ GC collects a Py object → runs py_finalizer └→ enqueue(ptr) → PyGILState_Check()==1 (same thread holds GIL) └→ Py_DecRef(ptr) → refcount hits 0 └→ _pyjl_dealloc(o2) └→ push!(PYJLFREEVALUES, idx2) ← BOOM: flag already set ConcurrencyViolationError! 1. push! needs to grow the vector (exceeds capacity), so it allocates 2. The allocation triggers Julia GC, which runs finalizers 3. A finalizer calls Py_DecRef (because enqueue sees the GIL is held on this same thread), dropping refcount to 0, which triggers _pyjl_dealloc re-entrantly on the same vector This is especially likely during shutdown (at_jl_exit → jl_atexit_hook), because jl_gc_run_all_finalizers runs finalizers on the calling thread (the main thread, which holds the GIL). Many Py objects are finalized at once, repeatedly pushing to PYJLFREEVALUES, and each push that triggers a reallocation can cause the re-entrant chain. Disabled Julia's GC during the critical vector modifications, so that push!/pop! cannot trigger allocation-based GC, which prevents the finalizer chain from re-entering. Added a SpinLock as defense-in-depth against true multi-thread races. --- src/JlWrap/C.jl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/JlWrap/C.jl b/src/JlWrap/C.jl index 19ff2904..dc1d84c2 100644 --- a/src/JlWrap/C.jl +++ b/src/JlWrap/C.jl @@ -25,6 +25,8 @@ const PyJuliaBase_Type = Ref(C.PyNULL) const PYJLVALUES = [] # unused indices in PYJLVALUES const PYJLFREEVALUES = Int[] +# lock protecting PYJLVALUES and PYJLFREEVALUES from concurrent modification +const PYJL_LOCK = Threads.SpinLock() function _pyjl_new(t::C.PyPtr, ::C.PyPtr, ::C.PyPtr) alloc = C.PyType_GetSlot(t, C.Py_tp_alloc) @@ -39,8 +41,16 @@ end function _pyjl_dealloc(o::C.PyPtr) idx = C.@ft UnsafePtr{PyJuliaValueObject}(o).value[] if idx != 0 + # Disable GC to prevent push! from triggering a GC that runs finalizers + # re-entrantly (the finalizer chain: push! → alloc → GC → py_finalizer → + # enqueue → Py_DecRef → _pyjl_dealloc → push! on same vector → + # ConcurrencyViolationError). The lock guards against true multi-thread races. + prev = Base.GC.enable(false) + lock(PYJL_LOCK) PYJLVALUES[idx] = nothing push!(PYJLFREEVALUES, idx) + unlock(PYJL_LOCK) + Base.GC.enable(prev) end (C.@ft UnsafePtr{PyJuliaValueObject}(o).weaklist[!]) == C.PyNULL || C.PyObject_ClearWeakRefs(o) freeptr = C.PyType_GetSlot(C.Py_Type(o), C.Py_tp_free) @@ -375,6 +385,10 @@ PyJuliaValue_SetValue(_o, @nospecialize(v)) = Base.GC.@preserve _o begin o = C.asptr(_o) idx = C.@ft UnsafePtr{PyJuliaValueObject}(o).value[] if idx == 0 + # Disable GC to prevent push!/pop! from triggering a GC whose finalizers + # re-entrantly resize the same vectors (see _pyjl_dealloc comment). + prev = Base.GC.enable(false) + lock(PYJL_LOCK) if isempty(PYJLFREEVALUES) push!(PYJLVALUES, v) idx = length(PYJLVALUES) @@ -382,6 +396,8 @@ PyJuliaValue_SetValue(_o, @nospecialize(v)) = Base.GC.@preserve _o begin idx = pop!(PYJLFREEVALUES) PYJLVALUES[idx] = v end + unlock(PYJL_LOCK) + Base.GC.enable(prev) C.@ft UnsafePtr{PyJuliaValueObject}(o).value[] = idx else PYJLVALUES[idx] = v From 7d79ab5c5f0344414e1a4a6fc6b21c6df579767d Mon Sep 17 00:00:00 2001 From: Dani Pinyol Date: Wed, 27 May 2026 11:07:42 +0200 Subject: [PATCH 2/2] fix: no GC disable. avoid resize PYJLFREEVALUES in dealloc --- src/JlWrap/C.jl | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/JlWrap/C.jl b/src/JlWrap/C.jl index dc1d84c2..60118ba6 100644 --- a/src/JlWrap/C.jl +++ b/src/JlWrap/C.jl @@ -23,9 +23,13 @@ const PyJuliaBase_Type = Ref(C.PyNULL) # we store the actual julia values here # the `value` field of `PyJuliaValueObject` indexes into here const PYJLVALUES = [] -# unused indices in PYJLVALUES +# unused indices in PYJLVALUES; kept the same length as PYJLVALUES (extra slots +# are pushed alongside PYJLVALUES so the dealloc/finalizer path never has to +# resize this vector). Only the first PYJLFREEVALUECOUNT[] entries are valid +# free indices; the rest are placeholders. const PYJLFREEVALUES = Int[] -# lock protecting PYJLVALUES and PYJLFREEVALUES from concurrent modification +const PYJLFREEVALUECOUNT = Ref(0) +# lock protecting PYJLVALUES, PYJLFREEVALUES and PYJLFREEVALUECOUNT const PYJL_LOCK = Threads.SpinLock() function _pyjl_new(t::C.PyPtr, ::C.PyPtr, ::C.PyPtr) @@ -41,16 +45,14 @@ end function _pyjl_dealloc(o::C.PyPtr) idx = C.@ft UnsafePtr{PyJuliaValueObject}(o).value[] if idx != 0 - # Disable GC to prevent push! from triggering a GC that runs finalizers - # re-entrantly (the finalizer chain: push! → alloc → GC → py_finalizer → - # enqueue → Py_DecRef → _pyjl_dealloc → push! on same vector → - # ConcurrencyViolationError). The lock guards against true multi-thread races. - prev = Base.GC.enable(false) + # No allocation in this critical section: PYJLFREEVALUES has been pre-sized + # to match PYJLVALUES by PyJuliaValue_SetValue, so recording a free index is + # just a write to an existing slot plus a counter bump. This avoids the GC-triggered lock(PYJL_LOCK) PYJLVALUES[idx] = nothing - push!(PYJLFREEVALUES, idx) + PYJLFREEVALUECOUNT[] += 1 + PYJLFREEVALUES[PYJLFREEVALUECOUNT[]] = idx unlock(PYJL_LOCK) - Base.GC.enable(prev) end (C.@ft UnsafePtr{PyJuliaValueObject}(o).weaklist[!]) == C.PyNULL || C.PyObject_ClearWeakRefs(o) freeptr = C.PyType_GetSlot(C.Py_Type(o), C.Py_tp_free) @@ -385,19 +387,19 @@ PyJuliaValue_SetValue(_o, @nospecialize(v)) = Base.GC.@preserve _o begin o = C.asptr(_o) idx = C.@ft UnsafePtr{PyJuliaValueObject}(o).value[] if idx == 0 - # Disable GC to prevent push!/pop! from triggering a GC whose finalizers - # re-entrantly resize the same vectors (see _pyjl_dealloc comment). - prev = Base.GC.enable(false) lock(PYJL_LOCK) - if isempty(PYJLFREEVALUES) + if PYJLFREEVALUECOUNT[] == 0 + # Grow both vectors together so length(PYJLFREEVALUES) == length(PYJLVALUES) + # is preserved. The 0 in PYJLFREEVALUES is just a placeholder push!(PYJLVALUES, v) + push!(PYJLFREEVALUES, 0) idx = length(PYJLVALUES) else - idx = pop!(PYJLFREEVALUES) + idx = PYJLFREEVALUES[PYJLFREEVALUECOUNT[]] + PYJLFREEVALUECOUNT[] -= 1 PYJLVALUES[idx] = v end unlock(PYJL_LOCK) - Base.GC.enable(prev) C.@ft UnsafePtr{PyJuliaValueObject}(o).value[] = idx else PYJLVALUES[idx] = v