@@ -20,7 +20,7 @@ use crate::{
2020 AsObject , Py , PyObject , PyObjectRef , PyPayload , PyRef , PyResult ,
2121 builtins:: {
2222 self , PyBaseExceptionRef , PyDict , PyDictRef , PyInt , PyList , PyModule , PyStr , PyStrInterned ,
23- PyStrRef , PyTypeRef ,
23+ PyStrRef , PyTypeRef , PyWeak ,
2424 code:: PyCode ,
2525 dict:: { PyDictItems , PyDictKeys , PyDictValues } ,
2626 pystr:: AsPyStr ,
@@ -621,6 +621,186 @@ impl VirtualMachine {
621621 }
622622 }
623623
624+ /// Clear module references during shutdown.
625+ /// Follows the same phased algorithm as pylifecycle.c finalize_modules():
626+ /// no hardcoded module names, reverse import order, only builtins/sys last.
627+ pub fn finalize_modules ( & self ) {
628+ // Phase 1: Set special sys/builtins attributes to None, restore stdio
629+ self . finalize_modules_delete_special ( ) ;
630+
631+ // Phase 2: Remove all modules from sys.modules (set values to None),
632+ // and collect weakrefs to modules preserving import order.
633+ // Also keeps strong refs (module_refs) to prevent premature deallocation.
634+ // CPython uses _PyGC_CollectNoFail() here to collect __globals__ cycles;
635+ // since RustPython has no working GC, we keep modules alive through
636+ // Phase 4 so their dicts can be explicitly cleared.
637+ let ( module_weakrefs, module_refs) = self . finalize_remove_modules ( ) ;
638+
639+ // Phase 3: Clear sys.modules dict
640+ self . finalize_clear_modules_dict ( ) ;
641+
642+ // Phase 4: Clear module dicts in reverse import order using 2-pass algorithm.
643+ // All modules are still alive (held by module_refs), so all weakrefs are valid.
644+ // This breaks __globals__ cycles: dict entries set to None → functions freed →
645+ // __globals__ refs dropped → dict refcount decreases.
646+ self . finalize_clear_module_dicts ( & module_weakrefs) ;
647+
648+ // Drop strong refs → modules freed with already-cleared dicts.
649+ // No __globals__ cycles remain (broken by Phase 4).
650+ drop ( module_refs) ;
651+
652+ // Phase 5: Clear sys and builtins dicts last
653+ self . finalize_clear_sys_builtins_dict ( ) ;
654+ }
655+
656+ /// Phase 1: Set special sys attributes to None and restore stdio.
657+ fn finalize_modules_delete_special ( & self ) {
658+ let none = self . ctx . none ( ) ;
659+ let sys_dict = self . sys_module . dict ( ) ;
660+
661+ // Set special sys attributes to None
662+ for attr in & [
663+ "path" ,
664+ "argv" ,
665+ "ps1" ,
666+ "ps2" ,
667+ "last_exc" ,
668+ "last_type" ,
669+ "last_value" ,
670+ "last_traceback" ,
671+ "path_importer_cache" ,
672+ "meta_path" ,
673+ "path_hooks" ,
674+ ] {
675+ let _ = sys_dict. set_item ( * attr, none. clone ( ) , self ) ;
676+ }
677+
678+ // Restore stdin/stdout/stderr from __stdin__/__stdout__/__stderr__
679+ for ( std_name, dunder_name) in & [
680+ ( "stdin" , "__stdin__" ) ,
681+ ( "stdout" , "__stdout__" ) ,
682+ ( "stderr" , "__stderr__" ) ,
683+ ] {
684+ let restored = sys_dict
685+ . get_item_opt ( * dunder_name, self )
686+ . ok ( )
687+ . flatten ( )
688+ . unwrap_or_else ( || none. clone ( ) ) ;
689+ let _ = sys_dict. set_item ( * std_name, restored, self ) ;
690+ }
691+
692+ // builtins._ = None
693+ let _ = self . builtins . dict ( ) . set_item ( "_" , none, self ) ;
694+ }
695+
696+ /// Phase 2: Set all sys.modules values to None and collect weakrefs to modules.
697+ /// Returns (weakrefs for Phase 4, strong refs to keep modules alive).
698+ fn finalize_remove_modules ( & self ) -> ( Vec < ( String , PyRef < PyWeak > ) > , Vec < PyObjectRef > ) {
699+ let mut module_weakrefs = Vec :: new ( ) ;
700+ let mut module_refs = Vec :: new ( ) ;
701+
702+ let Ok ( modules) = self . sys_module . get_attr ( identifier ! ( self , modules) , self ) else {
703+ return ( module_weakrefs, module_refs) ;
704+ } ;
705+ let Some ( modules_dict) = modules. downcast_ref :: < PyDict > ( ) else {
706+ return ( module_weakrefs, module_refs) ;
707+ } ;
708+
709+ let none = self . ctx . none ( ) ;
710+ let items: Vec < _ > = modules_dict. into_iter ( ) . collect ( ) ;
711+
712+ for ( key, value) in items {
713+ let name = key
714+ . downcast_ref :: < PyStr > ( )
715+ . map ( |s| s. as_str ( ) . to_owned ( ) )
716+ . unwrap_or_default ( ) ;
717+
718+ // Save weakref and strong ref to module for later clearing
719+ if value. downcast_ref :: < PyModule > ( ) . is_some ( ) {
720+ if let Ok ( weak) = value. downgrade ( None , self ) {
721+ module_weakrefs. push ( ( name, weak) ) ;
722+ }
723+ module_refs. push ( value. clone ( ) ) ;
724+ }
725+
726+ // Set the value to None in sys.modules
727+ let _ = modules_dict. set_item ( & * key, none. clone ( ) , self ) ;
728+ }
729+
730+ ( module_weakrefs, module_refs)
731+ }
732+
733+ /// Phase 3: Clear sys.modules dict.
734+ fn finalize_clear_modules_dict ( & self ) {
735+ if let Ok ( modules) = self . sys_module . get_attr ( identifier ! ( self , modules) , self )
736+ && let Some ( modules_dict) = modules. downcast_ref :: < PyDict > ( )
737+ {
738+ modules_dict. clear ( ) ;
739+ }
740+ }
741+
742+ /// Phase 4: Clear module dicts.
743+ /// Without GC, only clear __main__ — other modules' __del__ handlers
744+ /// need their globals intact. CPython can clear ALL module dicts because
745+ /// _PyGC_CollectNoFail() finalizes cycle-participating objects beforehand.
746+ fn finalize_clear_module_dicts ( & self , module_weakrefs : & [ ( String , PyRef < PyWeak > ) ] ) {
747+ for ( name, weakref) in module_weakrefs. iter ( ) . rev ( ) {
748+ // Only clear __main__ — user objects with __del__ get finalized
749+ // while other modules' globals remain intact for their __del__ handlers.
750+ if name != "__main__" {
751+ continue ;
752+ }
753+
754+ let Some ( module_obj) = weakref. upgrade ( ) else {
755+ continue ;
756+ } ;
757+ let Some ( module) = module_obj. downcast_ref :: < PyModule > ( ) else {
758+ continue ;
759+ } ;
760+
761+ Self :: module_clear_dict ( & module. dict ( ) , self ) ;
762+ }
763+ }
764+
765+ /// 2-pass module dict clearing (_PyModule_ClearDict algorithm).
766+ /// Pass 1: Set names starting with '_' (except __builtins__) to None.
767+ /// Pass 2: Set all remaining names (except __builtins__) to None.
768+ pub ( crate ) fn module_clear_dict ( dict : & Py < PyDict > , vm : & VirtualMachine ) {
769+ let none = vm. ctx . none ( ) ;
770+
771+ // Pass 1: names starting with '_' (except __builtins__)
772+ for ( key, value) in dict. into_iter ( ) . collect :: < Vec < _ > > ( ) {
773+ if vm. is_none ( & value) {
774+ continue ;
775+ }
776+ if let Some ( key_str) = key. downcast_ref :: < PyStr > ( ) {
777+ let name = key_str. as_str ( ) ;
778+ if name. starts_with ( '_' ) && name != "__builtins__" && name != "__spec__" {
779+ let _ = dict. set_item ( name, none. clone ( ) , vm) ;
780+ }
781+ }
782+ }
783+
784+ // Pass 2: all remaining (except __builtins__)
785+ for ( key, value) in dict. into_iter ( ) . collect :: < Vec < _ > > ( ) {
786+ if vm. is_none ( & value) {
787+ continue ;
788+ }
789+ if let Some ( key_str) = key. downcast_ref :: < PyStr > ( )
790+ && key_str. as_str ( ) != "__builtins__"
791+ && key_str. as_str ( ) != "__spec__"
792+ {
793+ let _ = dict. set_item ( key_str. as_str ( ) , none. clone ( ) , vm) ;
794+ }
795+ }
796+ }
797+
798+ /// Phase 5: Clear sys and builtins dicts last.
799+ fn finalize_clear_sys_builtins_dict ( & self ) {
800+ Self :: module_clear_dict ( & self . sys_module . dict ( ) , self ) ;
801+ Self :: module_clear_dict ( & self . builtins . dict ( ) , self ) ;
802+ }
803+
624804 pub fn current_recursion_depth ( & self ) -> usize {
625805 self . recursion_depth . get ( )
626806 }
0 commit comments