diff --git a/.gitignore b/.gitignore index 8898f647..95d38542 100644 --- a/.gitignore +++ b/.gitignore @@ -79,3 +79,6 @@ rust/debug rust/target rust/flamegraph.svg target + +# UV +uv.lock diff --git a/rust/Cargo.lock b/rust/Cargo.lock index f4ae21c1..6c439009 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -21,7 +21,9 @@ dependencies = [ "ruff_python_parser", "ruff_source_file", "rustc-hash 2.1.1", + "serde", "serde_json", + "serde_yaml", "slotmap", "string-interner", "tap", @@ -666,6 +668,19 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap 2.9.0", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "siphasher" version = "1.0.1" @@ -810,6 +825,12 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "version_check" version = "0.9.5" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 8e5acab2..21b170d5 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -25,6 +25,8 @@ const_format = "0.2.34" ruff_python_parser = { git = "https://github.com/astral-sh/ruff.git", tag = "v0.4.10" } ruff_python_ast = { git = "https://github.com/astral-sh/ruff.git", tag = "v0.4.10" } ruff_source_file = { git = "https://github.com/astral-sh/ruff.git", tag = "v0.4.10" } +serde = { version = "1.0", features = ["derive"] } +serde_yaml = "0.9" [dependencies.pyo3] version = "0.24.1" diff --git a/rust/src/filesystem.rs b/rust/src/filesystem.rs new file mode 100644 index 00000000..88d334de --- /dev/null +++ 
b/rust/src/filesystem.rs @@ -0,0 +1,256 @@
+use pyo3::prelude::*;
+use pyo3::exceptions::PyValueError;
+use std::fs;
+use std::io;
+use std::collections::HashMap;
+use std::ops::Add;
+
+/// File system object exposed to Python whose `read` goes to the real disk.
+#[pyclass]
+pub struct RealFileSystem {}
+
+
+#[pymethods]
+impl RealFileSystem {
+    /// Creates the (stateless) file system object.
+    #[new]
+    fn new() -> Self {
+        RealFileSystem {}
+    }
+
+    /// Returns the entire contents of `filename` as a string.
+    ///
+    /// # Errors
+    ///
+    /// Passes on, unchanged, whatever `io::Error` `fs::read_to_string` reports.
+    fn read(&self, filename: &str) -> Result<String, io::Error> {
+        fs::read_to_string(filename)
+    }
+}
+
+
+// A nested map from entry name to node. Because a node may itself hold
+// another map of the same type, the structure can represent arbitrarily deep
+// trees, similar to a nested Python dictionary.
+type FileSystemContents = HashMap<String, FileNode>;
+
+/// One entry in a [`FileSystemContents`] tree.
+#[derive(Debug, PartialEq, serde::Deserialize, serde::Serialize, Clone)]
+#[serde(untagged)] // Allows deserialization into either map or null
+enum FileNode {
+    /// A directory, containing more file system contents.
+    Directory(FileSystemContents),
+    /// A file, represented by `None` in the Python example.
+    // NOTE(review): the payload type was not recoverable from the patch text;
+    // `String` is assumed here - confirm against the repository.
+    File(Option<String>),
+}
+
+/// In-memory file system exposed to Python, backed by a nested map of nodes.
+#[pyclass]
+pub struct FakeFileSystem {
+    contents: FileSystemContents,
+}
+
+impl FakeFileSystem {
+    /// Removes the indentation shared by all non-blank lines.
+    ///
+    /// The common indentation is the smallest number of leading whitespace
+    /// characters found on any line that is not entirely whitespace (0 if there
+    /// is no such line). That many characters are dropped from the front of
+    /// every line; a line shorter than that is returned unchanged.
+    ///
+    /// This is a simplified dedent and might need a more robust implementation
+    /// depending on the exact requirements of the original Python `_dedent`.
+    fn _dedent(lines: Vec<&str>) -> Vec<String> {
+        // Indentation is counted in characters, over non-blank lines only.
+        let min_indent = lines
+            .iter()
+            .filter(|line| !line.trim().is_empty())
+            .map(|line| line.chars().take_while(|c| c.is_whitespace()).count())
+            .min()
+            .unwrap_or(0);
+
+        lines
+            .iter()
+            .map(|line| {
+                // `min_indent` is a character count, so translate it into a byte
+                // offset before slicing: indexing the string with it directly
+                // would panic on whitespace wider than one byte.
+                match line.char_indices().nth(min_indent) {
+                    Some((offset, _)) => line[offset..].to_string(),
+                    // Exactly `min_indent` characters long: nothing is left.
+                    None if line.chars().count() == min_indent => String::new(),
+                    // Shorter than the common indentation: keep it untouched.
+                    None => line.to_string(),
+                }
+            })
+            .collect()
+    }
+
+    /// Parses raw contents into a nested dictionary-like structure.
+    ///
+    /// `raw_contents` is a string in which indentation defines the hierarchy.
+    /// Blank lines are dropped, the remaining lines are dedented, and each line
+    /// has any trailing `/` removed and a `:` appended so that the text can be
+    /// parsed as nested YAML mappings.
+    ///
+    /// # Errors
+    ///
+    /// Returns a Python `ValueError` if the generated YAML cannot be parsed; the
+    /// message includes the parser's own description of the problem.
+    fn parse_contents(
+        raw_contents: &str,
+    ) -> PyResult<FileSystemContents> {
+        let raw_lines: Vec<&str> = raw_contents
+            .split('\n')
+            .filter(|line| !line.trim().is_empty())
+            .collect();
+
+        // Nothing to describe: skip YAML parsing and return an empty tree.
+        if raw_lines.is_empty() {
+            return Ok(FileSystemContents::new());
+        }
+
+        // Turn every entry into a YAML mapping key.
+        let yamlified_string = FakeFileSystem::_dedent(raw_lines)
+            .iter()
+            .map(|line| line.trim_end_matches('/').to_string().add(":"))
+            .collect::<Vec<String>>()
+            .join("\n");
+
+        // Keep the parser's diagnostic so a malformed fixture can be debugged.
+        serde_yaml::from_str(&yamlified_string).map_err(|e| {
+            PyValueError::new_err(format!("Failed to parse YAML from raw_contents: {e}"))
+        })
+    }
+
+    /// Joins `parent` and `child` with a `/`.
+    ///
+    /// An empty `parent` yields `child` on its own, without a leading slash.
+    fn _join_path(parent: &str, child: &str) -> String {
+        if parent.is_empty() {
+            child.to_string()
+        } else {
+            format!("{}/{}", parent, child)
+        }
+    }
+}
+
+/// Iterator state for a walk over a [`FakeFileSystem`].
+///
+/// Each stack entry pairs a directory path with that directory's contents.
+#[pyclass]
+pub struct FakeFileSystemWalkIterator {
+    stack: Vec<(String, FileSystemContents)>,
+}
+
+#[pymethods]
+impl FakeFileSystem {
+    /// Builds a fake file system from an indented textual description.
+    ///
+    /// # Errors
+    ///
+    /// Returns a Python `ValueError` if `contents` cannot be parsed.
+    #[new]
+    fn new(contents: &str) -> PyResult<Self> {
+        Ok(FakeFileSystem {
+            contents: FakeFileSystem::parse_contents(contents)?,
+        })
+    }
+
+    /// Not implemented yet.
+    ///
+    /// # Errors
+    ///
+    /// Always returns a Python `NotImplementedError`. A Rust panic would reach
+    /// Python as a `PanicException`, which derives from `BaseException` and so
+    /// slips past ordinary `except Exception` handlers.
+    // The parameter is unused for now but keeps its name, because the
+    // Python-visible argument name is taken from it.
+    #[allow(unused_variables)]
+    fn read(&self, filename: &str) -> PyResult<String> {
+        Err(pyo3::exceptions::PyNotImplementedError::new_err(
+            "Not yet implemented",
+        ))
+    }
+
+    /// Given a directory, walk the file system recursively.
+    ///
+    /// For each directory in the tree rooted at directory top (including top itself),
+    /// it yields a 3-tuple (dirpath, dirnames, filenames).
+    ///
+    /// Only top-level entries of the file system are looked up: a name that is
+    /// not present produces an iterator that yields nothing.
+    ///
+    /// # Errors
+    ///
+    /// Returns a Python `ValueError` if `directory_name` refers to a file.
+    #[pyo3(name = "walk")]
+    fn py_walk(&self, directory_name: &str) -> PyResult<FakeFileSystemWalkIterator> {
+        let stack = match self.contents.get(directory_name) {
+            // Unknown directory: an empty walk rather than an error.
+            None => Vec::new(),
+            // The iterator owns its data, so the subtree is copied exactly once.
+            Some(FileNode::Directory(contents)) => {
+                vec![(directory_name.to_string(), contents.clone())]
+            }
+            Some(FileNode::File(_)) => {
+                return Err(PyValueError::new_err("Provided path is not a directory"))
+            }
+        };
+
+        Ok(FakeFileSystemWalkIterator { stack })
+    }
+
+    /// The path separator used by this file system, always `/`.
+    #[getter]
+    fn sep(&self) -> String {
+        "/".to_string()
+    }
+
+    /// Joins path components using the file system separator.
+    ///
+    /// Trailing separators are stripped from every component before joining, so
+    /// `["/path/to/", "file.py"]` gives `/path/to/file.py`. Unlike Python's
+    /// `os.path.join`, a later absolute component does not discard the earlier
+    /// ones.
+    fn join(&self, components: Vec<String>) -> String {
+        let sep = self.sep(); // Get the separator from the getter method
+        components
+            .iter()
+            .map(|component| component.trim_end_matches(sep.as_str()))
+            .collect::<Vec<&str>>()
+            .join(sep.as_str())
+    }
+
+    /// Split the path into a pair of (head, tail) where tail is the last
+    /// pathname component and head is everything leading up to that.
+    ///
+    /// This is equivalent to Python's `os.path.split`.
+    #[pyo3(name = "split")]
+    fn py_split(&self, file_name: &str) -> (String, String) {
+        // Everything hinges on the last '/' in the path.
+        match file_name.rsplit_once('/') {
+            // No separator at all ("file.txt" or ""): the whole input is the
+            // tail and the head is empty.
+            None => (String::new(), file_name.to_string()),
+            // The only separator is the leading one ("/" or "/file.txt"): the
+            // head is the root itself.
+            Some(("", tail)) => ("/".to_string(), tail.to_string()),
+            // General case: the head is everything before the last separator,
+            // without that separator.
+            Some((head, tail)) => (head.to_string(), tail.to_string()),
+        }
+    }
+
+    /// Return the full path to the directory name of the supplied filename.
+    ///
+    /// E.g. '/path/to/filename.py' will return '/path/to'.
+    #[pyo3(name = "dirname")]
+    fn py_dirname(&self, filename: &str) -> String {
+        // The directory name is the head of the (head, tail) split.
+        self.py_split(filename).0
+    }
+}
+
+
+#[pymethods]
+impl FakeFileSystemWalkIterator {
+    /// The object is its own iterator.
+    fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
+        slf
+    }
+
+    /// Yields the next `(dirpath, dirnames, filenames)` tuple, or `None` once
+    /// every directory on the stack has been visited.
+    ///
+    /// Both name lists are sorted. The subdirectories of the yielded directory
+    /// are queued so that they are visited next, in sorted order, before any
+    /// directory that was queued earlier.
+    fn __next__(&mut self) -> Option<(String, Vec<String>, Vec<String>)> {
+        // An empty stack ends the iteration.
+        let (current_dir_path, current_dir_contents) = self.stack.pop()?;
+
+        // Consume the map so that child directories are moved, not cloned.
+        let mut subdirectories: Vec<(String, FileSystemContents)> = Vec::new();
+        let mut files: Vec<String> = Vec::new();
+        for (name, node) in current_dir_contents {
+            match node {
+                FileNode::Directory(contents) => subdirectories.push((name, contents)),
+                FileNode::File(_) => files.push(name),
+            }
+        }
+
+        // Sort for consistent output, independent of hash map ordering.
+        subdirectories.sort_by(|left, right| left.0.cmp(&right.0));
+        files.sort();
+
+        let directories: Vec<String> = subdirectories
+            .iter()
+            .map(|(name, _)| name.clone())
+            .collect();
+
+        // Push in reverse so that popping (LIFO) visits them in sorted order.
+        for (name, contents) in subdirectories.into_iter().rev() {
+            let full_subdir_path = FakeFileSystem::_join_path(&current_dir_path, &name);
+            self.stack.push((full_subdir_path, contents));
+        }
+
+        Some((current_dir_path, directories, files))
+    }
+}
\ No newline at end of file
diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 18327cd2..966c460a 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -3,6 +3,7 @@ pub mod exceptions; pub mod graph; pub mod import_parsing; pub mod module_expressions; +mod filesystem; use crate::errors::{GrimpError, GrimpResult}; use crate::exceptions::{InvalidModuleExpression, ModuleNotPresent, NoSuchContainer, ParseError}; @@ -18,11 +19,14 @@ use pyo3::types::{IntoPyDict, PyDict, PyFrozenSet, PyList, PySet, PyString, PyTu use rayon::prelude::*; use rustc_hash::FxHashSet; use std::collections::HashSet; +use filesystem::{RealFileSystem, FakeFileSystem}; #[pymodule] fn _rustgrimp(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { 
m.add_wrapped(wrap_pyfunction!(parse_imported_objects_from_code))?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; m.add("ModuleNotPresent", py.get_type::())?; m.add("NoSuchContainer", py.get_type::())?; m.add( @@ -640,3 +644,5 @@ struct Route { middle: Vec, tails: Vec, } + + diff --git a/src/grimp/adaptors/filesystem.py b/src/grimp/adaptors/filesystem.py index 9f637982..06f2f480 100644 --- a/src/grimp/adaptors/filesystem.py +++ b/src/grimp/adaptors/filesystem.py @@ -2,10 +2,8 @@ import tokenize from typing import Iterator, List, Tuple -from grimp.application.ports.filesystem import AbstractFileSystem - -class FileSystem(AbstractFileSystem): +class FileSystem: """ Abstraction around file system calls. """ diff --git a/src/grimp/adaptors/importscanner.py b/src/grimp/adaptors/importscanner.py index a405facc..7d877245 100644 --- a/src/grimp/adaptors/importscanner.py +++ b/src/grimp/adaptors/importscanner.py @@ -128,6 +128,8 @@ def _read_module_contents(self, module_filename: str) -> str: """ Read the file contents of the module. """ + if self.file_system_2: + return self.file_system_2.read(module_filename) # type: ignore[attr-defined] return self.file_system.read(module_filename) def _module_is_package(self, module_filename: str) -> bool: diff --git a/src/grimp/application/ports/filesystem.py b/src/grimp/application/ports/filesystem.py index b11125be..b699438e 100644 --- a/src/grimp/application/ports/filesystem.py +++ b/src/grimp/application/ports/filesystem.py @@ -1,14 +1,12 @@ -import abc -from typing import Iterator, List, Tuple +from typing import Iterator, List, Tuple, Protocol -class AbstractFileSystem(abc.ABC): +class AbstractFileSystem(Protocol): """ Abstraction around file system calls. """ @property - @abc.abstractmethod def sep(self) -> str: """ Return the file separator for the FileSystem. @@ -16,16 +14,13 @@ def sep(self) -> str: E.G. 
'/' for UNIX systems and '\\' for Windows systems """ - @abc.abstractmethod def dirname(self, filename: str) -> str: """ Return the full path to the directory name of the supplied filename. E.g. '/path/to/filename.py' will return '/path/to'. """ - raise NotImplementedError - @abc.abstractmethod def walk(self, directory_name: str) -> Iterator[Tuple[str, List[str], List[str]]]: """ Given a directory, walk the file system recursively. @@ -33,13 +28,10 @@ def walk(self, directory_name: str) -> Iterator[Tuple[str, List[str], List[str]] For each directory in the tree rooted at directory top (including top itself), it yields a 3-tuple (dirpath, dirnames, filenames). """ - raise NotImplementedError - @abc.abstractmethod def join(self, *components: str) -> str: - raise NotImplementedError + ... - @abc.abstractmethod def split(self, file_name: str) -> Tuple[str, str]: """ Split the pathname path into a pair, (head, tail) where tail is the last pathname component @@ -49,34 +41,36 @@ def split(self, file_name: str) -> Tuple[str, str]: head unless it is the root (one or more slashes only). In all cases, join(head, tail) returns a path to the same location as path (but the strings may differ). """ - raise NotImplementedError - @abc.abstractmethod def read(self, file_name: str) -> str: """ Given a file name, return the contents of the file. """ - raise NotImplementedError - @abc.abstractmethod def exists(self, file_name: str) -> bool: """ Return whether a file exists. """ - raise NotImplementedError - @abc.abstractmethod def get_mtime(self, file_name: str) -> float: """ Return the mtime of a file. Raises FileNotFoundError if the file does not exist. """ - raise NotImplementedError - @abc.abstractmethod def write(self, file_name: str, contents: str) -> None: """ Write the contents to a file. """ - raise NotImplementedError + + +class FileSystem2(Protocol): + """ + New abstraction around file system calls. 
+ """ + + def exists(self, file_name: str) -> bool: + """ + Return whether a file exists. + """ diff --git a/src/grimp/application/ports/importscanner.py b/src/grimp/application/ports/importscanner.py index 34da5be9..f7651d54 100644 --- a/src/grimp/application/ports/importscanner.py +++ b/src/grimp/application/ports/importscanner.py @@ -1,9 +1,10 @@ import abc from typing import Set -from grimp.application.ports.filesystem import AbstractFileSystem +from grimp.application.ports.filesystem import AbstractFileSystem, FileSystem2 from grimp.application.ports.modulefinder import FoundPackage from grimp.domain.valueobjects import DirectImport, Module +from typing import Optional class AbstractImportScanner(abc.ABC): @@ -16,6 +17,7 @@ def __init__( file_system: AbstractFileSystem, found_packages: Set[FoundPackage], include_external_packages: bool = False, + file_system_2: Optional[FileSystem2] = None, ) -> None: """ Args: @@ -27,6 +29,7 @@ def __init__( in the results. """ self.file_system = file_system + self.file_system_2 = file_system_2 self.include_external_packages = include_external_packages self.found_packages = found_packages diff --git a/src/grimp/application/usecases.py b/src/grimp/application/usecases.py index eee3286c..247f2f44 100644 --- a/src/grimp/application/usecases.py +++ b/src/grimp/application/usecases.py @@ -2,13 +2,13 @@ Use cases handle application logic. 
""" -from typing import Dict, Sequence, Set, Type, Union, cast, Iterable, Collection +from typing import Dict, Sequence, Set, Type, Union, cast, Iterable, Collection, Optional import math import joblib # type: ignore from ..application.ports import caching -from ..application.ports.filesystem import AbstractFileSystem +from ..application.ports.filesystem import AbstractFileSystem, FileSystem2 from ..application.ports.graph import ImportGraph from ..application.ports.importscanner import AbstractImportScanner from ..application.ports.modulefinder import AbstractModuleFinder, FoundPackage, ModuleFile @@ -61,6 +61,7 @@ def build_graph( """ file_system: AbstractFileSystem = settings.FILE_SYSTEM + file_system_2: Optional[FileSystem2] = settings.FILE_SYSTEM_2 found_packages = _find_packages( file_system=file_system, @@ -70,6 +71,7 @@ def build_graph( imports_by_module = _scan_packages( found_packages=found_packages, file_system=file_system, + file_system_2=file_system_2, include_external_packages=include_external_packages, exclude_type_checking_imports=exclude_type_checking_imports, cache_dir=cache_dir, @@ -115,6 +117,7 @@ def _validate_package_names_are_strings( def _scan_packages( found_packages: Set[FoundPackage], file_system: AbstractFileSystem, + file_system_2: Optional[FileSystem2], include_external_packages: bool, exclude_type_checking_imports: bool, cache_dir: Union[str, Type[NotSupplied], None], @@ -146,6 +149,7 @@ def _scan_packages( _scan_imports( remaining_module_files_to_scan, file_system=file_system, + file_system_2=file_system_2, found_packages=found_packages, include_external_packages=include_external_packages, exclude_type_checking_imports=exclude_type_checking_imports, @@ -214,14 +218,20 @@ def _scan_imports( module_files: Collection[ModuleFile], *, file_system: AbstractFileSystem, + file_system_2: Optional[FileSystem2], found_packages: Set[FoundPackage], include_external_packages: bool, exclude_type_checking_imports: bool, ) -> Dict[ModuleFile, 
Set[DirectImport]]: - chunks = _create_chunks(module_files) + if file_system_2: + # Multiprocessing is not supported - just do one chunk. + chunks: Collection[Collection[ModuleFile]] = (module_files,) + else: + chunks = _create_chunks(module_files) return _scan_chunks( chunks, file_system, + file_system_2, found_packages, include_external_packages, exclude_type_checking_imports, @@ -259,12 +269,14 @@ def _decide_number_of_processes(number_of_module_files: int) -> int: def _scan_chunks( chunks: Collection[Collection[ModuleFile]], file_system: AbstractFileSystem, + file_system_2: Optional[FileSystem2], found_packages: Set[FoundPackage], include_external_packages: bool, exclude_type_checking_imports: bool, ) -> Dict[ModuleFile, Set[DirectImport]]: import_scanner: AbstractImportScanner = settings.IMPORT_SCANNER_CLASS( file_system=file_system, + file_system_2=file_system_2, found_packages=found_packages, include_external_packages=include_external_packages, ) diff --git a/src/grimp/main.py b/src/grimp/main.py index 70622b05..d38a4802 100644 --- a/src/grimp/main.py +++ b/src/grimp/main.py @@ -9,10 +9,12 @@ from .adaptors.timing import SystemClockTimer from .application.config import settings from .application.usecases import build_graph +from grimp import _rustgrimp as rust # type: ignore[attr-defined] settings.configure( MODULE_FINDER=ModuleFinder(), FILE_SYSTEM=FileSystem(), + FILE_SYSTEM_2=rust.RealFileSystem(), IMPORT_SCANNER_CLASS=ImportScanner, IMPORT_GRAPH_CLASS=ImportGraph, PACKAGE_FINDER=ImportLibPackageFinder(), diff --git a/tests/adaptors/filesystem.py b/tests/adaptors/filesystem.py index 6eac8cca..190c1078 100644 --- a/tests/adaptors/filesystem.py +++ b/tests/adaptors/filesystem.py @@ -2,12 +2,11 @@ import yaml -from grimp.application.ports.filesystem import AbstractFileSystem DEFAULT_MTIME = 10000.0 -class FakeFileSystem(AbstractFileSystem): +class FakeFileSystem: def __init__( self, contents: Optional[str] = None, diff --git 
a/tests/unit/adaptors/test_filesystem.py b/tests/unit/adaptors/test_filesystem.py index a59a173c..3de2ac4f 100644 --- a/tests/unit/adaptors/test_filesystem.py +++ b/tests/unit/adaptors/test_filesystem.py @@ -2,7 +2,9 @@ import pytest # type: ignore -from tests.adaptors.filesystem import FakeFileSystem +from grimp import _rustgrimp as rust # type: ignore[attr-defined] + +FakeFileSystem = rust.FakeFileSystem class TestFakeFileSystem: @@ -25,7 +27,7 @@ def test_walk(self): assert [ ("/path/to/mypackage", ["foo"], ["__init__.py"]), ("/path/to/mypackage/foo", ["two"], ["__init__.py", "one.py"]), - ("/path/to/mypackage/foo/two", [], ["__init__.py", "green.py", "blue.py"]), + ("/path/to/mypackage/foo/two", [], ["__init__.py", "blue.py", "green.py"]), ] == list(file_system.walk("/path/to/mypackage")) def test_empty_if_directory_does_not_exist(self): @@ -38,16 +40,16 @@ def test_empty_if_directory_does_not_exist(self): assert [] == list(file_system.walk("/path/to/nonexistent/package")) def test_dirname(self): - file_system = FakeFileSystem() + file_system = FakeFileSystem("") assert "/path/to" == file_system.dirname("/path/to/file.txt") @pytest.mark.parametrize("path", ("/path/to", "/path/to/")) def test_join(self, path): - file_system = FakeFileSystem() - assert "/path/to/mypackage/file.py" == file_system.join(path, "mypackage", "file.py") + file_system = FakeFileSystem("") + assert "/path/to/mypackage/file.py" == file_system.join([path, "mypackage", "file.py"]) def test_split(self): - file_system = FakeFileSystem() + file_system = FakeFileSystem("") assert ("/path/to/mypackage", "file.py") == file_system.split("/path/to/mypackage/file.py") def test_dirnames_can_be_modified_in_place(self): diff --git a/tests/unit/application/test_usecases.py b/tests/unit/application/test_usecases.py index a8f14207..ca0c494a 100644 --- a/tests/unit/application/test_usecases.py +++ b/tests/unit/application/test_usecases.py @@ -45,7 +45,11 @@ def test_happy_path(self, 
include_external_packages): class FakePackageFinder(BaseFakePackageFinder): directory_map = {"mypackage": "/path/to/mypackage"} - with override_settings(FILE_SYSTEM=file_system, PACKAGE_FINDER=FakePackageFinder()): + with override_settings( + FILE_SYSTEM=file_system, + FILE_SYSTEM_2=None, + PACKAGE_FINDER=FakePackageFinder(), + ): graph = usecases.build_graph( "mypackage", include_external_packages=include_external_packages ) @@ -200,6 +204,7 @@ class FakeModuleFinder(BaseFakeModuleFinder): with override_settings( FILE_SYSTEM=FakeFileSystem(), + FILE_SYSTEM_2=None, PACKAGE_FINDER=FakePackageFinder(), MODULE_FINDER=FakeModuleFinder(), ), patch.object(os, "environ", fake_environ):