diff --git a/pre_commit/clientlib.py b/pre_commit/clientlib.py index 51f14d26e..99ebfebeb 100644 --- a/pre_commit/clientlib.py +++ b/pre_commit/clientlib.py @@ -72,11 +72,20 @@ def transform_stage(stage: str) -> str: return _STAGES.get(stage, stage) -MINIMAL_MANIFEST_SCHEMA = cfgv.Array( - cfgv.Map( - 'Hook', 'id', - cfgv.Required('id', cfgv.check_string), - cfgv.Optional('stages', cfgv.check_array(cfgv.check_string), []), +_MINIMAL_MANIFEST_SCHEMA = cfgv.Map( + 'Manifest', None, + cfgv.RequiredRecurse( + 'hooks', + cfgv.KeyValueMap( + 'Hooks', + cfgv.check_string, + cfgv.Map( + 'Hook', None, + cfgv.Optional( + 'stages', cfgv.check_array(cfgv.check_string), [], + ), + ), + ), ), ) @@ -85,16 +94,16 @@ def warn_for_stages_on_repo_init(repo: str, directory: str) -> None: try: manifest = cfgv.load_from_filename( os.path.join(directory, C.MANIFEST_FILE), - schema=MINIMAL_MANIFEST_SCHEMA, - load_strategy=yaml_load, + schema=_MINIMAL_MANIFEST_SCHEMA, + load_strategy=_load_manifest_backward_compat, exc_tp=InvalidManifestError, ) except InvalidManifestError: return # they'll get a better error message when it actually loads! legacy_stages = {} # sorted set - for hook in manifest: - for stage in hook.get('stages', ()): + for hook in manifest['hooks'].values(): + for stage in hook['stages']: if stage in _STAGES: legacy_stages[stage] = True @@ -228,7 +237,7 @@ def check(self, dct: dict[str, Any]) -> None: MANIFEST_HOOK_DICT = cfgv.Map( - 'Hook', 'id', + 'Hook', None, # check first in case it uses some newer, incompatible feature cfgv.Optional( @@ -237,7 +246,6 @@ def check(self, dct: dict[str, Any]) -> None: '0', ), - cfgv.Required('id', cfgv.check_string), cfgv.Required('name', cfgv.check_string), cfgv.Required('entry', cfgv.check_string), LanguageMigrationRequired('language', cfgv.check_one_of(language_names)), @@ -263,18 +271,38 @@ def check(self, dct: dict[str, Any]) -> None: StagesMigration('stages', []), cfgv.Optional('verbose', cfgv.check_bool, False), ) -MANIFEST_SCHEMA = cfgv.Array(MANIFEST_HOOK_DICT) + + +MANIFEST_SCHEMA = cfgv.Map( + 'Manifest', None, + + # check first in case it uses some newer, incompatible feature + cfgv.Optional( + 'minimum_pre_commit_version', + cfgv.check_and(cfgv.check_string, check_min_version), + '0', + ), + cfgv.RequiredRecurse( + 'hooks', + cfgv.KeyValueMap('Hooks', cfgv.check_string, MANIFEST_HOOK_DICT), + ), +) class InvalidManifestError(FatalError): pass -def _load_manifest_forward_compat(contents: str) -> object: +_MINIMAL_LEGACY_MANIFEST_SCHEMA = cfgv.Array( + cfgv.Map('Hook', 'id', cfgv.Required('id', cfgv.check_string)), +) + + +def _load_manifest_backward_compat(contents: str) -> object: obj = yaml_load(contents) - if isinstance(obj, dict): - check_min_version('5') - raise AssertionError('unreachable') + if isinstance(obj, list): + cfgv.validate(obj, _MINIMAL_LEGACY_MANIFEST_SCHEMA) + return {'hooks': {hook.pop('id'): hook for hook in obj}} else: return obj @@ -282,11 +310,21 @@ def _load_manifest_forward_compat(contents: str) -> object: load_manifest = functools.partial( cfgv.load_from_filename, schema=MANIFEST_SCHEMA, - load_strategy=_load_manifest_forward_compat, + load_strategy=_load_manifest_backward_compat, exc_tp=InvalidManifestError, ) +def load_manifest_contents(repo: str, contents: str) -> dict[str, Any]: + with ( + cfgv.reraise_as(InvalidManifestError), + cfgv.validate_context(f'File ({repo})/{C.MANIFEST_FILE}'), + ): + obj = _load_manifest_backward_compat(contents) + cfgv.validate(obj, MANIFEST_SCHEMA) + return cfgv.apply_defaults(obj, MANIFEST_SCHEMA) + + LOCAL = 'local' META = 'meta' @@ -448,7 +486,6 @@ def check(self, dct: dict[str, Any]) -> None: *( cfgv.OptionalNoDefault(item.key, item.check_fn) for item in MANIFEST_HOOK_DICT.items - if item.key != 'id' if item.key != 'stages' if item.key != 'language' # remove ), @@ -459,6 +496,7 @@ def check(self, dct: dict[str, Any]) -> None: LOCAL_HOOK_DICT = cfgv.Map( 'Hook', 'id', + cfgv.Required('id', cfgv.check_string), *MANIFEST_HOOK_DICT.items, *_COMMON_HOOK_WARNINGS, ) @@ -549,3 +587,18 @@ class InvalidConfigError(FatalError): load_strategy=yaml_load, exc_tp=InvalidConfigError, ) + + +class _AnySchema: + def check(self, v: object) -> None: + pass + + def apply_defaults(self, v: object) -> object: + return v + + +load_raw = functools.partial( + cfgv.load_from_filename, + schema=_AnySchema(), + load_strategy=yaml_load, +) diff --git a/pre_commit/commands/autoupdate.py b/pre_commit/commands/autoupdate.py index aa0c5e25e..838d3642e 100644 --- a/pre_commit/commands/autoupdate.py +++ b/pre_commit/commands/autoupdate.py @@ -77,7 +77,7 @@ def update(self, tags_only: bool, freeze: bool) -> RevInfo: except InvalidManifestError as e: raise RepositoryCannotBeUpdatedError(f'[{self.repo}] {e}') else: - hook_ids = frozenset(hook['id'] for hook in manifest) + hook_ids = frozenset(hook['id'] for hook in manifest['hooks']) return self._replace(rev=rev, frozen=frozen, hook_ids=hook_ids) diff --git a/pre_commit/commands/gc.py b/pre_commit/commands/gc.py index 975d5e4c1..38f5635f7 100644 --- a/pre_commit/commands/gc.py +++ b/pre_commit/commands/gc.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json import os.path from typing import Any @@ -15,18 +16,21 @@ from pre_commit.util import rmtree -def _mark_used_repos( - store: Store, - all_repos: dict[tuple[str, str], str], - unused_repos: set[tuple[str, str]], +def _mark_used( + config: dict[str, Any], repo: dict[str, Any], + manifests: dict[tuple[str, str], dict[str, Any]], + unused_manifests: set[tuple[str, str]], + unused_installs: set[str], ) -> None: if repo['repo'] == META: return elif repo['repo'] == LOCAL: for hook in repo['hooks']: - deps = hook.get('additional_dependencies') - unused_repos.discard(( + deps = hook['additional_dependencies'] + unused_installs.discard(( + repo['repo'], C.LOCAL_REPO_VERSION, + repo['language'], store.db_repo_name(repo['repo'], deps), C.LOCAL_REPO_VERSION, )) @@ -43,7 +47,7 @@ def _mark_used_repos( return else: unused_repos.discard(key) - by_id = {hook['id']: hook for hook in manifest} + by_id = manifest['hooks'] for hook in repo['hooks']: if hook['id'] not in by_id: @@ -60,11 +64,16 @@ def _mark_used_repos( def _gc(store: Store) -> int: with store.exclusive_lock(), store.connect() as db: - store._create_configs_table(db) + installs_rows = db.execute('SELECT key, path FROM installs').fetchall() + all_installs = dict(installs_rows) + unused_installs = set(all_installs) - repos = db.execute('SELECT repo, ref, path FROM repos').fetchall() - all_repos = {(repo, ref): path for repo, ref, path in repos} - unused_repos = set(all_repos) + manifests_query = 'SELECT repo, rev, manifest FROM manifests' + manifests = { + (repo, rev): json.loads(manifest) + for repo, rev, manifest in db.execute(manifests_query).fetchall() + } + unused_manifests = set(manifests) configs_rows = db.execute('SELECT path FROM configs').fetchall() configs = [path for path, in configs_rows] @@ -78,7 +87,13 @@ def _gc(store: Store) -> int: continue else: for repo in config['repos']: - _mark_used_repos(store, all_repos, unused_repos, repo) + _mark_used( + config, + repo, + manifests, + unused_manifests, + unused_installs, + ) paths = [(path,) for path in dead_configs] db.executemany('DELETE FROM configs WHERE path = ?', paths) @@ -94,5 +109,7 @@ def _gc(store: Store) -> int: def gc(store: Store) -> int: - output.write_line(f'{_gc(store)} repo(s) removed.') + installs, clones = _gc(store) + output.write_line(f'{clones} clone(s) removed.') + output.write_line(f'{installs} installs(s) removed.') return 0 diff --git a/pre_commit/commands/try_repo.py b/pre_commit/commands/try_repo.py index 539ed3c2b..8cf93907f 100644 --- a/pre_commit/commands/try_repo.py +++ b/pre_commit/commands/try_repo.py @@ -58,8 +58,8 @@ def try_repo(args: argparse.Namespace) -> int: else: repo_path = store.clone(repo, ref) manifest = load_manifest(os.path.join(repo_path, C.MANIFEST_FILE)) - manifest = sorted(manifest, key=lambda hook: hook['id']) - hooks = [{'id': hook['id']} for hook in manifest] + hook_ids = sorted(hook['id'] for hook in manifest['hooks']) + hooks = [{'id': hook_id} for hook_id in hook_ids] config = {'repos': [{'repo': repo, 'rev': ref, 'hooks': hooks}]} config_s = yaml_dump(config) diff --git a/pre_commit/hook.py b/pre_commit/hook.py index 309cd5be3..976bb3d34 100644 --- a/pre_commit/hook.py +++ b/pre_commit/hook.py @@ -1,18 +1,28 @@ from __future__ import annotations +import json import logging from collections.abc import Sequence from typing import Any from typing import NamedTuple -from pre_commit.prefix import Prefix - logger = logging.getLogger('pre_commit') +class InstallKey(NamedTuple): + repo: str + rev: str + language: str + language_version: str + additional_dependencies: tuple[str, ...] + + def keystr(self) -> str: + return json.dumps(self, separators=(',', ':')) + + class Hook(NamedTuple): - src: str - prefix: Prefix + repo: str + rev: str id: str name: str entry: str @@ -37,24 +47,25 @@ class Hook(NamedTuple): verbose: bool @property - def install_key(self) -> tuple[Prefix, str, str, tuple[str, ...]]: - return ( - self.prefix, - self.language, - self.language_version, - tuple(self.additional_dependencies), + def install_key(self) -> InstallKey: + return InstallKey( + repo=self.repo, + rev=self.rev, + language=self.language, + language_version=self.language_version, + additional_dependencies=tuple(self.additional_dependencies), ) @classmethod - def create(cls, src: str, prefix: Prefix, dct: dict[str, Any]) -> Hook: + def create(cls, repo: str, rev: str, dct: dict[str, Any]) -> Hook: # TODO: have cfgv do this (?) extra_keys = set(dct) - _KEYS if extra_keys: logger.warning( - f'Unexpected key(s) present on {src} => {dct["id"]}: ' + f'Unexpected key(s) present on {repo} => {dct["id"]}: ' f'{", ".join(sorted(extra_keys))}', ) - return cls(src=src, prefix=prefix, **{k: dct[k] for k in _KEYS}) + return cls(repo=repo, rev=rev, **{k: dct[k] for k in _KEYS}) -_KEYS = frozenset(set(Hook._fields) - {'src', 'prefix'}) +_KEYS = frozenset(set(Hook._fields) - {'repo', 'rev'}) diff --git a/pre_commit/lang_base.py b/pre_commit/lang_base.py index 198e93657..b0f40faf1 100644 --- a/pre_commit/lang_base.py +++ b/pre_commit/lang_base.py @@ -25,9 +25,6 @@ class Language(Protocol): - # Use `None` for no installation / environment - @property - def ENVIRONMENT_DIR(self) -> str | None: ... # return a value to replace `'default` for `language_version` def get_default_version(self) -> str: ... # return whether the environment is healthy (or should be rebuilt) @@ -36,7 +33,8 @@ def health_check(self, prefix: Prefix, version: str) -> str | None: ... # install a repository for the given language and language_version def install_environment( self, - prefix: Prefix, + clone: Prefix, + dest: Prefix, version: str, additional_dependencies: Sequence[str], ) -> None: @@ -87,10 +85,6 @@ def setup_cmd(prefix: Prefix, cmd: tuple[str, ...], **kwargs: Any) -> None: cmd_output_b(*cmd, cwd=prefix.prefix_dir, **kwargs) -def environment_dir(prefix: Prefix, d: str, language_version: str) -> str: - return prefix.path(f'{d}-{language_version}') - - def assert_version_default(binary: str, version: str) -> None: if version != C.DEFAULT: raise AssertionError( diff --git a/pre_commit/repository.py b/pre_commit/repository.py index a9461ab63..6edeedf09 100644 --- a/pre_commit/repository.py +++ b/pre_commit/repository.py @@ -1,91 +1,47 @@ from __future__ import annotations -import json import logging import os +import tempfile from collections.abc import Sequence from typing import Any import pre_commit.constants as C +from pre_commit import lang_base from pre_commit.all_languages import languages -from pre_commit.clientlib import load_manifest from pre_commit.clientlib import LOCAL from pre_commit.clientlib import META from pre_commit.hook import Hook -from pre_commit.lang_base import environment_dir +from pre_commit.hook import InstallKey from pre_commit.prefix import Prefix from pre_commit.store import Store from pre_commit.util import clean_path_on_failure -from pre_commit.util import rmtree logger = logging.getLogger('pre_commit') -def _state_filename_v1(venv: str) -> str: - return os.path.join(venv, '.install_state_v1') +def _state_filename_v5(d: str) -> str: + return os.path.join(d, '.pre-commit-state-v5') -def _state_filename_v2(venv: str) -> str: - return os.path.join(venv, '.install_state_v2') - - -def _state(additional_deps: Sequence[str]) -> object: - return {'additional_dependencies': additional_deps} - - -def _read_state(venv: str) -> object | None: - filename = _state_filename_v1(venv) - if not os.path.exists(filename): - return None - else: - with open(filename) as f: - return json.load(f) - - -def _hook_installed(hook: Hook) -> bool: - lang = languages[hook.language] - if lang.ENVIRONMENT_DIR is None: - return True - - venv = environment_dir( - hook.prefix, - lang.ENVIRONMENT_DIR, - hook.language_version, - ) - return ( - ( - os.path.exists(_state_filename_v2(venv)) or - _read_state(venv) == _state(hook.additional_dependencies) - ) and - not lang.health_check(hook.prefix, hook.language_version) - ) - - -def _hook_install(hook: Hook) -> None: - logger.info(f'Installing environment for {hook.src}.') +def _hook_install(hook: Hook, store: Store) -> None: + logger.info(f'Installing environment for {hook.repo}.') logger.info('Once installed this environment will be reused.') logger.info('This may take a few minutes...') lang = languages[hook.language] - assert lang.ENVIRONMENT_DIR is not None - venv = environment_dir( - hook.prefix, - lang.ENVIRONMENT_DIR, - hook.language_version, - ) + clone = store.clone(hook.repo, hook.rev) + dest = tempfile.mkdtemp(prefix='i-', dir=store.directory) - # There's potentially incomplete cleanup from previous runs - # Clean it up! - if os.path.exists(venv): - rmtree(venv) - - with clean_path_on_failure(venv): + with clean_path_on_failure(dest): + prefix = Prefix(dest) lang.install_environment( - hook.prefix, hook.language_version, hook.additional_dependencies, + Prefix(clone), prefix, + hook.language_version, hook.additional_dependencies, ) - health_error = lang.health_check(hook.prefix, hook.language_version) + health_error = lang.health_check(prefix, hook.language_version) if health_error: raise AssertionError( f'BUG: expected environment for {hook.language} to be healthy ' @@ -94,16 +50,12 @@ def _hook_install(hook: Hook) -> None: f'more info:\n\n{health_error}', ) - # TODO: remove v1 state writing, no longer needed after pre-commit 3.0 - # Write our state to indicate we're installed - state_filename = _state_filename_v1(venv) - staging = f'{state_filename}staging' - with open(staging, 'w') as state_file: - state_file.write(json.dumps(_state(hook.additional_dependencies))) - # Move the file into place atomically to indicate we've installed - os.replace(staging, state_filename) + # TODO: need more info? + open(_state_filename_v5(dest), 'a+').close() + - open(_state_filename_v2(venv), 'a+').close() +def _hook_installed(hook: Hook, store: Store) -> bool: + return False def _hook( @@ -123,7 +75,7 @@ def _hook( if not ret['stages']: ret['stages'] = root_config['default_stages'] - if languages[lang].ENVIRONMENT_DIR is None: + if languages[lang].install_environment is lang_base.no_install: if ret['language_version'] != C.DEFAULT: logger.error( f'The hook `{ret["id"]}` specifies `language_version` but is ' @@ -131,7 +83,7 @@ def _hook( f'environment. ' f'Perhaps you meant to use a specific language?', ) - exit(1) + raise SystemExit(1) if ret['additional_dependencies']: logger.error( f'The hook `{ret["id"]}` specifies `additional_dependencies` ' @@ -139,86 +91,64 @@ def _hook( f'environment. ' f'Perhaps you meant to use a specific language?', ) - exit(1) + raise SystemExit(1) return ret -def _non_cloned_repository_hooks( +def _repository_hooks( repo_config: dict[str, Any], store: Store, root_config: dict[str, Any], ) -> tuple[Hook, ...]: - def _prefix(language_name: str, deps: Sequence[str]) -> Prefix: - language = languages[language_name] - # pygrep / script / system / docker_image do not have - # environments so they work out of the current directory - if language.ENVIRONMENT_DIR is None: - return Prefix(os.getcwd()) - else: - return Prefix(store.make_local(deps)) - - return tuple( - Hook.create( - repo_config['repo'], - _prefix(hook['language'], hook['additional_dependencies']), - _hook(hook, root_config=root_config), + repo = repo_config['repo'] + if repo == META: + return tuple( + Hook.create( + repo, '', + _hook(hook, root_config=root_config), + ) + for hook in repo_config['hooks'] ) - for hook in repo_config['hooks'] - ) - - -def _cloned_repository_hooks( - repo_config: dict[str, Any], - store: Store, - root_config: dict[str, Any], -) -> tuple[Hook, ...]: - repo, rev = repo_config['repo'], repo_config['rev'] - manifest_path = os.path.join(store.clone(repo, rev), C.MANIFEST_FILE) - by_id = {hook['id']: hook for hook in load_manifest(manifest_path)} - - for hook in repo_config['hooks']: - if hook['id'] not in by_id: - logger.error( - f'`{hook["id"]}` is not present in repository {repo}. ' - f'Typo? Perhaps it is introduced in a newer version? ' - f'Often `pre-commit autoupdate` fixes this.', + elif repo == LOCAL: + return tuple( + Hook.create( + repo, C.LOCAL_REPO_VERSION, + _hook(hook, root_config=root_config), ) - exit(1) - - hook_dcts = [ - _hook(by_id[hook['id']], hook, root_config=root_config) - for hook in repo_config['hooks'] - ] - return tuple( - Hook.create( - repo_config['repo'], - Prefix(store.clone(repo, rev, hook['additional_dependencies'])), - hook, + for hook in repo_config['hooks'] ) - for hook in hook_dcts - ) - - -def _repository_hooks( - repo_config: dict[str, Any], - store: Store, - root_config: dict[str, Any], -) -> tuple[Hook, ...]: - if repo_config['repo'] in {LOCAL, META}: - return _non_cloned_repository_hooks(repo_config, store, root_config) else: - return _cloned_repository_hooks(repo_config, store, root_config) + rev = repo_config['rev'] + by_id = store.manifest(repo, rev)['hooks'] + + for hook in repo_config['hooks']: + if hook['id'] not in by_id: + logger.error( + f'`{hook["id"]}` is not present in repository {repo}. ' + f'Typo? Perhaps it is introduced in a newer version? ' + f'Often `pre-commit autoupdate` fixes this.', + ) + raise SystemExit(1) + + return tuple( + Hook.create( + repo, rev, + _hook(by_id[hook['id']], hook, root_config=root_config), + ) + for hook in repo_config['hooks'] + ) def install_hook_envs(hooks: Sequence[Hook], store: Store) -> None: def _need_installed() -> list[Hook]: - seen: set[tuple[Prefix, str, str, tuple[str, ...]]] = set() + seen: set[InstallKey] = set() ret = [] for hook in hooks: - if hook.install_key not in seen and not _hook_installed(hook): + key = hook.install_key + if key not in seen and not _hook_installed(hook, store): ret.append(hook) - seen.add(hook.install_key) + seen.add(key) return ret if not _need_installed(): @@ -226,7 +156,7 @@ def _need_installed() -> list[Hook]: with store.exclusive_lock(): # Another process may have already completed this work for hook in _need_installed(): - _hook_install(hook) + _hook_install(hook, store) def all_hooks(root_config: dict[str, Any], store: Store) -> tuple[Hook, ...]: diff --git a/pre_commit/store.py b/pre_commit/store.py index dc90c0519..a6d80d041 100644 --- a/pre_commit/store.py +++ b/pre_commit/store.py @@ -1,13 +1,14 @@ from __future__ import annotations import contextlib +import json import logging import os.path import sqlite3 import tempfile from collections.abc import Callable from collections.abc import Generator -from collections.abc import Sequence +from typing import Any import pre_commit.constants as C from pre_commit import clientlib @@ -61,7 +62,7 @@ class Store: def __init__(self, directory: str | None = None) -> None: self.directory = directory or Store.get_default_directory() - self.db_path = os.path.join(self.directory, 'db.db') + self.db_path = os.path.join(self.directory, 'db5.db') self.readonly = ( os.path.exists(self.directory) and not os.access(self.directory, os.W_OK) @@ -82,20 +83,40 @@ def __init__(self, directory: str | None = None) -> None: if os.path.exists(self.db_path): # pragma: no cover (race) return # To avoid a race where someone ^Cs between db creation and - # execution of the CREATE TABLE statement + # execution of the CREATE TABLE statements fd, tmpfile = tempfile.mkstemp(dir=self.directory) # We'll be managing this file ourselves os.close(fd) with self.connect(db_path=tmpfile) as db: db.executescript( - 'CREATE TABLE repos (' + 'CREATE TABLE configs (' + ' path TEXT NOT NULL,' + ' PRIMARY KEY (path)' + ');', + ) + db.executescript( + 'CREATE TABLE manifests (' + ' repo TEXT NOT NULL,' + ' rev TEXT NOT NULL,' + ' manifest TEXT NOT NULL,' + ' PRIMARY KEY (repo, rev)' + ');', + ) + db.executescript( + 'CREATE TABLE clones (' ' repo TEXT NOT NULL,' - ' ref TEXT NOT NULL,' + ' rev TEXT NOT NULL,' ' path TEXT NOT NULL,' - ' PRIMARY KEY (repo, ref)' + ' PRIMARY KEY (repo, rev)' + ');', + ) + db.executescript( + 'CREATE TABLE installs (' + ' key TEXT NOT NULL,' + ' path TEXT NOT NULL,' + ' PRIMARY KEY (key)' ');', ) - self._create_configs_table(db) # Atomic file move os.replace(tmpfile, self.db_path) @@ -122,29 +143,30 @@ def connect( with db: yield db - @classmethod - def db_repo_name(cls, repo: str, deps: Sequence[str]) -> str: - if deps: - return f'{repo}:{",".join(deps)}' - else: - return repo + def _complete_clone(self, rev: str, git_cmd: Callable[..., None]) -> None: + """Perform a complete clone of a repository and its submodules """ - def _new_repo( - self, - repo: str, - ref: str, - deps: Sequence[str], - make_strategy: Callable[[str], None], - ) -> str: - original_repo = repo - repo = self.db_repo_name(repo, deps) + git_cmd('fetch', 'origin', '--tags') + git_cmd('checkout', rev) + git_cmd('submodule', 'update', '--init', '--recursive') + def _shallow_clone(self, rev: str, git_cmd: Callable[..., None]) -> None: + """Perform a shallow clone of a repository and its submodules """ + + v2 = ('-c', 'protocol.version=2') + git_cmd(*v2, 'fetch', 'origin', rev, '--depth=1') + git_cmd('checkout', 'FETCH_HEAD') + git_cmd( + *v2, 'submodule', 'update', '--init', '--recursive', + '--depth=1', + ) + + def clone(self, repo: str, rev: str) -> str: def _get_result() -> str | None: - # Check if we already exist with self.connect() as db: result = db.execute( - 'SELECT path FROM repos WHERE repo = ? AND ref = ?', - (repo, ref), + 'SELECT path FROM clones WHERE repo = ? AND rev = ?', + (repo, rev), ).fetchone() return result[0] if result else None @@ -157,70 +179,64 @@ def _get_result() -> str | None: if result: # pragma: no cover (race) return result - logger.info(f'Initializing environment for {repo}.') + logger.info(f'Cloning {repo}...') - directory = tempfile.mkdtemp(prefix='repo', dir=self.directory) + directory = tempfile.mkdtemp(prefix='clone', dir=self.directory) with clean_path_on_failure(directory): - make_strategy(directory) + git.init_repo(directory, repo) + env = git.no_git_env() + + def _git_cmd(*args: str) -> None: + cmd_output_b('git', *args, cwd=directory, env=env) + + try: + self._shallow_clone(rev, _git_cmd) + except CalledProcessError: + self._complete_clone(rev, _git_cmd) + + manifest = clientlib.load_raw( + os.path.join(directory, C.MANIFEST_FILE), + display_filename=f'({repo})/{C.MANIFEST_FILE}', + exc_tp=clientlib.InvalidManifestError, + ) # Update our db with the created repo with self.connect() as db: db.execute( - 'INSERT INTO repos (repo, ref, path) VALUES (?, ?, ?)', - [repo, ref, directory], + 'INSERT INTO clones VALUES (?, ?, ?)', + (repo, rev, directory), + ) + db.execute( + 'INSERT INTO manifests VALUES (?, ?, ?)', + (repo, rev, json.dumps(manifest)), ) - clientlib.warn_for_stages_on_repo_init(original_repo, directory) + clientlib.warn_for_stages_on_repo_init(repo, directory) return directory - def _complete_clone(self, ref: str, git_cmd: Callable[..., None]) -> None: - """Perform a complete clone of a repository and its submodules """ - - git_cmd('fetch', 'origin', '--tags') - git_cmd('checkout', ref) - git_cmd('submodule', 'update', '--init', '--recursive') - - def _shallow_clone(self, ref: str, git_cmd: Callable[..., None]) -> None: - """Perform a shallow clone of a repository and its submodules """ - - git_config = 'protocol.version=2' - git_cmd('-c', git_config, 'fetch', 'origin', ref, '--depth=1') - git_cmd('checkout', 'FETCH_HEAD') - git_cmd( - '-c', git_config, 'submodule', 'update', '--init', '--recursive', - '--depth=1', - ) - - def clone(self, repo: str, ref: str, deps: Sequence[str] = ()) -> str: - """Clone the given url and checkout the specific ref.""" - - def clone_strategy(directory: str) -> None: - git.init_repo(directory, repo) - env = git.no_git_env() - - def _git_cmd(*args: str) -> None: - cmd_output_b('git', *args, cwd=directory, env=env) - - try: - self._shallow_clone(ref, _git_cmd) - except CalledProcessError: - self._complete_clone(ref, _git_cmd) - - return self._new_repo(repo, ref, deps, clone_strategy) - - def make_local(self, deps: Sequence[str]) -> str: - return self._new_repo( - 'local', C.LOCAL_REPO_VERSION, deps, _make_local_repo, - ) - - def _create_configs_table(self, db: sqlite3.Connection) -> None: - db.executescript( - 'CREATE TABLE IF NOT EXISTS configs (' - ' path TEXT NOT NULL,' - ' PRIMARY KEY (path)' - ');', - ) + def maybe_manifest( + self, + repo: str, + rev: str, + ) -> dict[str, dict[str, Any]] | None: + with self.connect() as db: + result = db.execute( + 'SELECT manifest FROM manifests WHERE repo = ? AND rev = ?', + (repo, rev), + ).fetchone() + if result is not None: + return clientlib.load_manifest_contents(repo, result[0]) + else: + return None + + def manifest(self, repo: str, rev: str) -> dict[str, dict[str, Any]]: + ret = self.maybe_manifest(repo, rev) + if ret is None: + self.clone(repo, rev) + ret = self.maybe_manifest(repo, rev) + assert ret is not None + return ret def mark_config_used(self, path: str) -> None: if self.readonly: # pragma: win32 no cover @@ -230,6 +246,4 @@ def mark_config_used(self, path: str) -> None: if not os.path.exists(path): return with self.connect() as db: - # TODO: eventually remove this and only create in _create - self._create_configs_table(db) db.execute('INSERT OR IGNORE INTO configs VALUES (?)', (path,)) diff --git a/setup.cfg b/setup.cfg index a95ee4473..11a19fe52 100644 --- a/setup.cfg +++ b/setup.cfg @@ -18,7 +18,7 @@ classifiers = [options] packages = find: install_requires = - cfgv>=2.0.0 + cfgv>=3.5.0 identify>=1.0.0 nodeenv>=0.11.1 pyyaml>=5.1 diff --git a/testing/fixtures.py b/testing/fixtures.py index 79a11605e..c492d6a4d 100644 --- a/testing/fixtures.py +++ b/testing/fixtures.py @@ -101,7 +101,7 @@ def make_config_from_repo(repo_path, rev=None, hooks=None, check=True): config = { 'repo': f'file://{repo_path}', 'rev': rev or git.head_rev(repo_path), - 'hooks': hooks or [{'id': hook['id']} for hook in manifest], + 'hooks': hooks or [{'id': hook_id} for hook_id in manifest['hooks']], } if check: diff --git a/tests/clientlib_test.py b/tests/clientlib_test.py index 2c42b80cf..51c21b825 100644 --- a/tests/clientlib_test.py +++ b/tests/clientlib_test.py @@ -12,7 +12,6 @@ from pre_commit.clientlib import CONFIG_REPO_DICT from pre_commit.clientlib import CONFIG_SCHEMA from pre_commit.clientlib import DEFAULT_LANGUAGE_VERSION -from pre_commit.clientlib import InvalidManifestError from pre_commit.clientlib import load_manifest from pre_commit.clientlib import MANIFEST_HOOK_DICT from pre_commit.clientlib import MANIFEST_SCHEMA @@ -405,30 +404,40 @@ def test_unsupported_language_migration_language_required(): @pytest.mark.parametrize( 'manifest_obj', ( - [{ - 'id': 'a', - 'name': 'b', - 'entry': 'c', - 'language': 'python', - 'files': r'\.py$', - }], - [{ - 'id': 'a', - 'name': 'b', - 'entry': 'c', - 'language': 'python', - 'language_version': 'python3.4', - 'files': r'\.py$', - }], + { + 'hooks': { + 'a': { + 'name': 'b', + 'entry': 'c', + 'language': 'python', + 'files': r'\.py$', + }, + }, + }, + { + 'hooks': { + 'a': { + 'name': 'b', + 'entry': 'c', + 'language': 'python', + 'language_version': 'python3.4', + 'files': r'\.py$', + }, + }, + }, # A regression in 0.13.5: always_run and files are permissible - [{ - 'id': 'a', - 'name': 'b', - 'entry': 'c', - 'language': 'python', - 'files': '', - 'always_run': True, - }], + { + 'hooks': { + 'a': { + 'id': 'a', + 'name': 'b', + 'entry': 'c', + 'language': 'python', + 'files': '', + 'always_run': True, + }, + }, + }, ), ) def test_valid_manifests(manifest_obj): @@ -592,16 +601,31 @@ def test_config_hook_stages_defaulting(): } -def test_manifest_v5_forward_compat(tmp_path): +def test_manifest_backward_compat(tmp_path): + src = '''\ +- id: example + name: example + language: unsupported + entry: echo +''' manifest = tmp_path.joinpath('.pre-commit-hooks.yaml') - manifest.write_text('hooks: {}') - - with pytest.raises(InvalidManifestError) as excinfo: - load_manifest(manifest) - assert str(excinfo.value) == ( - f'\n' - f'==> File {manifest}\n' - f'=====> \n' - f'=====> pre-commit version 5 is required but version {C.VERSION} ' - f'is installed. Perhaps run `pip install --upgrade pre-commit`.' - ) + manifest.write_text(src) + + ret = load_manifest(manifest) + + # just to make the assertion easier + assert 'id' not in ret['hooks']['example'] + for k in tuple(ret['hooks']['example']): + if k not in {'name', 'language', 'entry'}: + ret['hooks']['example'].pop(k) + + assert ret == { + 'minimum_pre_commit_version': '0', + 'hooks': { + 'example': { + 'name': 'example', + 'language': 'unsupported', + 'entry': 'echo', + }, + }, + } diff --git a/tests/repository_test.py b/tests/repository_test.py index 5d71c3e4c..65fd87858 100644 --- a/tests/repository_test.py +++ b/tests/repository_test.py @@ -344,7 +344,8 @@ def local_python_config(): repo_path = get_resource_path('python_hooks_repo') manifest = load_manifest(os.path.join(repo_path, C.MANIFEST_FILE)) hooks = [ - dict(hook, additional_dependencies=[repo_path]) for hook in manifest + {'id': hook_id, **hook, 'additional_dependencies': [repo_path]} + for hook_id, hook in manifest['hooks'].items() ] return {'repo': 'local', 'hooks': hooks} diff --git a/tests/store_test.py b/tests/store_test.py index 13f198ea2..5b46a9b4d 100644 --- a/tests/store_test.py +++ b/tests/store_test.py @@ -144,6 +144,48 @@ def test_warning_for_deprecated_stages_on_init(store, tempdir_factory, caplog): assert caplog.record_tuples == [] +def test_warning_for_deprecated_stages_v5_manifest( + store, tempdir_factory, caplog, +): + manifest = '''\ +hooks: + hook1: + name: hook1 + language: system + entry: echo hook1 + stages: [commit, push] + hook2: + name: hook2 + language: system + entry: echo hook2 + stages: [push, merge-commit] +''' + + path = git_dir(tempdir_factory) + with open(os.path.join(path, C.MANIFEST_FILE), 'w') as f: + f.write(manifest) + cmd_output('git', 'add', '.', cwd=path) + git_commit(cwd=path) + rev = git.head_rev(path) + + store.clone(path, rev) + assert caplog.record_tuples[1] == ( + 'pre_commit', + logging.WARNING, + f'repo `{path}` uses deprecated stage names ' + f'(commit, push, merge-commit) which will be removed in a future ' + f'version. ' + f'Hint: often `pre-commit autoupdate --repo {shlex.quote(path)}` ' + f'will fix this. ' + f'if it does not -- consider reporting an issue to that repo.', + ) + + # should not re-warn + caplog.clear() + store.clone(path, rev) + assert caplog.record_tuples == [] + + def test_no_warning_for_non_deprecated_stages_on_init( store, tempdir_factory, caplog, ):