Skip to content

Commit 7983c7a

Browse files
konstin and zanieb authored
Validate and heal RECORD during installation (#18943)
Check the RECORD of a wheel file and heal it if necessary, to ensure the RECORD and the wheel contents always match, and uninstallation can't remove files that don't belong to the wheel. This check and repair happen between unpacking a wheel and persisting it in the cache, ensuring that every wheel that ends up in the cache has a valid RECORD. We collect the paths from the archive in the unpacking step, I added it in all unpacking steps for consistency. I also improved the consistency around RECORD handling code. --------- Co-authored-by: Zanie Blue <contact@zanie.dev>
1 parent b38439b commit 7983c7a

16 files changed

Lines changed: 452 additions & 134 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/uv-build-backend/src/wheel.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,7 @@ struct RecordEntry {
397397
/// The urlsafe-base64-nopad encoded SHA256 of the files.
398398
hash: String,
399399
/// The size of the file in bytes.
400-
size: usize,
400+
size: u64,
401401
}
402402

403403
/// Read the input file and write it both to the hasher and the target file.
@@ -410,7 +410,7 @@ fn write_hashed(
410410
writer: &mut dyn Write,
411411
) -> Result<RecordEntry, io::Error> {
412412
let mut hasher = Sha256::new();
413-
let mut size = 0;
413+
let mut size: u64 = 0;
414414
// 8KB is the default defined in `std::sys_common::io`.
415415
let mut buffer = vec![0; 8 * 1024];
416416
loop {
@@ -425,7 +425,7 @@ fn write_hashed(
425425
}
426426
hasher.update(&buffer[..read]);
427427
writer.write_all(&buffer[..read])?;
428-
size += read;
428+
size += read as u64;
429429
}
430430
Ok(RecordEntry {
431431
path: path.to_string(),
@@ -736,7 +736,7 @@ impl<W: Write + Seek> DirectoryWriter for ZipDirectoryWriter<W> {
736736
self.record.push(RecordEntry {
737737
path: path.to_string(),
738738
hash,
739-
size: bytes.len(),
739+
size: bytes.len() as u64,
740740
});
741741

742742
Ok(())
@@ -816,7 +816,7 @@ impl DirectoryWriter for FilesystemWriter {
816816
self.record.push(RecordEntry {
817817
path: path.to_string(),
818818
hash,
819-
size: bytes.len(),
819+
size: bytes.len() as u64,
820820
});
821821

822822
Ok(fs_err::write(self.root.join(path), bytes)?)

crates/uv-distribution-types/src/lib.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
//!
3434
//! Since we read this information from [`direct_url.json`](https://packaging.python.org/en/latest/specifications/direct-url-data-structure/), it doesn't match the information [`Dist`] exactly.
3535
use std::borrow::Cow;
36+
use std::fmt::Display;
3637
use std::path;
3738
use std::path::Path;
3839
use std::str::FromStr;
@@ -204,6 +205,15 @@ pub enum DistRef<'a> {
204205
Source(&'a SourceDist),
205206
}
206207

208+
impl Display for DistRef<'_> {
209+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
210+
match self {
211+
Self::Built(built_dist) => Display::fmt(&built_dist, f),
212+
Self::Source(source_dist) => Display::fmt(&source_dist, f),
213+
}
214+
}
215+
}
216+
207217
/// A wheel, with its three possible origins (index, url, path)
208218
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
209219
pub enum BuiltDist {

crates/uv-distribution/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ uv-flags = { workspace = true }
2828
uv-fs = { workspace = true, features = ["tokio"] }
2929
uv-git = { workspace = true }
3030
uv-git-types = { workspace = true }
31+
uv-install-wheel = { workspace = true }
3132
uv-metadata = { workspace = true }
3233
uv-normalize = { workspace = true }
3334
uv-pep440 = { workspace = true }

crates/uv-distribution/src/distribution_database.rs

Lines changed: 62 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ use std::sync::Arc;
66
use std::task::{Context, Poll};
77

88
use futures::{FutureExt, TryStreamExt};
9-
use tempfile::TempDir;
109
use tokio::io::{AsyncRead, AsyncSeekExt, ReadBuf};
1110
use tokio::sync::Semaphore;
1211
use tokio_util::compat::FuturesAsyncReadCompatExt;
@@ -20,11 +19,12 @@ use uv_client::{
2019
};
2120
use uv_distribution_filename::{SourceDistExtension, WheelFilename};
2221
use uv_distribution_types::{
23-
BuildInfo, BuildableSource, BuiltDist, Dist, File, HashPolicy, Hashed, IndexUrl, InstalledDist,
24-
Name, SourceDist, ToUrlError,
22+
BuildInfo, BuildableSource, BuiltDist, Dist, DistRef, File, HashPolicy, Hashed, IndexUrl,
23+
InstalledDist, Name, SourceDist, ToUrlError,
2524
};
2625
use uv_extract::hash::Hasher;
2726
use uv_fs::write_atomic;
27+
use uv_install_wheel::validate_and_heal_record;
2828
use uv_platform_tags::Tags;
2929
use uv_pypi_types::{HashDigest, HashDigests, PyProjectToml};
3030
use uv_redacted::DisplaySafeUrl;
@@ -491,7 +491,11 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
491491

492492
// Otherwise, unzip the wheel.
493493
let id = self
494-
.unzip_wheel(&built_wheel.path, &built_wheel.target)
494+
.unzip_wheel(
495+
&built_wheel.path,
496+
&built_wheel.target,
497+
DistRef::Source(dist),
498+
)
495499
.await?;
496500

497501
Ok(LocalWheel {
@@ -683,41 +687,45 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
683687
let temp_dir = tempfile::tempdir_in(self.build_context.cache().root())
684688
.map_err(Error::CacheWrite)?;
685689

686-
match progress {
690+
let files = match progress {
687691
Some((reporter, progress)) => {
688692
let mut reader = ProgressReader::new(&mut hasher, progress, &**reporter);
689693
match extension {
690694
WheelExtension::Whl => {
691695
uv_extract::stream::unzip(query_url, &mut reader, temp_dir.path())
692696
.await
693-
.map_err(|err| Error::Extract(filename.to_string(), err))?;
697+
.map_err(|err| Error::Extract(filename.to_string(), err))?
694698
}
695699
WheelExtension::WhlZst => {
696700
uv_extract::stream::untar_zst(&mut reader, temp_dir.path())
697701
.await
698-
.map_err(|err| Error::Extract(filename.to_string(), err))?;
702+
.map_err(|err| Error::Extract(filename.to_string(), err))?
699703
}
700704
}
701705
}
702706
None => match extension {
703707
WheelExtension::Whl => {
704708
uv_extract::stream::unzip(query_url, &mut hasher, temp_dir.path())
705709
.await
706-
.map_err(|err| Error::Extract(filename.to_string(), err))?;
710+
.map_err(|err| Error::Extract(filename.to_string(), err))?
707711
}
708712
WheelExtension::WhlZst => {
709713
uv_extract::stream::untar_zst(&mut hasher, temp_dir.path())
710714
.await
711-
.map_err(|err| Error::Extract(filename.to_string(), err))?;
715+
.map_err(|err| Error::Extract(filename.to_string(), err))?
712716
}
713717
},
714-
}
715-
718+
};
716719
// If necessary, exhaust the reader to compute the hash.
717720
if !hashes.is_none() {
718721
hasher.finish().await.map_err(Error::HashExhaustion)?;
719722
}
720723

724+
// Before we make the wheel accessible by persisting it, ensure that the RECORD is
725+
// valid.
726+
validate_and_heal_record(temp_dir.path(), files.iter(), dist)
727+
.map_err(Error::InstallWheelError)?;
728+
721729
// Persist the temporary directory to the directory store.
722730
let id = self
723731
.build_context
@@ -883,52 +891,49 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
883891
.map_err(Error::CacheWrite)?;
884892

885893
// If no hashes are required, parallelize the unzip operation.
886-
let hashes = if hashes.is_none() {
894+
let (files, hashes) = if hashes.is_none() {
895+
// Unzip the wheel into a temporary directory.
887896
let file = file.into_std().await;
888-
tokio::task::spawn_blocking({
889-
let target = temp_dir.path().to_owned();
890-
move || -> Result<(), uv_extract::Error> {
891-
// Unzip the wheel into a temporary directory.
892-
match extension {
893-
WheelExtension::Whl => {
894-
uv_extract::unzip(file, &target)?;
895-
}
896-
WheelExtension::WhlZst => {
897-
uv_extract::stream::untar_zst_file(file, &target)?;
898-
}
899-
}
900-
Ok(())
901-
}
897+
let target = temp_dir.path().to_owned();
898+
let files = tokio::task::spawn_blocking(move || match extension {
899+
WheelExtension::Whl => uv_extract::unzip(file, &target),
900+
WheelExtension::WhlZst => uv_extract::stream::untar_zst_file(file, &target),
902901
})
903902
.await?
904903
.map_err(|err| Error::Extract(filename.to_string(), err))?;
905904

906-
HashDigests::empty()
905+
(files, HashDigests::empty())
907906
} else {
908907
// Create a hasher for each hash algorithm.
909908
let algorithms = hashes.algorithms();
910909
let mut hashers = algorithms.into_iter().map(Hasher::from).collect::<Vec<_>>();
911910
let mut hasher = uv_extract::hash::HashReader::new(file, &mut hashers);
912911

913-
match extension {
912+
let files = match extension {
914913
WheelExtension::Whl => {
915914
uv_extract::stream::unzip(query_url, &mut hasher, temp_dir.path())
916915
.await
917-
.map_err(|err| Error::Extract(filename.to_string(), err))?;
916+
.map_err(|err| Error::Extract(filename.to_string(), err))?
918917
}
919918
WheelExtension::WhlZst => {
920919
uv_extract::stream::untar_zst(&mut hasher, temp_dir.path())
921920
.await
922-
.map_err(|err| Error::Extract(filename.to_string(), err))?;
921+
.map_err(|err| Error::Extract(filename.to_string(), err))?
923922
}
924-
}
923+
};
925924

926925
// If necessary, exhaust the reader to compute the hash.
927926
hasher.finish().await.map_err(Error::HashExhaustion)?;
927+
let hashes = hashers.into_iter().map(HashDigest::from).collect();
928928

929-
hashers.into_iter().map(HashDigest::from).collect()
929+
(files, hashes)
930930
};
931931

932+
// Before we make the wheel accessible by persisting it, ensure that the RECORD is
933+
// valid.
934+
validate_and_heal_record(temp_dir.path(), files.iter(), dist)
935+
.map_err(Error::InstallWheelError)?;
936+
932937
// Persist the temporary directory to the directory store.
933938
let id = self
934939
.build_context
@@ -1062,7 +1067,8 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
10621067
} else if hashes.is_none() {
10631068
// Otherwise, unzip the wheel.
10641069
let archive = Archive::new(
1065-
self.unzip_wheel(path, wheel_entry.path()).await?,
1070+
self.unzip_wheel(path, wheel_entry.path(), DistRef::Built(dist))
1071+
.await?,
10661072
HashDigests::empty(),
10671073
filename.clone(),
10681074
);
@@ -1100,24 +1106,29 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
11001106
let mut hasher = uv_extract::hash::HashReader::new(file, &mut hashers);
11011107

11021108
// Unzip the wheel to a temporary directory.
1103-
match extension {
1109+
let files = match extension {
11041110
WheelExtension::Whl => {
11051111
uv_extract::stream::unzip(path.display(), &mut hasher, temp_dir.path())
11061112
.await
1107-
.map_err(|err| Error::Extract(filename.to_string(), err))?;
1113+
.map_err(|err| Error::Extract(filename.to_string(), err))?
11081114
}
11091115
WheelExtension::WhlZst => {
11101116
uv_extract::stream::untar_zst(&mut hasher, temp_dir.path())
11111117
.await
1112-
.map_err(|err| Error::Extract(filename.to_string(), err))?;
1118+
.map_err(|err| Error::Extract(filename.to_string(), err))?
11131119
}
1114-
}
1120+
};
11151121

11161122
// Exhaust the reader to compute the hash.
11171123
hasher.finish().await.map_err(Error::HashExhaustion)?;
11181124

11191125
let hashes = hashers.into_iter().map(HashDigest::from).collect();
11201126

1127+
// Before we make the wheel accessible by persisting it, ensure that the RECORD is
1128+
// valid.
1129+
validate_and_heal_record(temp_dir.path(), files.iter(), dist)
1130+
.map_err(Error::InstallWheelError)?;
1131+
11211132
// Persist the temporary directory to the directory store.
11221133
let id = self
11231134
.build_context
@@ -1152,21 +1163,30 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
11521163
}
11531164

11541165
/// Unzip a wheel into the cache, returning the path to the unzipped directory.
1155-
async fn unzip_wheel(&self, path: &Path, target: &Path) -> Result<ArchiveId, Error> {
1156-
let temp_dir = tokio::task::spawn_blocking({
1166+
async fn unzip_wheel(
1167+
&self,
1168+
path: &Path,
1169+
target: &Path,
1170+
dist: DistRef<'_>,
1171+
) -> Result<ArchiveId, Error> {
1172+
let (temp_dir, files) = tokio::task::spawn_blocking({
11571173
let path = path.to_owned();
11581174
let root = self.build_context.cache().root().to_path_buf();
1159-
move || -> Result<TempDir, Error> {
1175+
move || -> Result<_, Error> {
11601176
// Unzip the wheel into a temporary directory.
11611177
let temp_dir = tempfile::tempdir_in(root).map_err(Error::CacheWrite)?;
11621178
let reader = fs_err::File::open(&path).map_err(Error::CacheWrite)?;
1163-
uv_extract::unzip(reader, temp_dir.path())
1179+
let files = uv_extract::unzip(reader, temp_dir.path())
11641180
.map_err(|err| Error::Extract(path.to_string_lossy().into_owned(), err))?;
1165-
Ok(temp_dir)
1181+
Ok((temp_dir, files))
11661182
}
11671183
})
11681184
.await??;
11691185

1186+
// Before we make the wheel accessible by persisting it, ensure that the RECORD is valid.
1187+
validate_and_heal_record(temp_dir.path(), files.iter(), dist)
1188+
.map_err(Error::InstallWheelError)?;
1189+
11701190
// Persist the temporary directory to the directory store.
11711191
let id = self
11721192
.build_context

crates/uv-distribution/src/error.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,9 @@ pub enum Error {
196196

197197
#[error("Hash-checking is not supported for Git repositories: `{0}`")]
198198
HashesNotSupportedGit(String),
199+
200+
#[error(transparent)]
201+
InstallWheelError(uv_install_wheel::Error),
199202
}
200203

201204
impl From<reqwest::Error> for Error {

0 commit comments

Comments
 (0)