Skip to content

Commit 0346076

Browse files
JRoyanthonyshew
andauthored
fix: Bun workspace lockfile pruning producing invalid output (#12548)
> Note: Claude helped me work through most of this, if the code is unusable I understand, hopefully at least the reproduction repository is still useful! ### Description Several issues caused `bun install --frozen-lockfile` to reject pruned lockfiles generated by `turbo prune --docker`: - Overridden dependencies resolved to a lower version were dropped from the transitive closure because the version spec check rejected them. Skip the check when an override is active since overrides are authoritative. - trustedDependencies carried stale entries from the full monorepo. Clear the section in pruned lockfiles since bun regenerates it from config. - Suffixed package keys (e.g. `pkg--for-generate-function-map`) were not renamed to their base name when the unsuffixed entry was pruned. - Orphan removal kept nested entries from unrelated dependency trees (e.g. React Native packages in a web prune) because it matched on package ident rather than requiring the parent to also be reachable. Tighten the check and iteratively prune entries whose parent no longer exists. - Redundant nested entries matching or semver-compatible with the hoisted version were not deduplicated, causing bun to reject the lockfile. - URL-based packages (`@https://`) were serialized with the npm 4-element format instead of the tarball 2-element format, emitting spurious empty registry and checksum fields. ### Testing Instructions Reproduction repo with steps here: https://github.com/JRoy/turbo-repro --------- Co-authored-by: Anthony Shew <anthonyshew@gmail.com>
1 parent b7d89a4 commit 0346076

10 files changed

Lines changed: 590 additions & 42 deletions

File tree

crates/turborepo-lockfiles/src/bun/mod.rs

Lines changed: 218 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,10 @@ pub(crate) fn is_git_or_github_package(ident: &str) -> bool {
132132
ident.contains("@git+") || ident.contains("@github:")
133133
}
134134

135+
pub(crate) fn is_tarball_or_url_package(ident: &str) -> bool {
136+
ident.contains("@https://") || ident.contains("@http://")
137+
}
138+
135139
/// Represents a platform constraint that can be either inclusive or exclusive.
136140
/// This matches Bun's Negatable type for os/cpu/libc fields.
137141
#[derive(Debug, Default, Clone, PartialEq, Eq)]
@@ -455,11 +459,18 @@ impl BunLockfile {
455459
override_version: &str,
456460
resolved_version: &str,
457461
) -> Result<Option<crate::Package>, crate::Error> {
462+
// When an override is active, the overridden version is authoritative
463+
// and should always be accepted regardless of the original version spec.
464+
// For example, if a package declares `"lightningcss": "1.30.2"` but the
465+
// root package.json overrides it to `"1.30.1"`, the resolved version
466+
// 1.30.1 must be accepted even though it doesn't satisfy `^1.30.2`.
467+
let has_override = override_version != resolved_version;
468+
458469
// Try workspace-scoped first
459470
if let Some(entry) = self.index.get_workspace_scoped(workspace_name, name)
460471
&& let Some(pkg) =
461472
self.process_package_entry(entry, name, override_version, resolved_version)?
462-
&& self.version_satisfies_spec(&pkg.version, version_spec)
473+
&& (has_override || self.version_satisfies_spec(&pkg.version, version_spec))
463474
{
464475
return Ok(Some(pkg));
465476
}
@@ -468,7 +479,7 @@ impl BunLockfile {
468479
if let Some((_key, entry)) = self.index.find_package(Some(workspace_name), name)
469480
&& let Some(pkg) =
470481
self.process_package_entry(entry, name, override_version, resolved_version)?
471-
&& self.version_satisfies_spec(&pkg.version, version_spec)
482+
&& (has_override || self.version_satisfies_spec(&pkg.version, version_spec))
472483
{
473484
return Ok(Some(pkg));
474485
}
@@ -504,10 +515,9 @@ impl BunLockfile {
504515
continue;
505516
}
506517

507-
// Check if this version satisfies the spec
508518
if let Some(pkg) =
509519
self.process_package_entry(entry, name, override_version, resolved_version)?
510-
&& self.version_satisfies_spec(&pkg.version, version_spec)
520+
&& (has_override || self.version_satisfies_spec(&pkg.version, version_spec))
511521
{
512522
tracing::debug!(
513523
"Found matching version {} for {} (spec: {}) in nested entry {}",
@@ -930,15 +940,27 @@ impl BunLockfile {
930940
} else {
931941
output.push_str(&format!(" \"{key}\": [{ident_json}],"));
932942
}
933-
} else if ident.is_local_package() {
934-
// file:, link:, and tarball entries: [ident, info] — 2 elements
943+
} else if ident.is_local_package() || is_tarball_or_url_package(&entry.ident) {
935944
let ident_json = serde_json::to_string(&entry.ident)?;
936945
let info_json =
937946
serde_json::to_string(&entry.info.as_ref().unwrap_or(&PackageInfo::default()))?;
938947
let info_json_spaced = self.format_info_json(&info_json);
939-
output.push_str(&format!(
940-
" \"{key}\": [{ident_json}, {info_json_spaced}],",
941-
));
948+
if let Some(checksum) = &entry.checksum {
949+
if !checksum.is_empty() {
950+
let checksum_json = serde_json::to_string(checksum)?;
951+
output.push_str(&format!(
952+
" \"{key}\": [{ident_json}, {info_json_spaced}, {checksum_json}],",
953+
));
954+
} else {
955+
output.push_str(&format!(
956+
" \"{key}\": [{ident_json}, {info_json_spaced}],",
957+
));
958+
}
959+
} else {
960+
output.push_str(&format!(
961+
" \"{key}\": [{ident_json}, {info_json_spaced}],",
962+
));
963+
}
942964
} else {
943965
let ident_json = serde_json::to_string(&entry.ident)?;
944966
let info_json =
@@ -1231,7 +1253,10 @@ impl BunLockfile {
12311253
lockfile_version: self.data.lockfile_version,
12321254
config_version: self.data.config_version,
12331255
workspaces: Map::new(),
1234-
trusted_dependencies: self.data.trusted_dependencies.clone(),
1256+
// trustedDependencies are intentionally left empty. turbo prune
1257+
// copies the root package.json which is the source of truth for
1258+
// trusted scripts; bun re-derives the set at install time.
1259+
trusted_dependencies: Vec::new(),
12351260
overrides: Map::new(),
12361261
catalog: self.data.catalog.clone(),
12371262
catalogs: self.data.catalogs.clone(),
@@ -1671,29 +1696,39 @@ impl BunLockfile {
16711696
})
16721697
.collect();
16731698

1674-
// Remove unreachable packages
16751699
pruned_data.packages.retain(|key, entry| {
1676-
// Keep if the ident is reachable
1677-
if reachable_idents.contains(&entry.ident)
1678-
|| entry.ident.contains("@workspace:")
1679-
{
1700+
if entry.ident.contains("@workspace:") {
16801701
return true;
16811702
}
16821703

1683-
// Keep nested packages if their parent is reachable
1684-
// E.g., keep "@hatchet-dev/typescript-sdk/zod" if "@hatchet-dev/typescript-sdk"
1685-
// is reachable
1686-
if let Some(slash_pos) = key.rfind('/') {
1687-
let parent_key = &key[..slash_pos];
1688-
if reachable_lockfile_keys.contains(parent_key) {
1689-
return true;
1690-
}
1704+
let parsed = PackageKey::parse(key);
1705+
if let Some(parent) = parsed.parent() {
1706+
reachable_idents.contains(&entry.ident)
1707+
&& reachable_lockfile_keys.contains(&parent)
1708+
} else {
1709+
reachable_idents.contains(&entry.ident)
16911710
}
1692-
1693-
false
16941711
});
1712+
1713+
loop {
1714+
let current_keys: HashSet<String> =
1715+
pruned_data.packages.keys().cloned().collect();
1716+
let before = current_keys.len();
1717+
pruned_data.packages.retain(|key, _entry| {
1718+
let parsed = PackageKey::parse(key);
1719+
match parsed.parent() {
1720+
Some(parent) => current_keys.contains(&parent),
1721+
None => true,
1722+
}
1723+
});
1724+
if pruned_data.packages.len() == before {
1725+
break;
1726+
}
1727+
}
1728+
}
1729+
Err(e) => {
1730+
tracing::warn!("Failed to recompute transitive closures for orphan removal: {e}");
16951731
}
1696-
Err(_e) => {}
16971732
}
16981733

16991734
// NESTED KEY PROMOTION: In bun lockfiles, nested entries like
@@ -1791,6 +1826,80 @@ impl BunLockfile {
17911826
}
17921827
}
17931828

1829+
// SUFFIXED KEY RENAME: Bun appends suffixes like `--for-generate-function-map`
1830+
// to distinguish multiple versions of the same package. When the primary
1831+
// (unsuffixed) entry is pruned and only the suffixed one remains, rename
1832+
// the key to the base package name so bun can resolve it.
1833+
{
1834+
let top_level_pkg_names: HashSet<String> = pruned_data
1835+
.packages
1836+
.iter()
1837+
.filter(|(key, _)| !key.contains("--") && PackageKey::parse(key).parent().is_none())
1838+
.map(|(_, entry)| PackageIdent::parse(&entry.ident).name().to_string())
1839+
.collect();
1840+
1841+
let mut renames: Vec<(String, String)> = Vec::new();
1842+
for (key, entry) in &pruned_data.packages {
1843+
if !key.contains("--") {
1844+
continue;
1845+
}
1846+
let parsed = PackageKey::parse(key);
1847+
if parsed.parent().is_some() {
1848+
continue;
1849+
}
1850+
let ident = PackageIdent::parse(&entry.ident);
1851+
let pkg_name = ident.name();
1852+
if key != pkg_name && !top_level_pkg_names.contains(pkg_name) {
1853+
renames.push((key.clone(), pkg_name.to_string()));
1854+
}
1855+
}
1856+
for (old_key, new_key) in renames {
1857+
if let Some(entry) = pruned_data.packages.remove(&old_key) {
1858+
pruned_data.packages.insert(new_key, entry);
1859+
}
1860+
}
1861+
}
1862+
1863+
// NESTED ENTRY DEDUPLICATION: In bun lockfiles, nested entries like
1864+
// `@babel/core/@babel/traverse` only need to exist when they resolve to
1865+
// a DIFFERENT version than the hoisted top-level `@babel/traverse`.
1866+
// After pruning removes some versions, nested entries that now match
1867+
// the hoisted entry are redundant and must be removed — bun's
1868+
// --frozen-lockfile rejects them.
1869+
{
1870+
let hoisted_idents: HashMap<String, String> = pruned_data
1871+
.packages
1872+
.iter()
1873+
.filter(|(key, entry)| {
1874+
let parsed = PackageKey::parse(key);
1875+
if parsed.parent().is_some() {
1876+
return false;
1877+
}
1878+
let ident = PackageIdent::parse(&entry.ident);
1879+
let pkg_name = ident.name();
1880+
*key == pkg_name
1881+
})
1882+
.map(|(_, entry)| {
1883+
let ident = PackageIdent::parse(&entry.ident);
1884+
(ident.name().to_string(), entry.ident.clone())
1885+
})
1886+
.collect();
1887+
1888+
pruned_data.packages.retain(|key, entry| {
1889+
if PackageKey::parse(key).parent().is_none() {
1890+
return true;
1891+
}
1892+
let ident = PackageIdent::parse(&entry.ident);
1893+
if ident.is_workspace() {
1894+
return true;
1895+
}
1896+
match hoisted_idents.get(ident.name()) {
1897+
Some(hoisted_ident) => &entry.ident != hoisted_ident,
1898+
None => true,
1899+
}
1900+
});
1901+
}
1902+
17941903
// Rebuild key_to_entry HashMap for the pruned lockfile
17951904
let mut key_to_entry: HashMap<String, String> =
17961905
HashMap::with_capacity(pruned_data.packages.len());
@@ -2121,6 +2230,89 @@ mod test {
21212230
assert_eq!(result.version, "1.0.0");
21222231
}
21232232

2233+
#[test]
2234+
fn test_override_resolves_lower_version() {
2235+
let contents = serde_json::to_string(&json!({
2236+
"lockfileVersion": 0,
2237+
"workspaces": {
2238+
"": {
2239+
"name": "test",
2240+
"dependencies": {
2241+
"parent": "^1.0.0"
2242+
}
2243+
}
2244+
},
2245+
"packages": {
2246+
"parent": ["parent@1.0.0", "", {
2247+
"dependencies": {
2248+
"dep": "1.5.0"
2249+
}
2250+
}, "sha512-parent"],
2251+
"dep": ["dep@1.4.0", "", {}, "sha512-dep"]
2252+
},
2253+
"overrides": {
2254+
"dep": "1.4.0"
2255+
}
2256+
}))
2257+
.unwrap();
2258+
2259+
let lockfile = BunLockfile::from_str(&contents).unwrap();
2260+
2261+
let result = lockfile.resolve_package("", "dep", "1.5.0").unwrap();
2262+
2263+
assert!(
2264+
result.is_some(),
2265+
"Override to lower version (1.4.0) must still resolve even though 1.4.0 does not \
2266+
satisfy ^1.5.0"
2267+
);
2268+
let pkg = result.unwrap();
2269+
assert_eq!(pkg.key, "dep@1.4.0");
2270+
assert_eq!(pkg.version, "1.4.0");
2271+
}
2272+
2273+
#[test]
2274+
fn test_override_lower_version_in_transitive_closure() {
2275+
let contents = serde_json::to_string(&json!({
2276+
"lockfileVersion": 0,
2277+
"workspaces": {
2278+
"": {
2279+
"name": "test",
2280+
"dependencies": {
2281+
"parent": "^1.0.0"
2282+
}
2283+
}
2284+
},
2285+
"packages": {
2286+
"parent": ["parent@1.0.0", "", {
2287+
"dependencies": {
2288+
"dep": "1.5.0"
2289+
}
2290+
}, "sha512-parent"],
2291+
"dep": ["dep@1.4.0", "", {}, "sha512-dep"]
2292+
},
2293+
"overrides": {
2294+
"dep": "1.4.0"
2295+
}
2296+
}))
2297+
.unwrap();
2298+
2299+
let lockfile = BunLockfile::from_str(&contents).unwrap();
2300+
2301+
let unresolved_deps: std::collections::BTreeMap<String, String> =
2302+
[("parent".to_string(), "^1.0.0".to_string())]
2303+
.into_iter()
2304+
.collect();
2305+
let closure = crate::transitive_closure(&lockfile, "", unresolved_deps, false).unwrap();
2306+
2307+
let dep_keys: Vec<String> = closure.iter().map(|p| p.key.clone()).collect();
2308+
assert!(
2309+
dep_keys.contains(&"dep@1.4.0".to_string()),
2310+
"Overridden dep@1.4.0 must be in transitive closure even though parent declares \
2311+
dep@1.5.0. Got: {:?}",
2312+
dep_keys
2313+
);
2314+
}
2315+
21242316
#[test]
21252317
fn test_subgraph_filters_overrides() {
21262318
let contents = serde_json::to_string(&json!({

crates/turborepo-lockfiles/src/bun/ser.rs

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use serde::{Serialize, ser::SerializeTuple};
22

3-
use super::{PackageEntry, is_git_or_github_package};
3+
use super::{PackageEntry, is_git_or_github_package, is_tarball_or_url_package};
44

55
// Comment explaining entry schemas taken from bun.lock.zig
66
// first index is resolution for each type of package
@@ -20,33 +20,48 @@ use super::{PackageEntry, is_git_or_github_package};
2020
// this) ]
2121
impl Serialize for PackageEntry {
2222
fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
23-
let mut tuple = serializer.serialize_tuple(4)?;
23+
let is_git = is_git_or_github_package(&self.ident);
24+
let is_url = is_tarball_or_url_package(&self.ident);
2425

25-
// First value is always the package key
26+
let mut len = 1;
27+
if self.root.is_some() {
28+
len += 1;
29+
} else {
30+
if self.registry.is_some() && !is_git && !is_url {
31+
len += 1;
32+
}
33+
if self.info.is_some() {
34+
len += 1;
35+
}
36+
if let Some(checksum) = &self.checksum
37+
&& (!is_url || !checksum.is_empty())
38+
{
39+
len += 1;
40+
}
41+
}
42+
43+
let mut tuple = serializer.serialize_tuple(len)?;
2644
tuple.serialize_element(&self.ident)?;
2745

28-
// For root packages, only thing left to serialize is the root info
2946
if let Some(root) = &self.root {
3047
tuple.serialize_element(root)?;
3148
return tuple.end();
3249
}
3350

34-
// npm packages have a registry (but not git/github packages)
35-
if let Some(registry) = &self.registry {
36-
// Skip registry for git/github packages even if incorrectly
37-
// set
38-
if !is_git_or_github_package(&self.ident) {
39-
tuple.serialize_element(registry)?;
40-
}
51+
if let Some(registry) = &self.registry
52+
&& !is_git
53+
&& !is_url
54+
{
55+
tuple.serialize_element(registry)?;
4156
}
4257

43-
// All packages have info in the next slot
4458
if let Some(info) = &self.info {
4559
tuple.serialize_element(info)?;
4660
};
4761

48-
// npm packages, git, and github have a checksum/integrity
49-
if let Some(checksum) = &self.checksum {
62+
if let Some(checksum) = &self.checksum
63+
&& (!is_url || !checksum.is_empty())
64+
{
5065
tuple.serialize_element(checksum)?;
5166
}
5267

0 commit comments

Comments
 (0)