Skip to content

Commit 7a6717d

Browse files
authored
DuckDB: test curl impl, don't auto-load extensions (#6677)
#6658 but without github action changes Signed-off-by: Mikhail Kot <mikhail@spiraldb.com>
1 parent 8a9185a commit 7a6717d

5 files changed

Lines changed: 34 additions & 25 deletions

File tree

benchmarks/duckdb-bench/src/lib.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,6 @@ impl DuckClient {
8787
let connection = db.connect()?;
8888
vortex_duckdb::initialize(&db)?;
8989

90-
// Install and load httpfs so DuckDB can access remote files (S3, GCS, HTTP).
91-
connection.query("INSTALL httpfs;")?;
92-
connection.query("LOAD httpfs;")?;
93-
9490
// Enable Parquet metadata cache for all benchmark runs.
9591
//
9692
// `parquet_metadata_cache` is an extension-specific option that's

vortex-bench/src/tpcds/duckdb.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@ pub fn generate_tpcds(base_dir: PathBuf, scale_factor: String) -> Result<PathBuf
2929
let sql_script = format!(
3030
"SET autoinstall_known_extensions=1;\
3131
SET autoload_known_extensions=1;\
32-
INSTALL tpcds;\
33-
LOAD tpcds;\
3432
CALL dsdgen(sf={scale_factor});\
3533
EXPORT DATABASE '{output_dir}' (FORMAT PARQUET);",
3634
scale_factor = scale_factor,

vortex-duckdb/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,3 +73,10 @@ By default, our tests use a precompiled build which means you don't get an
7373

7474
./target/release/duckdb will be a duckdb instance with vortex-duckdb already
7575
loaded.
76+
77+
## Testing a custom DuckDB tag
78+
79+
Change `DUCKDB_VERSION` environment variable value to a preferred hash or commit
80+
(local build), or change build.rs (for testing in CI). If you use a commit,
81+
DuckDB needs to link httpfs statically so you also need to install CURL
82+
development headers (e.g. `libcurl4-openssl-dev`).

vortex-duckdb/src/e2e_test/vortex_scan_test.rs

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -361,8 +361,10 @@ fn test_vortex_scan_over_http() {
361361
let listener = TcpListener::bind("127.0.0.1:0").unwrap();
362362
let addr = listener.local_addr().unwrap();
363363

364+
// Spawn 10 threads because DuckDB does HEAD and GET requests with retries,
365+
// thus 2 threads, one for each implementation, aren't enough
364366
std::thread::spawn(move || {
365-
for _ in 0..2 {
367+
for _ in 0..10 {
366368
if let Ok((mut stream, _)) = listener.accept() {
367369
let response = format!(
368370
"HTTP/1.1 200 OK\r\nContent-Length: {}\r\n\r\n",
@@ -376,24 +378,30 @@ fn test_vortex_scan_over_http() {
376378

377379
let conn = database_connection();
378380
conn.query("SET vortex_filesystem = 'duckdb';").unwrap();
379-
conn.query("INSTALL httpfs;").unwrap();
380-
conn.query("LOAD httpfs;").unwrap();
381+
for httpfs_impl in ["httplib", "curl"] {
382+
println!("Testing httpfs client implementation: {httpfs_impl}");
383+
conn.query(&format!(
384+
"SET httpfs_client_implementation = '{httpfs_impl}';"
385+
))
386+
.unwrap();
381387

382-
let url = format!(
383-
"http://{}/{}",
384-
addr,
385-
file.path().file_name().unwrap().to_string_lossy()
386-
);
388+
let url = format!(
389+
"http://{}/{}",
390+
addr,
391+
file.path().file_name().unwrap().to_string_lossy()
392+
);
393+
println!("url={url}, file={}", file.path().display());
387394

388-
let result = conn
389-
.query(&format!("SELECT COUNT(*) FROM read_vortex('{url}')"))
390-
.unwrap();
391-
let chunk = result.into_iter().next().unwrap();
392-
let count = chunk
393-
.get_vector(0)
394-
.as_slice_with_len::<i64>(chunk.len().as_())[0];
395+
let result = conn
396+
.query(&format!("SELECT COUNT(*) FROM read_vortex('{url}')"))
397+
.unwrap();
398+
let chunk = result.into_iter().next().unwrap();
399+
let count = chunk
400+
.get_vector(0)
401+
.as_slice_with_len::<i64>(chunk.len().as_())[0];
395402

396-
assert_eq!(count, 3);
403+
assert_eq!(count, 3);
404+
}
397405
}
398406

399407
#[test]

vortex-duckdb/src/filesystem.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ pub(super) fn resolve_filesystem(
6363
ctx.erase_lifetime()
6464
}))
6565
} else if fs_config.as_str() == "vortex" {
66-
tracing::debug!(
66+
tracing::info!(
6767
"Using Vortex's object store filesystem for URL scheme '{}'",
6868
base_url.scheme()
6969
);
@@ -124,7 +124,7 @@ impl FileSystem for DuckDbFileSystem {
124124
fn list(&self, prefix: &str) -> BoxStream<'_, VortexResult<FileListing>> {
125125
let mut directory_url = self.base_url.clone();
126126
if !prefix.is_empty() {
127-
directory_url.set_path(&format!("{}/{}", directory_url.path(), prefix));
127+
directory_url.set_path(prefix);
128128
}
129129

130130
// DuckDB's ListFiles expects bare paths for local files, but full URLs
@@ -159,7 +159,7 @@ impl FileSystem for DuckDbFileSystem {
159159

160160
async fn open_read(&self, path: &str) -> VortexResult<Arc<dyn VortexReadAt>> {
161161
let mut url = self.base_url.clone();
162-
url.set_path(&format!("{}/{}", url.path(), path));
162+
url.set_path(path);
163163
let reader = unsafe { DuckDbFsReader::open_url(self.ctx.as_ptr(), &url)? };
164164
Ok(Arc::new(reader))
165165
}

0 commit comments

Comments
 (0)