vortex/docs/references.bib at develop · vortex-data/vortex

174 lines (165 loc) · 16.4 KB
@article{2023/BtrBlocks,
    author = {Kuschewski, Maximilian and Sauerwein, David and Alhomssi, Adnan and Leis, Viktor},
    title = {{BtrBlocks}: Efficient Columnar Compression for Data Lakes},
    year = {2023},
    issue_date = {June 2023},
    publisher = {Association for Computing Machinery},
    address = {New York, NY, USA},
    volume = {1},
    number = {2},
    url = {https://doi.org/10.1145/3589263},
    doi = {10.1145/3589263},
    abstract = {Analytics is moving to the cloud and data is moving into data lakes. These reside on object storage services like S3 and enable seamless data sharing and system interoperability. To support this, many systems build on open storage formats like Apache Parquet. However, these formats are not optimized for remotely-accessed data lakes and today's high-throughput networks. Inefficient decompression makes scans CPU-bound and thus increases query time and cost. With this work we present BtrBlocks, an open columnar storage format designed for data lakes. BtrBlocks uses a set of lightweight encoding schemes, achieving fast and efficient decompression and high compression ratios.},
    journal = {Proc. ACM Manag. Data},
    month = jun,
    articleno = {118},
    numpages = {26},
    keywords = {columnar storage, compression, data lake, query processing},
}
@article{2023/FastLanes,
    author = {Afroozeh, Azim and Boncz, Peter},
    title = {The {FastLanes} Compression Layout: Decoding > 100 Billion Integers per Second with Scalar Code},
    year = {2023},
    issue_date = {May 2023},
    publisher = {VLDB Endowment},
    volume = {16},
    number = {9},
    issn = {2150-8097},
    url = {https://doi.org/10.14778/3598581.3598587},
    doi = {10.14778/3598581.3598587},
    abstract = {The open-source FastLanes project aims to improve big data formats, such as Parquet, ORC and columnar database formats, in multiple ways. In this paper, we significantly accelerate decoding of all common Light-Weight Compression (LWC) schemes: DICT, FOR, DELTA and RLE through better data-parallelism. We do so by re-designing the compression layout using two main ideas: (i) generalizing the value interleaving technique in the basic operation of bit-(un)packing by targeting a virtual 1024-bits SIMD register, (ii) reordering the tuples in all columns of a table in the same Unified Transposed Layout that puts tuple chunks in a common "04261537" order (explained in the paper); allowing for maximum independent work for all possible basic SIMD lane widths: 8, 16, 32, and 64 bits.We address the software development, maintenance and future-proofness challenges of increasing hardware diversity, by defining a virtual 1024-bits instruction set that consists of simple operators supported by all SIMD dialects; and also, importantly, by scalar code. The interleaved and tuple-reordered layout actually makes scalar decoding faster, extracting more data-parallelism from today's wide-issue CPUs. Importantly, the scalar version can be fully auto-vectorized by modern compilers, eliminating technical debt in software caused by platform-specific SIMD intrinsics.Micro-benchmarks on Intel, AMD, Apple and AWS CPUs show that FastLanes accelerates decoding by factors (decoding >40 values per CPU cycle). FastLanes can make queries faster, as compressing the data reduces bandwidth needs, while decoding is almost free.},
    journal = {Proc. VLDB Endow.},
    month = may,
    pages = {2132--2144},
    numpages = {13},
}
@article{2023/ALP,
    author = {Afroozeh, Azim and Kuffo, Leonardo X. and Boncz, Peter},
    title = {{ALP}: Adaptive Lossless floating-Point Compression},
    year = {2023},
    issue_date = {December 2023},
    publisher = {Association for Computing Machinery},
    address = {New York, NY, USA},
    volume = {1},
    number = {4},
    url = {https://doi.org/10.1145/3626717},
    doi = {10.1145/3626717},
    abstract = {IEEE 754 doubles do not exactly represent most real values, introducing rounding errors in computations and [de]serialization to text. These rounding errors inhibit the use of existing lightweight compression schemes such as Delta and Frame Of Reference (FOR), but recently new schemes were proposed: Gorilla, Chimp128, PseudoDecimals (PDE), Elf and Patas. However, their compression ratios are not better than those of general-purpose compressors such as Zstd; while [de]compression is much slower than Delta and FOR.We propose and evaluate ALP, that significantly improves these previous schemes in both speed and compression ratio (Figure 1). We created ALP after carefully studying the datasets used to evaluate the previous schemes. To obtain speed, ALP is designed to fit vectorized execution. This turned out to be key for also improving the compression ratio, as we found in-vector commonalities to create compression opportunities. ALP is an adaptive scheme that uses a strongly enhanced version of PseudoDecimals [31] to losslessly encode doubles as integers if they originated as decimals, and otherwise uses vectorized compression of the doubles' front bits. Its high speeds stem from our implementation in scalar code that auto-vectorizes, using building blocks provided by our FastLanes library [6], and an efficient two-stage compression algorithm that first samples row-groups and then vectors.},
    journal = {Proc. ACM Manag. Data},
    month = dec,
    articleno = {230},
    numpages = {26},
    keywords = {big data formats, columnar storage, floating point compression, lightweight compression, lossless compression, vectorized execution},
}
@inproceedings{2024/FastLanesGPU,
    author = {Afroozeh, Azim and Felius, Lotte and Boncz, Peter},
    title = {Accelerating {GPU} Data Processing using {FastLanes} Compression},
    year = {2024},
    isbn = {9798400706677},
    publisher = {Association for Computing Machinery},
    address = {New York, NY, USA},
    url = {https://doi.org/10.1145/3662010.3663450},
    doi = {10.1145/3662010.3663450},
    abstract = {We show that compression can be a win-win for GPU data processing: it not only allows to store more data in GPU global memory, but can also accelerate data processing. We show that the complete redesign of compressed columnar storage in FastLanes, with its fully data-parallel bit-packing and encodings, also benefits GPU hardware. We micro-benchmark the performance of FastLanes on two GPU architectures (Nvidia T4 and V100) and integrate FastLanes in the Crystal GPU query processing prototype. Our experiments show that FastLanes decompression significantly outperforms previous decompression methods in micro-benchmarks, and can make end-to-end SSB queries up to twice faster compared to uncompressed query processing - in contrast to previous work where GPU decompression caused execution to slow down. We further discovered that an access granularity of decoding vectors of 1024 values is too large for a single GPU warp due to register pressure. We mitigate this here using mini-vectors - a future work question is how to further reduce this granularity with minimal impact on efficiency.},
    booktitle = {Proceedings of the 20th International Workshop on Data Management on New Hardware},
    articleno = {8},
    numpages = {11},
    location = {Santiago, AA, Chile},
    series = {DaMoN '24},
}
@article{2020/FSST,
    author = {Boncz, Peter and Neumann, Thomas and Leis, Viktor},
    title = {{FSST}: fast random access string compression},
    year = {2020},
    issue_date = {August 2020},
    publisher = {VLDB Endowment},
    volume = {13},
    number = {12},
    issn = {2150-8097},
    url = {https://doi.org/10.14778/3407790.3407851},
    doi = {10.14778/3407790.3407851},
    abstract = {Strings are prevalent in real-world data sets. They often occupy a large fraction of the data and are slow to process. In this work, we present Fast Static Symbol Table (FSST), a lightweight compression scheme for strings. On text data, FSST offers decompression and compression speed similar to or better than the best speed-optimized compression methods, such as LZ4, yet offers significantly better compression factors. Moreover, its use of a static symbol table allows random access to individual, compressed strings, enabling lazy decompression and query processing on compressed data. We believe these features will make FSST a valuable piece in the standard compression toolbox.},
    journal = {Proc. VLDB Endow.},
    month = jul,
    pages = {2649--2661},
    numpages = {13},
}
@article{2019/Procella,
    author = {Chattopadhyay, Biswapesh and Dutta, Priyam and Liu, Weiran and Tinn, Ott and McCormick, Andrew and Mokashi, Aniket and Harvey, Paul and Gonzalez, Hector and Lomax, David and Mittal, Sagar and Ebenstein, Roee Aharon and Mikhaylin, Nikita and Lee, Hung-ching and Zhao, Xiaoyan and Xu, Guanzhong and Perez, Luis Antonio and Shahmohammadi, Farhad and Bui, Tran and McKay, Neil and Lychagina, Vera and Elliott, Brett},
    title = {{Procella}: Unifying serving and analytical data at {YouTube}},
    year = {2019},
    volume = {12},
    number = {12},
    url = {https://dl.acm.org/citation.cfm?id=3360438},
    journal = {Proc. VLDB Endow.},
    pages = {2022--2034},
}
@article{2023/AnyBlob,
    author = {Durner, Dominik and Leis, Viktor and Neumann, Thomas},
    title = {Exploiting Cloud Object Storage for High-Performance Analytics},
    year = {2023},
    issue_date = {July 2023},
    publisher = {VLDB Endowment},
    volume = {16},
    number = {11},
    issn = {2150-8097},
    url = {https://doi.org/10.14778/3611479.3611486},
    doi = {10.14778/3611479.3611486},
    abstract = {Elasticity of compute and storage is crucial for analytical cloud database systems. All cloud vendors provide disaggregated object stores, which can be used as storage backend for analytical query engines. Until recently, local storage was unavoidable to process large tables efficiently due to the bandwidth limitations of the network infrastructure in public clouds. However, the gap between remote network and local NVMe bandwidth is closing, making cloud storage more attractive. This paper presents a blueprint for performing efficient analytics directly on cloud object stores. We derive cost- and performance-optimal retrieval configurations for cloud object stores with the first in-depth study of this foundational service in the context of analytical query processing. For achieving high retrieval performance, we present AnyBlob, a novel download manager for query engines that optimizes throughput while minimizing CPU usage. We discuss the integration of high-performance data retrieval in query engines and demonstrate it by incorporating AnyBlob in our database system Umbra. Our experiments show that even without caching, Umbra with integrated AnyBlob achieves similar performance to state-of-the-art cloud data warehouses that cache data on local SSDs while improving resource elasticity.},
    journal = {Proc. VLDB Endow.},
    month = jul,
    pages = {2769--2782},
    numpages = {14},
}
@article{2023/SelectionPushdown,
    author = {Li, Yinan and Lu, Jianan and Chandramouli, Badrish},
    title = {Selection Pushdown in Column Stores using Bit Manipulation Instructions},
    year = {2023},
    issue_date = {June 2023},
    publisher = {Association for Computing Machinery},
    address = {New York, NY, USA},
    volume = {1},
    number = {2},
    url = {https://doi.org/10.1145/3589323},
    doi = {10.1145/3589323},
    abstract = {Modern analytical database systems predominantly rely on column-oriented storage, which offers superior compression efficiency due to the nature of the columnar layout. This compression, however, creates challenges in decoding speed during query processing. Previous research has explored predicate pushdown on encoded values to avoid decoding, but these techniques are restricted to specific encoding schemes and predicates, limiting their practical use. In this paper, we propose a generic predicate pushdown approach that supports arbitrary predicates by leveraging selection pushdown to reduce decoding costs. At the core of our approach is a fast select operator capable of directly extracting selected encoded values without decoding, by using Bit Manipulation Instructions, an instruction set extension to the X86 architecture. We empirically evaluate the proposed techniques in the context of Apache Parquet using both micro-benchmarks and the TPC-H benchmark, and show that our techniques improve the query performance of Parquet by up to one order of magnitude with representative scan queries. Further experimentation using Apache Spark demonstrates speed improvements of up to 5.5X even for end-to-end queries involving complex joins.},
    journal = {Proc. ACM Manag. Data},
    month = jun,
    articleno = {178},
    numpages = {26},
    keywords = {BMI, column stores, compression, predicate pushdown},
}
@inproceedings{2021/JSONTiles,
    author = {Durner, Dominik and Leis, Viktor and Neumann, Thomas},
    title = {{JSON} Tiles: Fast Analytics on Semi-Structured Data},
    year = {2021},
    isbn = {9781450383431},
    publisher = {Association for Computing Machinery},
    address = {New York, NY, USA},
    url = {https://doi.org/10.1145/3448016.3452809},
    doi = {10.1145/3448016.3452809},
    abstract = {Developers often prefer flexibility over upfront schema design, making semi-structured data formats such as JSON increasingly popular. Large amounts of JSON data are therefore stored and analyzed by relational database systems. In existing systems, however, JSON's lack of a fixed schema results in slow analytics. In this paper, we present JSON tiles, which, without losing the flexibility of JSON, enables relational systems to perform analytics on JSON data at native speed. JSON tiles automatically detects the most important keys and extracts them transparently - often achieving scan performance similar to columnar storage. At the same time, JSON tiles is capable of handling heterogeneous and changing data. Furthermore, we automatically collect statistics that enable the query optimizer to find good execution plans. Our experimental evaluation compares against state-of-the-art systems and research proposals and shows that our approach is both robust and efficient.},
    booktitle = {Proceedings of the 2021 International Conference on Management of Data},
    pages = {445--458},
    numpages = {14},
    keywords = {storage, semi-structured data, scan, olap, jsonb, json, analytics},
    location = {Virtual Event, China},
    series = {SIGMOD '21},
}
@article{2024/MementoFilter,
    author = {Eslami, Navid and Dayan, Niv},
    title = {Memento Filter: A Fast, Dynamic, and Robust Range Filter},
    year = {2024},
    issue_date = {December 2024},
    publisher = {Association for Computing Machinery},
    address = {New York, NY, USA},
    volume = {2},
    number = {6},
    url = {https://doi.org/10.1145/3698820},
    doi = {10.1145/3698820},
    abstract = {Range filters are probabilistic data structures that answer approximate range emptiness queries. They aid in avoiding processing empty range queries and have use cases in many application domains such as key-value stores and social web analytics. However, current range filters do not support dynamically changing and growing datasets. Moreover, several of these designs also exhibit impractically high false positive rates under correlated workloads, which are common in practice. These impediments restrict the applicability of range filters across a wide range of use cases.We introduce Memento filter, the first range filter to simultaneously offer dynamicity, fast operations, and a robust false positive rate for any workload. Memento filter partitions the key universe and clusters its keys according to this partitioning. For each cluster, it stores a fingerprint and a list of key suffixes contiguously. The encoding of these lists makes them amenable to existing dynamic filter structures. Due to the one-to-one mapping from keys to suffixes, Memento filter supports inserts and deletes and can even expand to accommodate a growing dataset.We implement Memento filter on top of a Rank-and-Select Quotient filter and InfiniFilter and demonstrate that it achieves a competitive false positive rate and performance with the state of the art while also providing dynamicity. Due to its dynamicity, Memento filter is the first range filter applicable to B-Trees. We showcase this by integrating Memento filter into WiredTiger, a B-Tree-based key-value store, significantly boosting its performance for mixed workloads.},
    journal = {Proc. ACM Manag. Data},
    month = dec,
    articleno = {244},
    numpages = {27},
    keywords = {data growth, dynamic data structure, range filter, scalability},
}
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

references.bib

Latest commit

History

references.bib

File metadata and controls