Skip to content

Commit 8060ae0

Browse files
authored
Support partitionBy in VortexSparkDataSource (#7218)
Support partitionBy in spark reader/writer --------- Signed-off-by: Robert Kruszewski <github@robertk.io>
1 parent 3ea259e commit 8060ae0

17 files changed

Lines changed: 978 additions & 143 deletions

java/testfiles/Cargo.lock

Lines changed: 18 additions & 18 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

java/vortex-jni/src/test/java/dev/vortex/api/DTypeTest.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,8 @@ public void testNestedFixedSizeList() {
6060
public void testFixedSizeListInStruct() {
6161
var elementType = DType.newFloat(false);
6262
var fslType = DType.newFixedSizeList(elementType, 3, false);
63-
var structType = DType.newStruct(
64-
new String[] {"id", "embedding"},
65-
new DType[] {DType.newInt(false), fslType},
66-
false);
63+
var structType =
64+
DType.newStruct(new String[] {"id", "embedding"}, new DType[] {DType.newInt(false), fslType}, false);
6765
assertEquals(DType.Variant.STRUCT, structType.getVariant());
6866

6967
var fieldTypes = structType.getFieldTypes();

java/vortex-jni/src/test/java/dev/vortex/jni/JNIWriterTest.java

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
package dev.vortex.jni;
55

6+
import static java.nio.charset.StandardCharsets.UTF_8;
67
import static org.junit.jupiter.api.Assertions.assertEquals;
78
import static org.junit.jupiter.api.Assertions.assertNotNull;
89
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -11,8 +12,6 @@
1112
import dev.vortex.api.ScanOptions;
1213
import dev.vortex.api.VortexWriter;
1314
import dev.vortex.arrow.ArrowAllocation;
14-
import static java.nio.charset.StandardCharsets.UTF_8;
15-
1615
import java.io.IOException;
1716
import java.nio.file.Files;
1817
import java.nio.file.Path;
@@ -81,9 +80,7 @@ public void testWriteBatchFfi() throws IOException {
8180
String writePath = outputPath.toAbsolutePath().toUri().toString();
8281

8382
var writeSchema = DType.newStruct(
84-
new String[] {"name", "age"},
85-
new DType[] {DType.newUtf8(false), DType.newInt(false)},
86-
false);
83+
new String[] {"name", "age"}, new DType[] {DType.newUtf8(false), DType.newInt(false)}, false);
8784

8885
BufferAllocator allocator = ArrowAllocation.rootAllocator();
8986

java/vortex-spark/src/main/java/dev/vortex/spark/VortexDataSourceV2.java

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,20 @@
1111
import dev.vortex.api.Files;
1212
import dev.vortex.jni.NativeFileMethods;
1313
import dev.vortex.spark.config.HadoopUtils;
14+
import dev.vortex.spark.read.PartitionPathUtils;
1415
import java.util.Map;
1516
import java.util.Objects;
1617
import java.util.Optional;
18+
import java.util.Set;
19+
import java.util.stream.Collectors;
20+
import java.util.stream.Stream;
1721
import org.apache.spark.sql.SparkSession;
1822
import org.apache.spark.sql.connector.catalog.CatalogV2Util;
1923
import org.apache.spark.sql.connector.catalog.Table;
2024
import org.apache.spark.sql.connector.catalog.TableProvider;
2125
import org.apache.spark.sql.connector.expressions.Transform;
2226
import org.apache.spark.sql.sources.DataSourceRegister;
27+
import org.apache.spark.sql.types.DataType;
2328
import org.apache.spark.sql.types.StructType;
2429
import org.apache.spark.sql.util.CaseInsensitiveStringMap;
2530
import scala.Option;
@@ -81,18 +86,31 @@ public StructType inferSchema(CaseInsensitiveStringMap options) {
8186
.findFirst();
8287

8388
if (firstFile.isEmpty()) {
84-
// Return empty struct if no files found
85-
// TODO(aduffy): how does Parquet handle this?
8689
return new StructType();
8790
} else {
8891
pathToInfer = firstFile.get();
8992
}
9093
}
9194

95+
StructType dataSchema;
9296
try (File file = Files.open(pathToInfer, formatOptions)) {
9397
var columns = SparkTypes.toColumns(file.getDType());
94-
return CatalogV2Util.v2ColumnsToStructType(columns);
98+
dataSchema = CatalogV2Util.v2ColumnsToStructType(columns);
9599
}
100+
101+
// Discover partition columns from Hive-style directory paths and append them.
102+
Map<String, String> partitionValues = PartitionPathUtils.parsePartitionValues(pathToInfer);
103+
if (!partitionValues.isEmpty()) {
104+
Set<String> dataColumnNames = Stream.of(dataSchema.fieldNames()).collect(Collectors.toSet());
105+
for (Map.Entry<String, String> entry : partitionValues.entrySet()) {
106+
if (!dataColumnNames.contains(entry.getKey())) {
107+
DataType type = PartitionPathUtils.inferPartitionColumnType(entry.getValue());
108+
dataSchema = dataSchema.add(entry.getKey(), type, true);
109+
}
110+
}
111+
}
112+
113+
return dataSchema;
96114
}
97115

98116
/**
@@ -102,16 +120,16 @@ public StructType inferSchema(CaseInsensitiveStringMap options) {
102120
* Vortex files. The partitioning parameter is currently ignored.
103121
*
104122
* @param schema the table schema
105-
* @param _partitioning table partitioning transforms (currently ignored)
123+
* @param partitioning table partitioning transforms
106124
* @param properties the table properties containing file paths and other options
107125
* @return a VortexTable instance for reading and writing data
108126
* @throws RuntimeException if required path properties are missing
109127
*/
110128
@Override
111-
public Table getTable(StructType schema, Transform[] _partitioning, Map<String, String> properties) {
129+
public Table getTable(StructType schema, Transform[] partitioning, Map<String, String> properties) {
112130
var uncased = new CaseInsensitiveStringMap(properties);
113131
ImmutableList<String> paths = getPaths(uncased);
114-
return new VortexTable(paths, schema, buildDataSourceOptions(properties));
132+
return new VortexTable(paths, schema, buildDataSourceOptions(properties), partitioning);
115133
}
116134

117135
/**

java/vortex-spark/src/main/java/dev/vortex/spark/VortexFilePartition.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,25 @@ public final class VortexFilePartition implements InputPartition, Serializable {
2121
private final String path;
2222
private final ImmutableList<Column> columns;
2323
private final ImmutableMap<String, String> formatOptions;
24+
private final ImmutableMap<String, String> partitionValues;
2425

2526
/**
2627
* Creates a new Vortex file partition.
2728
*
2829
* @param path the file system path to the Vortex file
2930
* @param columns the list of columns to read from the file
31+
* @param formatOptions options for accessing the file (S3/Azure credentials, etc.)
32+
* @param partitionValues Hive-style partition column values extracted from the file path
3033
*/
31-
public VortexFilePartition(String path, ImmutableList<Column> columns, ImmutableMap<String, String> formatOptions) {
34+
public VortexFilePartition(
35+
String path,
36+
ImmutableList<Column> columns,
37+
ImmutableMap<String, String> formatOptions,
38+
ImmutableMap<String, String> partitionValues) {
3239
this.path = path;
3340
this.columns = columns;
3441
this.formatOptions = formatOptions;
42+
this.partitionValues = partitionValues;
3543
}
3644

3745
/**
@@ -55,4 +63,14 @@ public ImmutableList<Column> getColumns() {
5563
public Map<String, String> getFormatOptions() {
5664
return formatOptions;
5765
}
66+
67+
/**
68+
* Returns the partition column values parsed from this file's Hive-style directory path.
69+
* Keys are column names, values are the string-encoded partition values.
70+
*
71+
* @return the partition values, empty if the file is not in a partitioned directory
72+
*/
73+
public ImmutableMap<String, String> getPartitionValues() {
74+
return partitionValues;
75+
}
5876
}

java/vortex-spark/src/main/java/dev/vortex/spark/VortexTable.java

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import java.util.Map;
1212
import java.util.Set;
1313
import org.apache.spark.sql.connector.catalog.*;
14+
import org.apache.spark.sql.connector.expressions.Transform;
1415
import org.apache.spark.sql.connector.read.ScanBuilder;
1516
import org.apache.spark.sql.connector.write.LogicalWriteInfo;
1617
import org.apache.spark.sql.connector.write.WriteBuilder;
@@ -26,14 +27,20 @@ public final class VortexTable implements Table, SupportsRead, SupportsWrite {
2627
private final ImmutableList<String> paths;
2728
private final StructType schema;
2829
private final Map<String, String> formatOptions;
30+
private final Transform[] partitionTransforms;
2931

3032
/**
3133
* Creates a new VortexTable with read/write support.
3234
*/
33-
public VortexTable(ImmutableList<String> paths, StructType schema, Map<String, String> formatOptions) {
35+
public VortexTable(
36+
ImmutableList<String> paths,
37+
StructType schema,
38+
Map<String, String> formatOptions,
39+
Transform[] partitionTransforms) {
3440
this.paths = paths;
3541
this.schema = schema;
3642
this.formatOptions = formatOptions;
43+
this.partitionTransforms = partitionTransforms;
3744
}
3845

3946
/**
@@ -93,7 +100,17 @@ public StructType schema() {
93100
public WriteBuilder newWriteBuilder(LogicalWriteInfo info) {
94101
// Make sure only one write path was provided.
95102
String writePath = Iterables.getOnlyElement(paths);
96-
return new VortexWriteBuilder(writePath, info, formatOptions);
103+
return new VortexWriteBuilder(writePath, info, formatOptions, partitionTransforms);
104+
}
105+
106+
/**
107+
* Returns the partitioning transforms for this table.
108+
*
109+
* @return an array of partition transforms
110+
*/
111+
@Override
112+
public Transform[] partitioning() {
113+
return partitionTransforms;
97114
}
98115

99116
/**

0 commit comments

Comments (0)