Skip to content

Commit 46d9337

Browse files
jean-philippe-martinchingor13
authored andcommitted
Storage NIO: Try harder to make believable directories (#3775)
* Try harder to make believable directories Previous implementation of fake folders just looked for a trailing '/'. Now we also list the files with this prefix and use this to determine whether the input is a folder. This will detect 'dir' in addition to just 'dir/' before. This version will still say 'yes' to anything that ends in a slash, even if they don't exist, maintaining the previous behavior. * Clarify documentation and add test case * add braces for linter * apply reviewer suggestion * fix listing folders that don't end in / * fix comment * Perf, tests, comments Improved performance by moving the pseudodir code to only run on file not found, leaving the common-case path to run faster. Added tests for pseudodirs, and one in LocalStorage too. Improved comments following reviewer suggestions. * stylistic change * Optimize Remove check for path without '/' in places where it's not required. * cosmetic change for linter
1 parent 4db238d commit 46d9337

6 files changed

Lines changed: 236 additions & 43 deletions

File tree

java-storage-nio/google-cloud-nio/src/main/java/com/google/cloud/storage/contrib/nio/CloudStorageConfiguration.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,11 @@ public abstract class CloudStorageConfiguration {
5252
public abstract boolean stripPrefixSlash();
5353

5454
/**
55-
* Returns {@code true} if paths with a trailing slash should be treated as fake directories.
55+
* Returns {@code true} if directories and paths with a trailing slash should be treated as fake
56+
* directories.
57+
*
58+
* <p>With this feature, if file "foo/bar.txt" exists then both "foo" and "foo/" will be
59+
* treated as if they were existing directories.
5660
*/
5761
public abstract boolean usePseudoDirectories();
5862

java-storage-nio/google-cloud-nio/src/main/java/com/google/cloud/storage/contrib/nio/CloudStorageFileSystemProvider.java

Lines changed: 55 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import com.google.cloud.storage.StorageOptions;
3535
import com.google.common.annotations.VisibleForTesting;
3636
import com.google.common.base.MoreObjects;
37+
import com.google.common.base.Strings;
3738
import com.google.common.collect.AbstractIterator;
3839
import com.google.common.net.UrlEscapers;
3940
import com.google.common.primitives.Ints;
@@ -339,7 +340,8 @@ private SeekableByteChannel newReadChannel(Path path, Set<? extends OpenOption>
339340
}
340341
}
341342
CloudStoragePath cloudPath = CloudStorageUtil.checkPath(path);
342-
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories()) {
343+
// passing false since we just want to check if it ends with /
344+
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories(null)) {
343345
throw new CloudStoragePseudoDirectoryException(cloudPath);
344346
}
345347
return CloudStorageReadChannel.create(
@@ -355,7 +357,7 @@ private SeekableByteChannel newWriteChannel(Path path, Set<? extends OpenOption>
355357
throws IOException {
356358
initStorage();
357359
CloudStoragePath cloudPath = CloudStorageUtil.checkPath(path);
358-
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories()) {
360+
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories(null)) {
359361
throw new CloudStoragePseudoDirectoryException(cloudPath);
360362
}
361363
BlobId file = cloudPath.getBlobId();
@@ -446,7 +448,7 @@ public InputStream newInputStream(Path path, OpenOption... options) throws IOExc
446448
public boolean deleteIfExists(Path path) throws IOException {
447449
initStorage();
448450
CloudStoragePath cloudPath = CloudStorageUtil.checkPath(path);
449-
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories()) {
451+
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories(storage)) {
450452
// if the "folder" is empty then we're fine, otherwise complain
451453
// that we cannot act on folders.
452454
try (DirectoryStream<Path> paths = Files.newDirectoryStream(path)) {
@@ -574,10 +576,13 @@ public void copy(Path source, Path target, CopyOption... options) throws IOExcep
574576
"File systems associated with paths don't agree on pseudo-directories.");
575577
}
576578
}
577-
if (fromPath.seemsLikeADirectoryAndUsePseudoDirectories()) {
579+
// We refuse to use paths that end in '/'. If the user puts a folder name
580+
// but without the '/', they'll get whatever error GCS will return normally
581+
// (if any).
582+
if (fromPath.seemsLikeADirectoryAndUsePseudoDirectories(null)) {
578583
throw new CloudStoragePseudoDirectoryException(fromPath);
579584
}
580-
if (toPath.seemsLikeADirectoryAndUsePseudoDirectories()) {
585+
if (toPath.seemsLikeADirectoryAndUsePseudoDirectories(null)) {
581586
throw new CloudStoragePseudoDirectoryException(toPath);
582587
}
583588

@@ -672,9 +677,6 @@ public void checkAccess(Path path, AccessMode... modes) throws IOException {
672677
while (true) {
673678
try {
674679
CloudStoragePath cloudPath = CloudStorageUtil.checkPath(path);
675-
if ( cloudPath.seemsLikeADirectoryAndUsePseudoDirectories() ) {
676-
return;
677-
}
678680
boolean nullId;
679681
if (isNullOrEmpty(userProject)) {
680682
nullId = storage.get(
@@ -689,13 +691,26 @@ public void checkAccess(Path path, AccessMode... modes) throws IOException {
689691
== null;
690692
}
691693
if (nullId) {
694+
if ( cloudPath.seemsLikeADirectoryAndUsePseudoDirectories(storage) ) {
695+
// there is no such file, but we're not signalling error because the
696+
// path is a pseudo-directory.
697+
return;
698+
}
692699
throw new NoSuchFileException(path.toString());
693700
}
694701
break;
695702
} catch (StorageException exs) {
696703
// Will rethrow a StorageException if all retries/reopens are exhausted
697704
retryHandler.handleStorageException(exs);
698705
// we're being aggressive by retrying even on scenarios where we'd normally reopen.
706+
} catch (IllegalArgumentException exs) {
707+
if ( CloudStorageUtil.checkPath(path).seemsLikeADirectoryAndUsePseudoDirectories(storage) ) {
708+
// there is no such file, but we're not signalling error because the
709+
// path is a pseudo-directory.
710+
return;
711+
}
712+
// Other cause for IAE, forward the exception.
713+
throw exs;
699714
}
700715
}
701716
}
@@ -715,19 +730,34 @@ public <A extends BasicFileAttributes> A readAttributes(
715730
while (true) {
716731
try {
717732
CloudStoragePath cloudPath = CloudStorageUtil.checkPath(path);
718-
if ( cloudPath.seemsLikeADirectoryAndUsePseudoDirectories() ) {
719-
@SuppressWarnings("unchecked")
720-
A result = (A) new CloudStoragePseudoDirectoryAttributes(cloudPath);
721-
return result;
722-
}
723-
BlobInfo blobInfo;
724-
if (isNullOrEmpty(userProject)) {
725-
blobInfo = storage.get(cloudPath.getBlobId());
726-
} else {
727-
blobInfo = storage.get(cloudPath.getBlobId(), BlobGetOption.userProject(userProject));
733+
BlobInfo blobInfo = null;
734+
try {
735+
BlobId blobId = cloudPath.getBlobId();
736+
// Null or empty name won't give us a file, so skip. But perhaps it's a pseudodir.
737+
if (!isNullOrEmpty(blobId.getName())) {
738+
if (isNullOrEmpty(userProject)) {
739+
blobInfo = storage.get(blobId);
740+
} else {
741+
blobInfo = storage.get(blobId, BlobGetOption.userProject(userProject));
742+
}
743+
}
744+
} catch (IllegalArgumentException ex) {
745+
// the path may be invalid but look like a folder. In that case, return a folder.
746+
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories(storage)) {
747+
@SuppressWarnings("unchecked")
748+
A result = (A) new CloudStoragePseudoDirectoryAttributes(cloudPath);
749+
return result;
750+
}
751+
// No? Propagate.
752+
throw ex;
728753
}
729754
// null size indicate a file that we haven't closed yet, so GCS treats it as not there yet.
730755
if ( null == blobInfo || blobInfo.getSize() == null ) {
756+
if (cloudPath.seemsLikeADirectoryAndUsePseudoDirectories(storage)) {
757+
@SuppressWarnings("unchecked")
758+
A result = (A) new CloudStoragePseudoDirectoryAttributes(cloudPath);
759+
return result;
760+
}
731761
throw new NoSuchFileException(
732762
"gs://" + cloudPath.getBlobId().getBucket() + "/" + cloudPath.getBlobId().getName());
733763
}
@@ -785,7 +815,13 @@ public DirectoryStream<Path> newDirectoryStream(final Path dir, final Filter<? s
785815
// Loop will terminate via an exception if all retries are exhausted
786816
while (true) {
787817
try {
788-
final String prefix = cloudPath.toRealPath().toString();
818+
String prePrefix = cloudPath.normalize().toRealPath().toString();
819+
// we can recognize paths without the final "/" as folders,
820+
// but storage.list doesn't do the right thing with those, we need to append a "/".
821+
if (!prePrefix.isEmpty() && !prePrefix.endsWith("/")) {
822+
prePrefix += "/";
823+
}
824+
final String prefix = prePrefix;
789825
Page<Blob> dirList;
790826
if (isNullOrEmpty(userProject)) {
791827
dirList = storage.list(cloudPath.bucket(),

java-storage-nio/google-cloud-nio/src/main/java/com/google/cloud/storage/contrib/nio/CloudStoragePath.java

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@
1919
import static com.google.common.base.Preconditions.checkArgument;
2020
import static com.google.common.base.Preconditions.checkNotNull;
2121

22+
import com.google.api.gax.paging.Page;
23+
import com.google.cloud.storage.Blob;
2224
import com.google.cloud.storage.BlobId;
25+
import com.google.cloud.storage.Storage;
2326
import com.google.common.collect.UnmodifiableIterator;
2427

2528
import java.io.File;
@@ -83,8 +86,50 @@ boolean seemsLikeADirectory() {
8386
return path.seemsLikeADirectory();
8487
}
8588

86-
boolean seemsLikeADirectoryAndUsePseudoDirectories() {
87-
return path.seemsLikeADirectory() && fileSystem.config().usePseudoDirectories();
89+
// True if this path may be a directory (and pseudo-directories are enabled)
90+
// Checks:
91+
// 1) does the path end in / ?
92+
// 2) (optional, if storage is set) is there a file whose name starts with path+/ ?
93+
boolean seemsLikeADirectoryAndUsePseudoDirectories(Storage storage) {
94+
if (!fileSystem.config().usePseudoDirectories()) {
95+
return false;
96+
}
97+
if (path.seemsLikeADirectory()) {
98+
return true;
99+
}
100+
// fancy case: the file name doesn't end in slash, but we've been asked to have pseudo dirs.
101+
// Let's see if there are any files with this prefix.
102+
if (storage == null) {
103+
// we are in a context where we don't want to access the storage, so we conservatively
104+
// say this isn't a directory.
105+
return false;
106+
}
107+
// Using the provided path + "/" as a prefix, can we find one file? If so, the path
108+
// is a directory.
109+
String prefix = path.removeBeginningSeparator().toString();
110+
if (!prefix.endsWith("/")) {
111+
prefix += "/";
112+
}
113+
Page<Blob> list = storage.list(
114+
this.bucket(),
115+
Storage.BlobListOption.prefix(prefix),
116+
// we only look at the first result, so no need for a bigger page.
117+
Storage.BlobListOption.pageSize(1));
118+
for (Blob b : list.getValues()) {
119+
// if this blob starts with our prefix and then a slash, then prefix is indeed a folder!
120+
if (b.getBlobId() == null) {
121+
continue;
122+
}
123+
String name = b.getBlobId().getName();
124+
if (name == null) {
125+
continue;
126+
}
127+
if (("/" + name).startsWith(this.path.toAbsolutePath() + "/")) {
128+
return true;
129+
}
130+
}
131+
// no match, so it's not a directory
132+
return false;
88133
}
89134

90135
@Override

java-storage-nio/google-cloud-nio/src/main/java/com/google/cloud/storage/contrib/nio/testing/FakeStorageRpc.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ public Tuple<String, Iterable<StorageObject>> list(String bucket, Map<Option, ?>
124124
throws StorageException {
125125
String delimiter = null;
126126
String preprefix = "";
127+
long maxResults = Long.MAX_VALUE;
127128
for (Map.Entry<Option, ?> e : options.entrySet()) {
128129
switch (e.getKey()) {
129130
case PREFIX:
@@ -138,6 +139,9 @@ public Tuple<String, Iterable<StorageObject>> list(String bucket, Map<Option, ?>
138139
case FIELDS:
139140
// ignore and return all the fields
140141
break;
142+
case MAX_RESULTS:
143+
maxResults = (Long) e.getValue();
144+
break;
141145
default:
142146
throw new UnsupportedOperationException("Unknown option: " + e.getKey());
143147
}
@@ -156,6 +160,16 @@ public Tuple<String, Iterable<StorageObject>> list(String bucket, Map<Option, ?>
156160
values.add(so);
157161
}
158162
values.addAll(folders.values());
163+
164+
// truncate to max allowed length
165+
if (values.size() > maxResults) {
166+
List<StorageObject> newValues = new ArrayList<>();
167+
for (int i=0; i < maxResults; i++) {
168+
newValues.add(values.get(i));
169+
}
170+
values = newValues;
171+
}
172+
159173
// null cursor to indicate there is no more data (empty string would cause us to be called again).
160174
// The type cast seems to be necessary to help Java's typesystem remember that collections are iterable.
161175
return Tuple.of(null, (Iterable<StorageObject>) values);

java-storage-nio/google-cloud-nio/src/test/java/com/google/cloud/storage/contrib/nio/CloudStorageFileSystemProviderTest.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import org.junit.runner.RunWith;
3939
import org.junit.runners.JUnit4;
4040

41+
import java.io.IOException;
4142
import java.io.InputStream;
4243
import java.io.OutputStream;
4344
import java.net.URI;
@@ -54,6 +55,7 @@
5455
import java.nio.file.OpenOption;
5556
import java.nio.file.Path;
5657
import java.nio.file.Paths;
58+
import java.util.ArrayList;
5759
import java.util.HashMap;
5860
import java.util.List;
5961
import java.util.Map;
@@ -397,6 +399,53 @@ public void testExists_trailingSlash_disablePseudoDirectories() throws Exception
397399
}
398400
}
399401

402+
@Test
403+
public void testFakeDirectories() throws IOException {
404+
try (FileSystem fs = forBucket("military")) {
405+
List<Path> paths = new ArrayList<>();
406+
paths.add(fs.getPath("dir/angel"));
407+
paths.add(fs.getPath("dir/deepera"));
408+
paths.add(fs.getPath("dir/deeperb"));
409+
paths.add(fs.getPath("dir/deeper_"));
410+
paths.add(fs.getPath("dir/deeper.sea/hasfish"));
411+
paths.add(fs.getPath("dir/deeper/fish"));
412+
for (Path path : paths) {
413+
Files.createFile(path);
414+
}
415+
416+
// ends with slash, must be a directory
417+
assertThat(Files.isDirectory(fs.getPath("dir/"))).isTrue();
418+
// files are not directories
419+
assertThat(Files.exists(fs.getPath("dir/angel"))).isTrue();
420+
assertThat(Files.isDirectory(fs.getPath("dir/angel"))).isFalse();
421+
// directories are recognized even without the trailing "/"
422+
assertThat(Files.isDirectory(fs.getPath("dir"))).isTrue();
423+
// also works for absolute paths
424+
assertThat(Files.isDirectory(fs.getPath("/dir"))).isTrue();
425+
// non-existent files are not directories (but they don't make us crash)
426+
assertThat(Files.isDirectory(fs.getPath("di"))).isFalse();
427+
assertThat(Files.isDirectory(fs.getPath("dirs"))).isFalse();
428+
assertThat(Files.isDirectory(fs.getPath("dir/deep"))).isFalse();
429+
assertThat(Files.isDirectory(fs.getPath("dir/deeper/fi"))).isFalse();
430+
assertThat(Files.isDirectory(fs.getPath("/dir/deeper/fi"))).isFalse();
431+
// also works for subdirectories
432+
assertThat(Files.isDirectory(fs.getPath("dir/deeper/"))).isTrue();
433+
assertThat(Files.isDirectory(fs.getPath("dir/deeper"))).isTrue();
434+
assertThat(Files.isDirectory(fs.getPath("/dir/deeper/"))).isTrue();
435+
assertThat(Files.isDirectory(fs.getPath("/dir/deeper"))).isTrue();
436+
// dot and .. folders are directories
437+
assertThat(Files.isDirectory(fs.getPath("dir/deeper/."))).isTrue();
438+
assertThat(Files.isDirectory(fs.getPath("dir/deeper/.."))).isTrue();
439+
// dots in the name are fine
440+
assertThat(Files.isDirectory(fs.getPath("dir/deeper.sea/"))).isTrue();
441+
assertThat(Files.isDirectory(fs.getPath("dir/deeper.sea"))).isTrue();
442+
assertThat(Files.isDirectory(fs.getPath("dir/deeper.seax"))).isFalse();
443+
// the root folder is a directory
444+
assertThat(Files.isDirectory(fs.getPath("/"))).isTrue();
445+
assertThat(Files.isDirectory(fs.getPath(""))).isTrue();
446+
}
447+
}
448+
400449
@Test
401450
public void testDelete() throws Exception {
402451
Files.write(Paths.get(URI.create("gs://love/fashion")), "(✿◕ ‿◕ )ノ".getBytes(UTF_8));

0 commit comments

Comments
 (0)