forked from tensorflow/tensorflow
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgcs.py
More file actions
132 lines (108 loc) · 3.98 KB
/
gcs.py
File metadata and controls
132 lines (108 loc) · 3.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions for communicating with Google Cloud Storage."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import subprocess
from tensorflow.python.platform import tf_logging as logging
# All GCS paths should start with this.
PATH_PREFIX = 'gs://'
# TODO(phurst): We should use the GCS Python API.
def CopyContents(gcs_path, byte_offset, local_file):
  """Copies the contents of gcs_path from byte_offset onwards to local_file.

  Runs `gsutil cat -r <byte_offset>- <gcs_path>` with the command's stdout
  redirected into local_file, then flushes local_file.

  Args:
    gcs_path: The path to the GCS object. Must start with PATH_PREFIX.
    byte_offset: The (non-negative) byte offset to start copying from.
    local_file: The file object to write into. It is passed to the gsutil
      subprocess as its stdout.

  Raises:
    ValueError: If byte_offset is negative or gcs_path is not a valid GCS path.
    CalledProcessError: If the gsutil command failed.
  """
  if byte_offset < 0:
    raise ValueError('byte_offset must not be negative')
  # Enforce the documented contract before shelling out. Besides catching
  # caller mistakes early, this keeps a value such as '-h' from being parsed
  # by gsutil as a command-line option instead of an object path.
  if not gcs_path.startswith(PATH_PREFIX):
    raise ValueError(
        'gcs_path must start with %r, got %r' % (PATH_PREFIX, gcs_path))
  # '<offset>-' is an open-ended range: everything from the offset onwards.
  command = ['gsutil', 'cat', '-r', '%d-' % byte_offset, gcs_path]
  subprocess.check_call(command, stdout=local_file)
  local_file.flush()
def ListDirectory(directory):
  """Returns the lines printed by `gsutil ls` for the given directory.

  Args:
    directory: The path (or wildcard pattern) handed to `gsutil ls`.

  Returns:
    A list with one entry per line of gsutil's output.
    NOTE(review): on Python 3 check_output returns bytes unless text mode is
    requested, so the entries are bytes there -- confirm callers expect that.

  Raises:
    CalledProcessError: If gsutil exits with a non-zero status.
  """
  listing = subprocess.check_output(['gsutil', 'ls', directory])
  return listing.splitlines()
def ListRecursively(top):
  """Lists every file under top, grouped by containing directory.

  Issues a single recursive wildcard listing (`<top>/**`) and groups
  consecutive results that share the same parent directory. Note that unlike
  os.walk()/gfile.Walk(), this does not list subdirectories and the file
  paths are all absolute.

  Args:
    top: A path to a GCS directory.

  Returns:
    A list of (dir_path, file_paths) tuples, in the order the listing produced
    them. The list is empty if the listing command failed.
  """
  # Avoid doubling the separator when the caller already supplied one.
  wildcard = top + ('**' if top.endswith('/') else '/**')
  try:
    listing = ListDirectory(wildcard)
  except subprocess.CalledProcessError as e:
    logging.info('%s, assuming it means no files were found', e)
    return []

  grouped = []
  for path in listing:
    parent = os.path.dirname(path)
    # Only adjacent entries are merged; start a new group whenever the parent
    # directory differs from that of the previous entry.
    if not grouped or grouped[-1][0] != parent:
      grouped.append((parent, []))
    grouped[-1][1].append(path)
  return grouped
def IsDirectory(path):
  """Returns true if path exists and is a directory.

  The path is listed via ListDirectory after stripping trailing slashes. A
  failed listing means the path does not exist. A listing whose only entry is
  the path itself means the path is a file; anything else is treated as a
  directory.

  Args:
    path: The GCS path to test.

  Returns:
    True if path exists and is a directory, False otherwise.
  """
  path = path.rstrip('/')
  try:
    ls = ListDirectory(path)
  except subprocess.CalledProcessError:
    # Doesn't exist.
    return False
  if len(ls) != 1:
    return True
  # Either it's a file (which ls-es as itself) or it's a dir with one file.
  only_entry = ls[0]
  # Subprocess output is bytes on Python 3. Comparing bytes with a text path is
  # always "not equal", which would report every single file as a directory,
  # so decode before comparing. On Python 2 bytes is str and this is a no-op.
  if isinstance(only_entry, bytes) and not isinstance(path, bytes):
    only_entry = only_entry.decode('utf-8', 'replace')
  return only_entry != path
def Exists(path):
  """Returns true if path exists.

  Existence is decided by whether listing the path succeeds.

  Args:
    path: The GCS path to check.

  Returns:
    True if ListDirectory(path) succeeded, False if it raised
    CalledProcessError.
  """
  try:
    ListDirectory(path)
  except subprocess.CalledProcessError:
    return False
  else:
    return True
def IsGCSPath(path):
  """Returns true if path starts with the GCS prefix (PATH_PREFIX)."""
  has_gcs_prefix = path.startswith(PATH_PREFIX)
  return has_gcs_prefix
def CheckIsSupported():
  """Raises an OSError if the system isn't set up for Google Cloud Storage.

  Support is probed by running `gsutil version`; an OSError from launching it
  is logged and replaced by one carrying installation instructions.

  Raises:
    OSError: If the gsutil binary could not be executed. The error's message
      contains installation instructions.
  """
  probe_command = ['gsutil', 'version']
  try:
    subprocess.check_output(probe_command)
  except OSError as e:
    logging.error('Error while checking for gsutil: %s', e)
    install_hint = (
        'Unable to execute the gsutil binary, which is required for Google '
        'Cloud Storage support. You can find installation instructions at '
        'https://goo.gl/sST520')
    raise OSError(install_hint)