From 5239aee40add7c577d775a01477d771ed2c7f14c Mon Sep 17 00:00:00 2001 From: Mark Collins Date: Sun, 19 Apr 2020 21:48:45 -0600 Subject: [PATCH 1/2] Add a copy of tensorflow-gcs-config and modify tensorflow-whl/Dockerfile to build the tensorflow_gcs_config wheel --- tensorflow-whl/Dockerfile | 9 + .../tensorflow-gcs-config/.gitignore | 12 + .../tensorflow-gcs-config/Dockerfile | 62 +++++ .../tensorflow-gcs-config/MANIFEST.in | 2 + .../tensorflow-gcs-config/README.md | 10 + .../tensorflow-gcs-config/WORKSPACE | 44 ++++ tensorflow-whl/tensorflow-gcs-config/build.py | 98 ++++++++ tensorflow-whl/tensorflow-gcs-config/setup.py | 20 ++ .../tensorflow_gcs_config/BUILD | 22 ++ .../tensorflow_gcs_config/__init__.py | 132 +++++++++++ .../gcs_config_op_kernels.cc | 218 ++++++++++++++++++ .../tensorflow_gcs_config/gcs_config_ops.cc | 66 ++++++ .../tensorflow-gcs-config/third_party/BUILD | 0 .../third_party/jsoncpp.BUILD | 37 +++ .../third_party/tensorflow/BUILD | 0 .../third_party/tensorflow/BUILD.tpl | 18 ++ .../third_party/tensorflow/tf_configure.bzl | 210 +++++++++++++++++ 17 files changed, 960 insertions(+) create mode 100644 tensorflow-whl/tensorflow-gcs-config/.gitignore create mode 100644 tensorflow-whl/tensorflow-gcs-config/Dockerfile create mode 100644 tensorflow-whl/tensorflow-gcs-config/MANIFEST.in create mode 100644 tensorflow-whl/tensorflow-gcs-config/README.md create mode 100644 tensorflow-whl/tensorflow-gcs-config/WORKSPACE create mode 100644 tensorflow-whl/tensorflow-gcs-config/build.py create mode 100644 tensorflow-whl/tensorflow-gcs-config/setup.py create mode 100644 tensorflow-whl/tensorflow-gcs-config/tensorflow_gcs_config/BUILD create mode 100644 tensorflow-whl/tensorflow-gcs-config/tensorflow_gcs_config/__init__.py create mode 100644 tensorflow-whl/tensorflow-gcs-config/tensorflow_gcs_config/gcs_config_op_kernels.cc create mode 100644 tensorflow-whl/tensorflow-gcs-config/tensorflow_gcs_config/gcs_config_ops.cc create mode 100644 
tensorflow-whl/tensorflow-gcs-config/third_party/BUILD create mode 100644 tensorflow-whl/tensorflow-gcs-config/third_party/jsoncpp.BUILD create mode 100644 tensorflow-whl/tensorflow-gcs-config/third_party/tensorflow/BUILD create mode 100644 tensorflow-whl/tensorflow-gcs-config/third_party/tensorflow/BUILD.tpl create mode 100644 tensorflow-whl/tensorflow-gcs-config/third_party/tensorflow/tf_configure.bzl diff --git a/tensorflow-whl/Dockerfile b/tensorflow-whl/Dockerfile index f0d60770..1cc73450 100644 --- a/tensorflow-whl/Dockerfile +++ b/tensorflow-whl/Dockerfile @@ -100,5 +100,14 @@ RUN cd /usr/local/src/tensorflow && \ bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_gpu && \ bazel clean +ADD tensorflow-gcs-config /usr/local/src/tensorflow_gcs_config/ + +# Build tensorflow_gcs_config library against the tensorflow_cpu build +RUN cd /usr/local/src/tensorflow_gcs_config && \ + apt-get install -y libcurl4-openssl-dev && \ + pip install /tmp/tensorflow_cpu/tensorflow*.whl && \ + python setup.py bdist_wheel -d /tmp/tensorflow_gcs_config && \ + bazel clean + # Print out the built .whl files RUN ls -R /tmp/tensorflow* diff --git a/tensorflow-whl/tensorflow-gcs-config/.gitignore b/tensorflow-whl/tensorflow-gcs-config/.gitignore new file mode 100644 index 00000000..4e81a34b --- /dev/null +++ b/tensorflow-whl/tensorflow-gcs-config/.gitignore @@ -0,0 +1,12 @@ +bazel-bin +bazel-genfiles +bazel-out +bazel-tensorflow-gcs-config +bazel-out +bazel-testlogs +build +dist +__pycache__ +*.egg-info +*.so +.bazelrc diff --git a/tensorflow-whl/tensorflow-gcs-config/Dockerfile b/tensorflow-whl/tensorflow-gcs-config/Dockerfile new file mode 100644 index 00000000..b2ebce39 --- /dev/null +++ b/tensorflow-whl/tensorflow-gcs-config/Dockerfile @@ -0,0 +1,62 @@ +FROM tensorflow/tensorflow:custom-op + +ARG TF_VERSION +ARG UID +ARG GID +ARG USERNAME="build" +ARG CONDA_ADD_PACKAGES="" +ARG BAZEL_VERSION=0.24.1 +ARG BAZEL_OS=linux + +RUN apt-get update && \ + apt-get 
install -y \ + git \ + curl \ + nano \ + unzip \ + ffmpeg \ + dnsutils + +RUN groupadd -g ${GID} ${USERNAME} +RUN useradd -d /home/${USERNAME} -ms /bin/bash -g ${USERNAME} -G root -u $UID ${USERNAME} +USER ${USERNAME} + +WORKDIR /home/${USERNAME} + +RUN curl -sL https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-${BAZEL_OS}-x86_64.sh -o bazel-install.sh && \ + bash -x bazel-install.sh --user && \ + rm bazel-install.sh + +ARG CONDA_OS=Linux + +# Miniconda - Python 3.6, 64-bit, x86, latest +RUN curl -sL https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -o mconda-install.sh && \ + bash -x mconda-install.sh -b -p miniconda && \ + rm mconda-install.sh + +ENV PATH="/home/${USERNAME}/miniconda/bin:$PATH" + +RUN conda create -y -q -n tensorflow-gcs-config python=3.6 ${CONDA_ADD_PACKAGES} + +RUN echo ". /miniconda/etc/profile.d/conda.sh" >> ~/.bashrc && \ + echo "source activate tensorflow-gcs-config" >> ~/.bashrc + +ARG PIP_ADD_PACKAGES="" + +RUN /bin/bash -c "source activate tensorflow-gcs-config && python -m pip install -U \ + pytest \ + pylint \ + boto3 \ + twine \ + google-cloud-pubsub==0.39.1 \ + pandas \ + fastavro \ + 'tensorflow>=2' \ + ${PIP_ADD_PACKAGES} \ + " + +# This just forces a new fetch of the latest TF binary if the version changes. 
+RUN /bin/bash -c "echo ${TF_VERSION}" +RUN /bin/bash -c "source activate tensorflow-gcs-config && python -m pip install -U 'tensorflow>=2'" + +RUN bazel help > /dev/null diff --git a/tensorflow-whl/tensorflow-gcs-config/MANIFEST.in b/tensorflow-whl/tensorflow-gcs-config/MANIFEST.in new file mode 100644 index 00000000..5318e0fa --- /dev/null +++ b/tensorflow-whl/tensorflow-gcs-config/MANIFEST.in @@ -0,0 +1,2 @@ +include tensorflow_gcs_config/*.py +include tensorflow_gcs_config/*.so diff --git a/tensorflow-whl/tensorflow-gcs-config/README.md b/tensorflow-whl/tensorflow-gcs-config/README.md new file mode 100644 index 00000000..f19d77c9 --- /dev/null +++ b/tensorflow-whl/tensorflow-gcs-config/README.md @@ -0,0 +1,10 @@ +# Tensorflow GCS Configuration Ops + +This package provides TF 2.X compatible versions of the +`tf.contrib.cloud.configure_gcs()` operations. + +This is a slightly modified and repackaged version of the GCS code in TensorFlow I/O, +in particular the [tfio.gcs](https://www.tensorflow.org/io/api_docs/python/tfio/gcs) module. + +This is a copy of the internal source released as the +[tensorflow-gcs-config](https://pypi.org/project/tensorflow-gcs-config/) package. 
diff --git a/tensorflow-whl/tensorflow-gcs-config/WORKSPACE b/tensorflow-whl/tensorflow-gcs-config/WORKSPACE new file mode 100644 index 00000000..4ec14723 --- /dev/null +++ b/tensorflow-whl/tensorflow-gcs-config/WORKSPACE @@ -0,0 +1,44 @@ +workspace(name = "tensorflow_gcs_config") + +load("//third_party/tensorflow:tf_configure.bzl", "tf_configure") +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +tf_configure( + name = "local_config_tf", +) + +http_archive( + name = "com_github_grpc_grpc", + sha256 = "1bf082fb3016154d3f806da8eb5876caf05743da4b2e8130fadd000df74b5bb6", + strip_prefix = "grpc-1.21.1", + urls = [ + "https://mirror.bazel.build/github.com/grpc/grpc/archive/v1.21.1.tar.gz", + "https://github.com/grpc/grpc/archive/v1.21.1.tar.gz", + ], +) + +# 3.7.1 with a fix to BUILD file +http_archive( + name = "com_google_protobuf", + sha256 = "1c020fafc84acd235ec81c6aac22d73f23e85a700871466052ff231d69c1b17a", + strip_prefix = "protobuf-5902e759108d14ee8e6b0b07653dac2f4e70ac73", + urls = [ + "http://mirror.tensorflow.org/github.com/protocolbuffers/protobuf/archive/5902e759108d14ee8e6b0b07653dac2f4e70ac73.tar.gz", + "https://github.com/protocolbuffers/protobuf/archive/5902e759108d14ee8e6b0b07653dac2f4e70ac73.tar.gz", + ], +) + +load("@com_github_grpc_grpc//bazel:grpc_deps.bzl", "grpc_deps") + +grpc_deps() + +http_archive( + name = "jsoncpp_git", + build_file = "//third_party:jsoncpp.BUILD", + sha256 = "c49deac9e0933bcb7044f08516861a2d560988540b23de2ac1ad443b219afdb6", + strip_prefix = "jsoncpp-1.8.4", + urls = [ + "http://mirror.tensorflow.org/github.com/open-source-parsers/jsoncpp/archive/1.8.4.tar.gz", + "https://github.com/open-source-parsers/jsoncpp/archive/1.8.4.tar.gz", + ], +) diff --git a/tensorflow-whl/tensorflow-gcs-config/build.py b/tensorflow-whl/tensorflow-gcs-config/build.py new file mode 100644 index 00000000..efe40577 --- /dev/null +++ b/tensorflow-whl/tensorflow-gcs-config/build.py @@ -0,0 +1,98 @@ +# Copyright 2019 The 
TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +from __future__ import print_function + +import os +import re +import sys +import tensorflow as tf + +def write_config(): + """Retrieve compile and link information from tensorflow and write to .bazelrc.""" + + cflags = tf.sysconfig.get_compile_flags() + + inc_regex = re.compile("^-I") + opt_regex = re.compile("^-D") + + include_list = [] + opt_list = [] + + for arg in cflags: + if inc_regex.match(arg): + include_list.append(arg) + elif opt_regex.match(arg): + opt_list.append(arg) + else: + print("WARNING: Unexpected cflag item {}".format(arg)) + + + if len(include_list) != 1: + print("ERROR: Expected a single include directory in " + + "tf.sysconfig.get_compile_flags()") + exit(1) + + + library_regex = re.compile("^-l") + libdir_regex = re.compile("^-L") + + library_list = [] + libdir_list = [] + + lib = tf.sysconfig.get_link_flags() + + for arg in lib: + if library_regex.match(arg): + library_list.append(arg) + elif libdir_regex.match(arg): + libdir_list.append(arg) + else: + print("WARNING: Unexpected link flag item {}".format(arg)) + + if len(library_list) != 1 or len(libdir_list) != 1: + print("ERROR: Expected exactly one lib and one libdir in" + + "tf.sysconfig.get_link_flags()") + exit(1) + + try: + + with open(".bazelrc", "w") as bazel_rc: + for opt in opt_list: + bazel_rc.write('build 
--copt="{}"\n'.format(opt)) + + bazel_rc.write('build --action_env TF_HEADER_DIR="{}"\n' + .format(include_list[0][2:])) + + bazel_rc.write('build --action_env TF_SHARED_LIBRARY_DIR="{}"\n' + .format(libdir_list[0][2:])) + library_name = library_list[0][2:] + if library_name.startswith(":"): + library_name = library_name[1:] + else: + library_name = "lib" + library_name + ".so" + bazel_rc.write('build --action_env TF_SHARED_LIBRARY_NAME="{}"\n' + .format(library_name)) + bazel_rc.close() + except OSError: + print("ERROR: Writing .bazelrc") + exit(1) + + +def compile_bazel(): + write_config() + + if os.system('rm -f tensorflow_gcs_config/*.so && bazel build -c dbg //tensorflow_gcs_config:_gcs_config_ops.so && cp bazel-bin/tensorflow_gcs_config/_gcs_config_ops.so tensorflow_gcs_config/') != 0: + raise Exception('Failed to build C extension.') diff --git a/tensorflow-whl/tensorflow-gcs-config/setup.py b/tensorflow-whl/tensorflow-gcs-config/setup.py new file mode 100644 index 00000000..6a2d59cf --- /dev/null +++ b/tensorflow-whl/tensorflow-gcs-config/setup.py @@ -0,0 +1,20 @@ +from setuptools import setup + + +setup_kwargs = { +} + +from build import compile_bazel +compile_bazel() + +setup( + name='tensorflow-gcs-config', + version='2.1.7', + description='TensorFlow operations for configuring access to GCS (Google Compute Storage) resources.', + long_description='TensorFlow operations for configuring access to GCS (Google Compute Storage) resources.', + author='Google, Inc.', + author_email=None, + url=None, + packages = ['tensorflow_gcs_config'], + include_package_data=True, +) diff --git a/tensorflow-whl/tensorflow-gcs-config/tensorflow_gcs_config/BUILD b/tensorflow-whl/tensorflow-gcs-config/tensorflow_gcs_config/BUILD new file mode 100644 index 00000000..ca6f8efc --- /dev/null +++ b/tensorflow-whl/tensorflow-gcs-config/tensorflow_gcs_config/BUILD @@ -0,0 +1,22 @@ +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:public"]) + 
+cc_binary( + name = "_gcs_config_ops.so", + srcs = [ + "gcs_config_op_kernels.cc", + "gcs_config_ops.cc", + ], + copts = [ + "-pthread", + "-std=c++11", + "-DNDEBUG", + ], + linkshared = 1, + deps = [ + "@jsoncpp_git//:jsoncpp", + "@local_config_tf//:libtensorflow_framework", + "@local_config_tf//:tf_header_lib", + ], +) diff --git a/tensorflow-whl/tensorflow-gcs-config/tensorflow_gcs_config/__init__.py b/tensorflow-whl/tensorflow-gcs-config/tensorflow_gcs_config/__init__.py new file mode 100644 index 00000000..fe12cda4 --- /dev/null +++ b/tensorflow-whl/tensorflow-gcs-config/tensorflow_gcs_config/__init__.py @@ -0,0 +1,132 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""GCS file system configuration for TensorFlow.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import inspect +import json +import os +import sys + +import tensorflow as tf +from tensorflow import errors +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.training import training + +def _load_library(filename, lib="op"): + """_load_library""" + f = inspect.getfile(sys._getframe(1)) # pylint: disable=protected-access + + # Construct filename + f = os.path.join(os.path.dirname(f), filename) + filenames = [f] + + # Function to load the library, return True if file system library is loaded + load_fn = tf.load_op_library if lib == "op" \ + else lambda f: tf.compat.v1.load_file_system_library(f) is None + + # Try to load all paths for file, fail if none succeed + errs = [] + for f in filenames: + try: + l = load_fn(f) + if l is not None: + return l + except errors.NotFoundError as e: + errs.append(str(e)) + raise NotImplementedError( + "unable to open file: " + + "{}, from paths: {}\ncaused by: {}".format(filename, filenames, errs)) + +_gcs_config_so = _load_library("_gcs_config_ops.so") +gcs_configure_credentials = _gcs_config_so.gcs_configure_credentials +gcs_configure_block_cache = _gcs_config_so.gcs_configure_block_cache + +class BlockCacheParams(object): # pylint: disable=useless-object-inheritance + """BlockCacheParams is a struct used for configuring the GCS Block Cache.""" + + def __init__(self, block_size=None, max_bytes=None, max_staleness=None): + self._block_size = block_size or 128 * 1024 * 1024 + self._max_bytes = max_bytes or 2 * self._block_size + self._max_staleness = max_staleness or 0 + + @property + def block_size(self): + return self._block_size + + @property + def max_bytes(self): + 
return self._max_bytes + + @property + def max_staleness(self): + return self._max_staleness + +def configure_gcs(credentials=None, block_cache=None, device=None): + """Configures the GCS file system for a given session. + + Warning: GCS `credentials` may be transmitted over the network unencrypted. + Please ensure that the network is trusted before using this function. For + users running code entirely within Google Cloud, your data is protected by + encryption in between data centers. For more information, please take a look + at https://cloud.google.com/security/encryption-in-transit/. + + Args: + credentials: [Optional.] A JSON string, or a dict that is serialized to JSON. + block_cache: [Optional.] A BlockCacheParams to configure the block cache. + device: [Optional.] The device on which to place the configure ops. + """ + def configure(credentials, block_cache): + """Helper function to actually configure GCS.""" + if credentials: + if isinstance(credentials, dict): + credentials = json.dumps(credentials) + creds = gcs_configure_credentials(credentials) + else: + creds = tf.constant(0) + + if block_cache: + cache = gcs_configure_block_cache( + max_cache_size=block_cache.max_bytes, + block_size=block_cache.block_size, + max_staleness=block_cache.max_staleness) + else: + cache = tf.constant(0) + + return tf.tuple([creds, cache]) + + if device: + with ops.device(device): + return configure(credentials, block_cache) + return configure(credentials, block_cache) + +def configure_gcs_from_colab_auth(device='/job:worker/replica:0/task:0/device:CPU:0'): + """Configures the GCS file system in Colab. + + Args: + """ + # Read from the application default credentials (adc). 
+ adc_filename = os.environ.get( + 'GOOGLE_APPLICATION_CREDENTIALS', '/content/adc.json') + with open(adc_filename) as f: + data = json.load(f) + return configure_gcs(credentials=data, device=device) + + diff --git a/tensorflow-whl/tensorflow-gcs-config/tensorflow_gcs_config/gcs_config_op_kernels.cc b/tensorflow-whl/tensorflow-gcs-config/tensorflow_gcs_config/gcs_config_op_kernels.cc new file mode 100644 index 00000000..3e2fc310 --- /dev/null +++ b/tensorflow-whl/tensorflow-gcs-config/tensorflow_gcs_config/gcs_config_op_kernels.cc @@ -0,0 +1,218 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <sstream> + +#include "include/json/json.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/platform/cloud/curl_http_request.h" +#include "tensorflow/core/platform/cloud/gcs_file_system.h" +#include "tensorflow/core/platform/cloud/oauth_client.h" +#include "tensorflow/core/util/ptr_util.h" + +namespace tensorflow { +namespace { + +// The default initial delay between retries with exponential backoff. +constexpr int kInitialRetryDelayUsec = 500000; // 0.5 sec + +// The minimum time delta between now and the token expiration time +// for the token to be re-used. 
+constexpr int kExpirationTimeMarginSec = 60; + +// The URL to retrieve the auth bearer token via OAuth with a refresh token. +constexpr char kOAuthV3Url[] = "https://www.googleapis.com/oauth2/v3/token"; + +// The URL to retrieve the auth bearer token via OAuth with a private key. +constexpr char kOAuthV4Url[] = "https://www.googleapis.com/oauth2/v4/token"; + +// The authentication token scope to request. +constexpr char kOAuthScope[] = "https://www.googleapis.com/auth/cloud-platform"; + +Status RetrieveGcsFs(OpKernelContext* ctx, RetryingGcsFileSystem** fs) { + DCHECK(fs != nullptr); + *fs = nullptr; + + FileSystem* filesystem = nullptr; + TF_RETURN_IF_ERROR( + ctx->env()->GetFileSystemForFile("gs://fake/file.text", &filesystem)); + if (filesystem == nullptr) { + return errors::FailedPrecondition("The GCS file system is not registered."); + } + + *fs = dynamic_cast<RetryingGcsFileSystem*>(filesystem); + if (*fs == nullptr) { + return errors::Internal( + "The filesystem registered under the 'gs://' scheme was not a " + "tensorflow::RetryingGcsFileSystem*."); + } + return Status::OK(); +} + +template <typename T> +Status ParseScalarArgument(OpKernelContext* ctx, StringPiece argument_name, + T* output) { + const Tensor* argument_t; + TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t)); + if (!TensorShapeUtils::IsScalar(argument_t->shape())) { + return errors::InvalidArgument(argument_name, " must be a scalar"); + } + *output = argument_t->scalar<T>()(); + return Status::OK(); +} + +template <> +Status ParseScalarArgument(OpKernelContext* ctx, StringPiece argument_name, + std::string* output) { + const Tensor* argument_t; + TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t)); + if (!TensorShapeUtils::IsScalar(argument_t->shape())) { + return errors::InvalidArgument(argument_name, " must be a scalar"); + } + const ::tensorflow::tstring& tstr = argument_t->scalar<::tensorflow::tstring>()(); + output->assign(tstr.data(), tstr.size()); + return Status::OK(); +} + +// GcsCredentialsOpKernel 
overrides the credentials used by the gcs_filesystem. +class GcsCredentialsOpKernel : public OpKernel { + public: + explicit GcsCredentialsOpKernel(OpKernelConstruction* ctx) : OpKernel(ctx) {} + void Compute(OpKernelContext* ctx) override { + // Get a handle to the GCS file system. + RetryingGcsFileSystem* gcs = nullptr; + OP_REQUIRES_OK(ctx, RetrieveGcsFs(ctx, &gcs)); + + std::string json_string; + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "json", &json_string)); + + Json::Value json; + Json::Reader reader; + std::stringstream json_stream(json_string); + OP_REQUIRES(ctx, reader.parse(json_stream, json), + errors::InvalidArgument("Could not parse json: ", json_string)); + + OP_REQUIRES( + ctx, json.isMember("refresh_token") || json.isMember("private_key"), + errors::InvalidArgument("JSON format incompatible; did not find fields " + "`refresh_token` or `private_key`.")); + + auto provider = + tensorflow::MakeUnique<ConstantAuthProvider>(json, ctx->env()); + + // Test getting a token + string dummy_token; + OP_REQUIRES_OK(ctx, provider->GetToken(&dummy_token)); + OP_REQUIRES(ctx, !dummy_token.empty(), + errors::InvalidArgument( + "Could not retrieve a token with the given credentials.")); + + // Set the provider. 
+ gcs->underlying()->SetAuthProvider(std::move(provider)); + } + + private: + class ConstantAuthProvider : public AuthProvider { + public: + ConstantAuthProvider(const Json::Value& json, + std::unique_ptr<OAuthClient> oauth_client, Env* env, + int64 initial_retry_delay_usec) + : json_(json), + oauth_client_(std::move(oauth_client)), + env_(env), + initial_retry_delay_usec_(initial_retry_delay_usec) {} + + ConstantAuthProvider(const Json::Value& json, Env* env) + : ConstantAuthProvider(json, tensorflow::MakeUnique<OAuthClient>(), env, + kInitialRetryDelayUsec) {} + + ~ConstantAuthProvider() override {} + + Status GetToken(string* token) override { + mutex_lock l(mu_); + const uint64 now_sec = env_->NowSeconds(); + + if (!current_token_.empty() && + now_sec + kExpirationTimeMarginSec < expiration_timestamp_sec_) { + *token = current_token_; + return Status::OK(); + } + if (json_.isMember("refresh_token")) { + TF_RETURN_IF_ERROR(oauth_client_->GetTokenFromRefreshTokenJson( + json_, kOAuthV3Url, &current_token_, &expiration_timestamp_sec_)); + } else if (json_.isMember("private_key")) { + TF_RETURN_IF_ERROR(oauth_client_->GetTokenFromServiceAccountJson( + json_, kOAuthV4Url, kOAuthScope, &current_token_, + &expiration_timestamp_sec_)); + } else { + return errors::FailedPrecondition( + "Unexpected content of the JSON credentials file."); + } + + *token = current_token_; + return Status::OK(); + } + + private: + Json::Value json_; + std::unique_ptr<OAuthClient> oauth_client_; + Env* env_; + + mutex mu_; + string current_token_ GUARDED_BY(mu_); + uint64 expiration_timestamp_sec_ GUARDED_BY(mu_) = 0; + + // The initial delay for exponential backoffs when retrying failed calls. 
+ const int64 initial_retry_delay_usec_; + TF_DISALLOW_COPY_AND_ASSIGN(ConstantAuthProvider); + }; +}; + +REGISTER_KERNEL_BUILDER(Name("GcsConfigureCredentials").Device(DEVICE_CPU), + GcsCredentialsOpKernel); + +class GcsBlockCacheOpKernel : public OpKernel { + public: + explicit GcsBlockCacheOpKernel(OpKernelConstruction* ctx) : OpKernel(ctx) {} + void Compute(OpKernelContext* ctx) override { + // Get a handle to the GCS file system. + RetryingGcsFileSystem* gcs = nullptr; + OP_REQUIRES_OK(ctx, RetrieveGcsFs(ctx, &gcs)); + + size_t max_cache_size, block_size, max_staleness; + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "max_cache_size", + &max_cache_size)); + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "block_size", &block_size)); + OP_REQUIRES_OK( + ctx, ParseScalarArgument(ctx, "max_staleness", &max_staleness)); + + if (gcs->underlying()->block_size() == block_size && + gcs->underlying()->max_bytes() == max_cache_size && + gcs->underlying()->max_staleness() == max_staleness) { + LOG(INFO) << "Skipping resetting the GCS block cache."; + return; + } + gcs->underlying()->ResetFileBlockCache(block_size, max_cache_size, + max_staleness); + } +}; + +REGISTER_KERNEL_BUILDER(Name("GcsConfigureBlockCache").Device(DEVICE_CPU), + GcsBlockCacheOpKernel); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow-whl/tensorflow-gcs-config/tensorflow_gcs_config/gcs_config_ops.cc b/tensorflow-whl/tensorflow-gcs-config/tensorflow_gcs_config/gcs_config_ops.cc new file mode 100644 index 00000000..3c4a5570 --- /dev/null +++ b/tensorflow-whl/tensorflow-gcs-config/tensorflow_gcs_config/gcs_config_ops.cc @@ -0,0 +1,66 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +using shape_inference::InferenceContext; + +REGISTER_OP("GcsConfigureCredentials") + .Input("json: string") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Configures the credentials used by the GCS client of the local TF runtime. +The json input can be of the format: +1. Refresh Token: +{ + "client_id": "", + "client_secret": "", + "refresh_token: "", + "type": "authorized_user", +} +2. Service Account: +{ + "type": "service_account", + "project_id": "", + "private_key_id": "", + "private_key": "------BEGIN PRIVATE KEY-----\n\n-----END PRIVATE KEY------\n", + "client_email": "@.iam.gserviceaccount.com", + "client_id": "", + # Some additional fields elided +} +Note the credentials established through this method are shared across all +sessions run on this runtime. +Note be sure to feed the inputs to this op to ensure the credentials are not +stored in a constant op within the graph that might accidentally be checkpointed +or in other ways be persisted or exfiltrated. +)doc"); + +REGISTER_OP("GcsConfigureBlockCache") + .Input("max_cache_size: uint64") + .Input("block_size: uint64") + .Input("max_staleness: uint64") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Re-configures the GCS block cache with the new configuration values. 
+If the values are the same as already configured values, this op is a no-op. If +they are different, the current contents of the block cache is dropped, and a +new block cache is created fresh. +)doc"); + +} // namespace tensorflow diff --git a/tensorflow-whl/tensorflow-gcs-config/third_party/BUILD b/tensorflow-whl/tensorflow-gcs-config/third_party/BUILD new file mode 100644 index 00000000..e69de29b diff --git a/tensorflow-whl/tensorflow-gcs-config/third_party/jsoncpp.BUILD b/tensorflow-whl/tensorflow-gcs-config/third_party/jsoncpp.BUILD new file mode 100644 index 00000000..cf3cba05 --- /dev/null +++ b/tensorflow-whl/tensorflow-gcs-config/third_party/jsoncpp.BUILD @@ -0,0 +1,37 @@ +licenses(["unencumbered"]) # Public Domain or MIT + +exports_files(["LICENSE"]) + +cc_library( + name = "jsoncpp", + srcs = [ + "include/json/assertions.h", + "src/lib_json/json_reader.cpp", + "src/lib_json/json_tool.h", + "src/lib_json/json_value.cpp", + "src/lib_json/json_writer.cpp", + ], + hdrs = [ + "include/json/autolink.h", + "include/json/config.h", + "include/json/features.h", + "include/json/forwards.h", + "include/json/json.h", + "include/json/reader.h", + "include/json/value.h", + "include/json/version.h", + "include/json/writer.h", + ], + copts = [ + "-DJSON_USE_EXCEPTION=0", + "-DJSON_HAS_INT64", + ], + includes = ["include"], + visibility = ["//visibility:public"], + deps = [":private"], +) + +cc_library( + name = "private", + textual_hdrs = ["src/lib_json/json_valueiterator.inl"], +) diff --git a/tensorflow-whl/tensorflow-gcs-config/third_party/tensorflow/BUILD b/tensorflow-whl/tensorflow-gcs-config/third_party/tensorflow/BUILD new file mode 100644 index 00000000..e69de29b diff --git a/tensorflow-whl/tensorflow-gcs-config/third_party/tensorflow/BUILD.tpl b/tensorflow-whl/tensorflow-gcs-config/third_party/tensorflow/BUILD.tpl new file mode 100644 index 00000000..3c78b1f2 --- /dev/null +++ b/tensorflow-whl/tensorflow-gcs-config/third_party/tensorflow/BUILD.tpl @@ -0,0 
+1,18 @@ +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "tf_header_lib", + hdrs = [":tf_header_include"], + includes = ["include"], + visibility = ["//visibility:public"], +) + +cc_library( + name = "libtensorflow_framework", + srcs = [":libtensorflow_framework.so"], + #data = ["lib/libtensorflow_framework.so"], + visibility = ["//visibility:public"], +) + +%{TF_HEADER_GENRULE} +%{TF_SHARED_LIBRARY_GENRULE} diff --git a/tensorflow-whl/tensorflow-gcs-config/third_party/tensorflow/tf_configure.bzl b/tensorflow-whl/tensorflow-gcs-config/third_party/tensorflow/tf_configure.bzl new file mode 100644 index 00000000..49d82684 --- /dev/null +++ b/tensorflow-whl/tensorflow-gcs-config/third_party/tensorflow/tf_configure.bzl @@ -0,0 +1,210 @@ +"""Setup TensorFlow as external dependency""" + +_TF_HEADER_DIR = "TF_HEADER_DIR" +_TF_SHARED_LIBRARY_DIR = "TF_SHARED_LIBRARY_DIR" +_TF_SHARED_LIBRARY_NAME = "TF_SHARED_LIBRARY_NAME" + +def _tpl(repository_ctx, tpl, substitutions = {}, out = None): + if not out: + out = tpl + repository_ctx.template( + out, + Label("//third_party/tensorflow:%s.tpl" % tpl), + substitutions, + ) + +def _fail(msg): + """Output failure message when auto configuration fails.""" + red = "\033[0;31m" + no_color = "\033[0m" + fail("%sPython Configuration Error:%s %s\n" % (red, no_color, msg)) + +def _is_windows(repository_ctx): + """Returns true if the host operating system is windows.""" + os_name = repository_ctx.os.name.lower() + if os_name.find("windows") != -1: + return True + return False + +def _execute( + repository_ctx, + cmdline, + error_msg = None, + error_details = None, + empty_stdout_fine = False): + """Executes an arbitrary shell command. + + Helper for executes an arbitrary shell command. + + Args: + repository_ctx: the repository_ctx object. + cmdline: list of strings, the command to execute. + error_msg: string, a summary of the error if the command fails. 
+ error_details: string, details about the error or steps to fix it. + empty_stdout_fine: bool, if True, an empty stdout result is fine, otherwise + it's an error. + + Returns: + The result of repository_ctx.execute(cmdline). + """ + result = repository_ctx.execute(cmdline) + if result.stderr or not (empty_stdout_fine or result.stdout): + _fail("\n".join([ + error_msg.strip() if error_msg else "Repository command failed", + result.stderr.strip(), + error_details if error_details else "", + ])) + return result + +def _read_dir(repository_ctx, src_dir): + """Returns a string with all files in a directory. + + Finds all files inside a directory, traversing subfolders and following + symlinks. The returned string contains the full path of all files + separated by line breaks. + + Args: + repository_ctx: the repository_ctx object. + src_dir: directory to find files from. + + Returns: + A string of all files inside the given dir. + """ + if _is_windows(repository_ctx): + src_dir = src_dir.replace("/", "\\") + find_result = _execute( + repository_ctx, + ["cmd.exe", "/c", "dir", src_dir, "/b", "/s", "/a-d"], + empty_stdout_fine = True, + ) + + # src_files will be used in genrule.outs where the paths must + # use forward slashes. + result = find_result.stdout.replace("\\", "/") + else: + find_result = _execute( + repository_ctx, + ["find", src_dir, "-follow", "-type", "f"], + empty_stdout_fine = True, + ) + result = find_result.stdout + return result + +def _genrule(genrule_name, command, outs): + """Returns a string with a genrule. + + Genrule executes the given command and produces the given outputs. + + Args: + genrule_name: A unique name for genrule target. + command: The command to run. + outs: A list of files generated by this rule. + + Returns: + A genrule target. 
+ """ + return ( + "genrule(\n" + + ' name = "' + + genrule_name + '",\n' + + " outs = [\n" + + outs + + "\n ],\n" + + ' cmd = """\n' + + command + + '\n """,\n' + + ")\n" + ) + +def _norm_path(path): + """Returns a path with '/' separators and the trailing slash removed.""" + path = path.replace("\\", "/") + if path[-1] == "/": + path = path[:-1] + return path + +def _symlink_genrule_for_dir( + repository_ctx, + src_dir, + dest_dir, + genrule_name, + src_files = [], + dest_files = []): + """Returns a genrule to symlink (or copy if on Windows) a set of files. + + If src_dir is passed, files will be read from the given directory; otherwise + we assume files are in src_files and dest_files. + + Args: + repository_ctx: the repository_ctx object. + src_dir: source directory. + dest_dir: directory to create symlink in. + genrule_name: genrule name. + src_files: list of source files instead of src_dir. + dest_files: list of corresponding destination files. + + Returns: + genrule target that creates the symlinks. + """ + if src_dir != None: + src_dir = _norm_path(src_dir) + dest_dir = _norm_path(dest_dir) + files = "\n".join(sorted(_read_dir(repository_ctx, src_dir).splitlines())) + + # Create a list with the src_dir stripped to use for outputs. + dest_files = files.replace(src_dir, "").splitlines() + src_files = files.splitlines() + command = [] + outs = [] + for i in range(len(dest_files)): + if dest_files[i] != "": + # If we have only one file to link we do not want to use the dest_dir, as + # $(@D) will include the full path to the file. + dest = "$(@D)/" + dest_dir + dest_files[i] if len(dest_files) != 1 else "$(@D)/" + dest_files[i] + + # Copy the headers to create a sandboxable setup.
+ cmd = "cp -f" + command.append(cmd + ' "%s" "%s"' % (src_files[i], dest)) + outs.append(' "' + dest_dir + dest_files[i] + '",') + genrule = _genrule( + genrule_name, + " && ".join(command), + "\n".join(outs), + ) + return genrule + +def _tf_pip_impl(repository_ctx): + tf_header_dir = repository_ctx.os.environ[_TF_HEADER_DIR] + tf_header_rule = _symlink_genrule_for_dir( + repository_ctx, + tf_header_dir, + "include", + "tf_header_include", + ) + + tf_shared_library_dir = repository_ctx.os.environ[_TF_SHARED_LIBRARY_DIR] + tf_shared_library_name = repository_ctx.os.environ[_TF_SHARED_LIBRARY_NAME] + tf_shared_library_path = "%s/%s" % (tf_shared_library_dir, tf_shared_library_name) + + tf_shared_library_rule = _symlink_genrule_for_dir( + repository_ctx, + None, + "", + "libtensorflow_framework.so", + [tf_shared_library_path], + ["libtensorflow_framework.so"], + ) + + _tpl(repository_ctx, "BUILD", { + "%{TF_HEADER_GENRULE}": tf_header_rule, + "%{TF_SHARED_LIBRARY_GENRULE}": tf_shared_library_rule, + }) + +tf_configure = repository_rule( + implementation = _tf_pip_impl, + environ = [ + _TF_HEADER_DIR, + _TF_SHARED_LIBRARY_DIR, + _TF_SHARED_LIBRARY_NAME, + ], +) From 8ee2c3bbefc449d4416ffdab63fd3d59ef6f91c7 Mon Sep 17 00:00:00 2001 From: Mark Collins Date: Wed, 22 Apr 2020 15:25:47 -0600 Subject: [PATCH 2/2] Update changelog --- tensorflow-whl/CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow-whl/CHANGELOG.md b/tensorflow-whl/CHANGELOG.md index 2f87c08a..39a1d972 100644 --- a/tensorflow-whl/CHANGELOG.md +++ b/tensorflow-whl/CHANGELOG.md @@ -12,4 +12,5 @@ * `2.1.0-py36`: TensorFlow 2.1.0 with Python 3.6 * `2.1.0-py36-2`: TensorFlow 2.1.0 with CUDA 10.1 * `2.1.0-py37`: TensorFlow 2.1.0 with Python 3.7 -* `2.1.0-py37-2`: TensorFlow 2.1.0 with Python 3.7 & DLVM base image. \ No newline at end of file +* `2.1.0-py37-2`: TensorFlow 2.1.0 with Python 3.7 & DLVM base image. 
+* `2.1.0-py37-3`: TensorFlow 2.1.0 with Python 3.7, DLVM base image, tensorflow-gcs-config.