Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
6 changes: 3 additions & 3 deletions tensorflow/compiler/tests/BUILD
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
load("@xla//third_party/rules_python/python:py_library.bzl", "py_library")
load("//tensorflow:strict.default.bzl", "py_strict_library", "py_strict_test")
load("//tensorflow:tensorflow.bzl", "py_test")
load("//tensorflow:tensorflow.default.bzl", "cuda_py_strict_test", "tf_cuda_cc_test")
load("//tensorflow/compiler/aot:tfcompile.bzl", "tf_library")
load("//tensorflow/compiler/tests:build_combined_defs.bzl", "tf_xla_combined_py_test")
Expand Down Expand Up @@ -44,7 +44,7 @@ package_group(

generate_backend_suites()

py_strict_library(
py_library(
name = "xla_test",
testonly = 1,
srcs = ["xla_test.py"],
Expand Down Expand Up @@ -82,7 +82,7 @@ py_library(
],
)

py_strict_test(
py_test(
name = "xla_test_test",
size = "small",
srcs = ["xla_test_test.py"],
Expand Down
101 changes: 41 additions & 60 deletions tensorflow/core/distributed_runtime/eager/eager_service_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ limitations under the License.
#include "tensorflow/core/distributed_runtime/worker_cache.h"
#include "tensorflow/core/distributed_runtime/worker_env.h"
#include "tensorflow/core/framework/function.h"
#include "tensorflow/core/framework/node_def_util.h"
#include "tensorflow/core/framework/rendezvous.h"
#include "tensorflow/core/lib/gtl/cleanup.h"
#include "tensorflow/core/nccl/collective_communicator.h"
#include "tensorflow/core/platform/errors.h"
#include "tensorflow/core/platform/host_info.h"
Expand All @@ -66,62 +68,15 @@ absl::Status GetNumRetvals(
FunctionLibraryDefinition* func_lib_def, const std::string& op_name,
const google::protobuf::Map<std::string, tensorflow::AttrValue>& attrs,
int* num_retvals) {
const tensorflow::OpRegistrationData* op_reg_data = nullptr;
auto status = tensorflow::OpRegistry::Global()->LookUp(op_name, &op_reg_data);
if (absl::IsNotFound(status)) {
status = func_lib_def->LookUp(op_name, &op_reg_data);
}
TF_RETURN_IF_ERROR(status);
const OpDef* op_def = nullptr;
TF_RETURN_IF_ERROR(func_lib_def->LookUpOpDef(op_name, &op_def));

const tensorflow::OpDef& op_def = op_reg_data->op_def;
NodeDef ndef;
ndef.set_op(op_name);
*ndef.mutable_attr() = attrs;
AddDefaultsToNodeDef(*op_def, &ndef);

for (const auto& output_arg : op_def.output_arg()) {
if (!output_arg.number_attr().empty()) {
auto iter = attrs.find(output_arg.number_attr());
if (iter == attrs.end()) {
return absl::InvalidArgumentError(
absl::StrCat("Unable to find number_attr ",
output_arg.number_attr(), " for Op: ", op_name));
}
int64_t repeats = iter->second.i();
if (repeats < 0) {
return absl::InvalidArgumentError(
absl::StrCat("Expected >= 0 number_attr for Op: ", op_name,
", but got ", repeats));
}
if (repeats > std::numeric_limits<int>::max() - *num_retvals) {
return absl::InvalidArgumentError(
absl::StrCat("Too many return values for Op: ", op_name));
}
*num_retvals += repeats;
} else if (!output_arg.type_list_attr().empty()) {
auto iter = attrs.find(output_arg.type_list_attr());
if (iter == attrs.end()) {
return absl::InvalidArgumentError(
absl::StrCat("Unable to find type_list_attr ",
output_arg.type_list_attr(), " for Op: ", op_name));
}
int64_t repeats = iter->second.list().type_size();
if (repeats < 0) {
return absl::InvalidArgumentError(
absl::StrCat("Expected >= 0 type_list_attr size for Op: ", op_name,
", but got ", repeats));
}
if (repeats > std::numeric_limits<int>::max() - *num_retvals) {
return absl::InvalidArgumentError(
absl::StrCat("Too many return values for Op: ", op_name));
}
*num_retvals += repeats;
} else {
if (*num_retvals >= std::numeric_limits<int>::max()) {
return absl::InvalidArgumentError(
absl::StrCat("Too many return values for Op: ", op_name));
}
*num_retvals += 1;
}
}

return absl::OkStatus();
return NumOutputsForNode(ndef, *op_def, num_retvals);
}

absl::Status GetEagerOperationAndNumRetvals(const Operation& operation,
Expand Down Expand Up @@ -904,6 +859,13 @@ absl::Status EagerServiceImpl::SendPackedHandle(

std::vector<tensorflow::TensorHandle*> handles;
handles.resize(send_packed_handle.handles_size());
// Clean up handles in case of an early exit due to errors.
auto cleanup = tensorflow::gtl::MakeCleanup([&handles] {
for (auto* h : handles) {
if (h) h->Unref();
}
});

for (int i = 0; i < send_packed_handle.handles_size(); ++i) {
const auto& item = send_packed_handle.handles(i);
if (item.has_local_handle()) {
Expand All @@ -914,24 +876,43 @@ absl::Status EagerServiceImpl::SendPackedHandle(
item.local_handle().tensor().DebugString()));
}
Device* op_device = nullptr;
TF_RETURN_IF_ERROR(eager_context->FindDeviceFromName(
item.local_handle().device().c_str(), &op_device));
absl::Status status = eager_context->FindDeviceFromName(
item.local_handle().device().c_str(), &op_device);
if (!status.ok()) {
return status;
}
handles[i] = TensorHandle::CreateLocalHandle(
std::move(tensor), /*d=*/nullptr, op_device, eager_context);
} else {
TF_RETURN_IF_ERROR(
absl::Status status =
eager_context->RemoteMgr()->DeserializeRemoteTensorHandle(
item.remote_handle(), &handles[i]));
item.remote_handle(), &handles[i]);
if (!status.ok()) {
return status;
}
}
}

tensorflow::DataType dtype = handles.at(0)->dtype;
for (int i = 1; i < handles.size(); ++i) {
if (handles.at(i)->dtype != dtype) {
return absl::InvalidArgumentError("Handles do not have the same dtype.");
}
}

tensorflow::TensorHandle* packed_handle = nullptr;
std::vector<tensorflow::TensorHandle*> handles_to_pack = handles;
// Create an unshaped packed TensorHandle.
TF_RETURN_IF_ERROR(TensorHandle::CreatePackedHandle(
absl::Status s = TensorHandle::CreatePackedHandle(
std::move(handles_to_pack), handles.at(0)->dtype, TensorShape(),
send_packed_handle.device_name(), eager_context, &packed_handle));
send_packed_handle.device_name(), eager_context, &packed_handle);
if (!s.ok()) {
return s;
}

// Cancel the cleanup for the individual handles, as they are now refcounted
// by `packed_handle`.
cleanup.release();
for (auto* h : handles) {
// Unref handle since it has a ref in the packed handle now.
h->Unref();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1404,6 +1404,7 @@ TEST_F(EagerServiceImplTest, SendPackedHandleTest) {
remote_handle->set_output_num(5);
remote_handle->set_op_device(device2);
remote_handle->set_device(device2);
remote_handle->set_dtype(tensorflow::DataType::DT_FLOAT);

TF_ASSERT_OK(eager_service_impl.Enqueue(nullptr, &remote_enqueue_request,
&remote_enqueue_response));
Expand Down
50 changes: 37 additions & 13 deletions tensorflow/core/kernels/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,12 @@ cc_library(
# Header-only target: exposes maxpooling_op_gpu.h through `hdrs` and lists no
# `srcs` of its own.
cc_library(
name = "pooling_ops_gpu_hdrs",
hdrs = ["maxpooling_op_gpu.h"],
deps = [
"//tensorflow/core:portable_gif_internal",
"//tensorflow/core/framework:tensor_types",
"//tensorflow/core/util:tensor_format",
"@com_google_absl//absl/status",
],
)

# We keep this target only because some contrib/ targets depend on it. The
Expand Down Expand Up @@ -4350,6 +4356,8 @@ tf_kernel_library(
":redux_functor",
"//tensorflow/core/profiler/lib:scoped_annotation",
"//tensorflow/core/util:determinism_for_kernels",
"//tensorflow/core/util:overflow",
"@com_google_absl//absl/status",
] + if_cuda_or_rocm([
":reduction_ops",
"@xla//xla/stream_executor:event_based_timer",
Expand Down Expand Up @@ -4642,8 +4650,13 @@ tf_kernel_library(
"//tensorflow/core:lib_internal",
"//tensorflow/core/framework:bounds_check",
"//tensorflow/core/platform:stream_executor",
"//tensorflow/core/util:overflow",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/status",
"@com_google_absl//absl/strings",
"@eigen_archive//:eigen3",
"@local_config_cuda//cuda:cudnn_header",
"@xla//xla/tsl/framework/fixedpoint",
],
)

Expand Down Expand Up @@ -4675,7 +4688,19 @@ cc_library(
deps = [
":eigen_helpers",
":ops_util_hdrs",
":pooling_ops_gpu_hdrs",
"//tensorflow/core:framework",
"//tensorflow/core:framework_lite",
"//tensorflow/core:portable_gif_internal",
"//tensorflow/core/framework:bounds_check",
"//tensorflow/core/framework:numeric_types",
"//tensorflow/core/framework:tensor_shape",
"//tensorflow/core/framework:tensor_types",
"//tensorflow/core/util:padding",
"//tensorflow/core/util:tensor_format",
"@com_google_absl//absl/status",
"@eigen_archive//:eigen3",
"@xla//xla/tsl/framework/fixedpoint",
],
)

Expand Down Expand Up @@ -4753,6 +4778,8 @@ tf_kernel_library(
deps = [
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core/util:overflow",
"@com_google_absl//absl/status",
"@eigen_archive//:eigen3",
],
)
Expand Down Expand Up @@ -6661,7 +6688,6 @@ filegroup(
"transpose_op.h",
"where_op.h",
"xent_op.h",
] + [
"//tensorflow/core/kernels/data:portable_all_op_kernels_headers",
"//tensorflow/core/kernels/image:adjust_contrast_op.h",
"//tensorflow/core/kernels/image:adjust_hue_op.h",
Expand Down Expand Up @@ -6810,9 +6836,6 @@ filegroup(
"population_count_op.h",
"winograd_transform.h",
":portable_extended_ops_headers",
"@xla//xla/tsl/framework/contraction:eigen_contraction_kernel.cc",
"@xla//xla/tsl/framework/contraction:eigen_contraction_kernel.h",
] + [
"//tensorflow/core/kernels/image:colorspace_op.cc",
"//tensorflow/core/kernels/image:crop_and_resize_op.cc",
"//tensorflow/core/kernels/image:crop_and_resize_op.h",
Expand All @@ -6830,6 +6853,8 @@ filegroup(
"//tensorflow/core/kernels/linalg:einsum_op_impl_int32.cc",
"//tensorflow/core/kernels/linalg:einsum_op_impl_int64.cc",
"//tensorflow/core/kernels/uniform_quant_ops:portable_all_op_kernels",
"@xla//xla/tsl/framework/contraction:eigen_contraction_kernel.cc",
"@xla//xla/tsl/framework/contraction:eigen_contraction_kernel.h",
],
)

Expand Down Expand Up @@ -7201,6 +7226,7 @@ cc_library(
linkopts = if_android(["-ldl"]),
tags = [
"manual",
"nofixdeps",
"notap",
],
# These headers are not self-contained, so should be included in textual_hdrs only.
Expand Down Expand Up @@ -8023,8 +8049,8 @@ cc_library(
)

# For a more maintainable build this target should not exist and the headers
# should be split into the existing cc_library targets, but this change was
# automatically done so that we can remove long standing issues and complexity
# should be split into the existing cc_library targets, but this change was
# automatically done so that we can remove long standing issues and complexity
# in the build system. It's up to the OWNERS of this package to get rid of it or
# not. The use of the textual_hdrs attribute is discouraged, use hdrs instead.
# Here it is used to avoid header parsing errors in packages where the feature
Expand All @@ -8035,16 +8061,14 @@ cc_library(
tags = ["avoid_dep"],
textual_hdrs = glob(["*.h"]),
visibility = [
"//visibility:public",
],
deps = [
"//tensorflow/core/framework:graph_proto_cc",
"//tensorflow/core/framework:node_def_proto_cc",
"//tensorflow/core/framework:types_proto_cc",
"@com_google_absl//absl/synchronization",
"//smartass/brain:__subpackages__",
"//tensorflow:__subpackages__",
],
)

# Deleted deps
# [

tf_kernel_library(
name = "stochastic_cast_op",
features = ["-layering_check"],
Expand Down
32 changes: 17 additions & 15 deletions tensorflow/core/kernels/avgpooling_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -623,21 +623,23 @@ class AvgPoolingGradOpCustomGPUKernel : public OpKernel {
in_cols, window_cols, /*dilation_rate=*/1,
col_stride, padding_, &out_width, &pad_cols));

RunAvePoolBackwardNHWC<T>(out_backprop.flat<T>().data(), // top_diff
out_backprop_batch, // num
in_rows, // height
in_cols, // width
out_backprop_depth, // channels
out_backprop_rows, // pooled_height
out_backprop_cols, // pooled_width
window_rows, // kernel_h
window_cols, // kernel_w
row_stride, // stride_h
col_stride, // stride_w
pad_rows, // pad_t
pad_cols, // pad_l
output->flat<T>().data(), // bottom_diff
context->eigen_gpu_device()); // d
OP_REQUIRES_OK(
context,
RunAvePoolBackwardNHWC<T>(out_backprop.flat<T>().data(), // top_diff
out_backprop_batch, // num
in_rows, // height
in_cols, // width
out_backprop_depth, // channels
out_backprop_rows, // pooled_height
out_backprop_cols, // pooled_width
window_rows, // kernel_h
window_cols, // kernel_w
row_stride, // stride_h
col_stride, // stride_w
pad_rows, // pad_t
pad_cols, // pad_l
output->flat<T>().data(), // bottom_diff
context->eigen_gpu_device())); // d
} else {
DnnPoolingGradOp<T>::Compute(context, se::dnn::PoolingMode::kAverage,
ksize_, stride_, padding_,
Expand Down
15 changes: 7 additions & 8 deletions tensorflow/core/kernels/avgpooling_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
#define TENSORFLOW_CORE_KERNELS_AVGPOOLING_OP_H_
// Functor definition for AvgPoolingOp, must be compilable by nvcc.

#include "absl/status/status.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/kernels/eigen_pooling.h"
#include "tensorflow/core/platform/types.h"
Expand Down Expand Up @@ -62,14 +63,12 @@ typedef Eigen::GpuDevice GPUDevice;
// pad_l: padding size to the left side
// bottom_diff: backprop to the input of the pooling layer.
template <typename T>
bool RunAvePoolBackwardNHWC(const T* const top_diff, const int num,
const int height, const int width,
const int channels, const int pooled_height,
const int pooled_width, const int kernel_h,
const int kernel_w, const int stride_h,
const int stride_w, const int pad_t,
const int pad_l, T* const bottom_diff,
const GPUDevice& d);
absl::Status RunAvePoolBackwardNHWC(const T* top_diff, int num, int height,
int width, int channels, int pooled_height,
int pooled_width, int kernel_h,
int kernel_w, int stride_h, int stride_w,
int pad_t, int pad_l, T* bottom_diff,
const GPUDevice& d);

} // namespace tensorflow

Expand Down
Loading
Loading