diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 8f862e329..02601764c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -53,7 +53,7 @@ jobs: } - name: Upload packages artifacts - uses: actions/upload-artifact@v1.0.0 + uses: actions/upload-artifact@v4.0.0 with: name: "drop-ci-packages" path: './packages' diff --git a/README.md b/README.md index 36ec1660c..75cad0aa7 100644 --- a/README.md +++ b/README.md @@ -15,19 +15,13 @@ English | [中文](docs/README-CN.md) -**=========================================================** - -### [Voting: Naming Convention Approach of v1.0.0](https://github.com/SciSharp/TensorFlow.NET/issues/1074) - -Dear all, - -We would like to urge you to participate in our upcoming vote regarding the naming convention for TensorFlow.NET version 1.0.0 in [#1074](https://github.com/SciSharp/TensorFlow.NET/issues/1074). Your participation in the vote is essential to help us decide on the best approach for improving the naming convention used in previous versions. - -Thank you, - -TensorFlow.NET Authors - -**=========================================================** +> [!IMPORTANT] +> We're happy that our work on tensorflow.net has attracted many users. However, at this time, none of the main maintainers of this repo is available for new features and bug fix. We won't refuse PRs and will help to review them. +> +> If you would like to be a contributor or maintainer of tensorflow.net, we'd like to help you to start up. +> +> We feel sorry for that and we'll resume the maintaining for this project once one of us has bandwidth for it. +> *master branch and v0.100.x is corresponding to tensorflow v2.10, v0.6x branch is from tensorflow v2.6, v0.15-tensorflow1.15 is from tensorflow1.15. Please add `https://www.myget.org/F/scisharp/api/v3/index.json` to nuget source to use nightly release.* @@ -75,9 +69,12 @@ PM> Install-Package TensorFlow.Keras The second part is the computing support part. Only one of the following packages is needed, depending on your device and system. ``` -### CPU version for Windows, Linux and Mac +### CPU version for Windows and Linux PM> Install-Package SciSharp.TensorFlow.Redist +### CPU version for MacOS +PM> Install-Package SciSharp.TensorFlow.Redist-OSX + ### GPU version for Windows (CUDA and cuDNN are required) PM> Install-Package SciSharp.TensorFlow.Redist-Windows-GPU diff --git a/TensorFlow.NET.sln b/TensorFlow.NET.sln index 87729e27d..e0c273568 100644 --- a/TensorFlow.NET.sln +++ b/TensorFlow.NET.sln @@ -39,6 +39,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tensorflow.Benchmark", "too EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tensorflow.Console", "tools\TensorFlowNET.Console\Tensorflow.Console.csproj", "{1DC32255-BA1F-4D6D-A9C9-5BD5ED71CAA0}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TensorFlow.Kernel.UnitTest", "test\TensorFlow.Kernel.UnitTest\TensorFlow.Kernel.UnitTest.csproj", "{654A027D-1364-4729-880B-144DFE1FF5BB}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Tensorflow.UnitTest", "test\Tensorflow.UnitTest\Tensorflow.UnitTest.csproj", "{A73DF5A6-866E-4AED-9017-AA2EE86368C4}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -322,6 +326,42 @@ Global {1DC32255-BA1F-4D6D-A9C9-5BD5ED71CAA0}.Release|x64.Build.0 = Release|x64 {1DC32255-BA1F-4D6D-A9C9-5BD5ED71CAA0}.Release|x86.ActiveCfg = Release|Any CPU {1DC32255-BA1F-4D6D-A9C9-5BD5ED71CAA0}.Release|x86.Build.0 = Release|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Debug|Any CPU.Build.0 = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Debug|x64.ActiveCfg = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Debug|x64.Build.0 = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Debug|x86.ActiveCfg = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Debug|x86.Build.0 = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.GPU|Any CPU.ActiveCfg = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.GPU|Any CPU.Build.0 = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.GPU|x64.ActiveCfg = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.GPU|x64.Build.0 = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.GPU|x86.ActiveCfg = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.GPU|x86.Build.0 = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Release|Any CPU.ActiveCfg = Release|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Release|Any CPU.Build.0 = Release|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Release|x64.ActiveCfg = Release|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Release|x64.Build.0 = Release|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Release|x86.ActiveCfg = Release|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Release|x86.Build.0 = Release|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Debug|Any CPU.Build.0 = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Debug|x64.ActiveCfg = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Debug|x64.Build.0 = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Debug|x86.ActiveCfg = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Debug|x86.Build.0 = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.GPU|Any CPU.ActiveCfg = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.GPU|Any CPU.Build.0 = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.GPU|x64.ActiveCfg = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.GPU|x64.Build.0 = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.GPU|x86.ActiveCfg = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.GPU|x86.Build.0 = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Release|Any CPU.ActiveCfg = Release|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Release|Any CPU.Build.0 = Release|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Release|x64.ActiveCfg = Release|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Release|x64.Build.0 = Release|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Release|x86.ActiveCfg = Release|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -342,6 +382,8 @@ Global {D24FCAA5-548C-4251-B226-A1B6535D0845} = {E1A5D2B7-10AF-4876-85C0-7714EF274214} {C23563DB-FE21-48E7-A411-87A109E4A899} = {E1A5D2B7-10AF-4876-85C0-7714EF274214} {1DC32255-BA1F-4D6D-A9C9-5BD5ED71CAA0} = {E1A5D2B7-10AF-4876-85C0-7714EF274214} + {654A027D-1364-4729-880B-144DFE1FF5BB} = {1B0918B9-65AD-4F34-A287-AF4597B27DBD} + {A73DF5A6-866E-4AED-9017-AA2EE86368C4} = {1B0918B9-65AD-4F34-A287-AF4597B27DBD} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {2DEAD3CC-486B-4918-A607-50B0DE7B114A} diff --git a/data/img001.bmp b/data/img001.bmp new file mode 100644 index 000000000..d149d76f1 Binary files /dev/null and b/data/img001.bmp differ diff --git a/src/TensorFlowNET.Core/APIs/c_api.customize.cs b/src/TensorFlowNET.Core/APIs/c_api.customize.cs index 510e52eb7..bee4897ee 100644 --- a/src/TensorFlowNET.Core/APIs/c_api.customize.cs +++ b/src/TensorFlowNET.Core/APIs/c_api.customize.cs @@ -8,10 +8,10 @@ namespace Tensorflow public partial class c_api { [DllImport(TensorFlowLibName)] - public static extern void TFC_SetAttr(SafeGraphHandle graph, IntPtr op, string attr_name, SafeBufferHandle attr_value_proto, SafeStatusHandle status); + public static extern void TF_SetAttr(SafeGraphHandle graph, IntPtr op, string attr_name, SafeBufferHandle attr_value_proto, SafeStatusHandle status); [DllImport(TensorFlowLibName)] - public static extern SafeBufferHandle TFC_GetHandleShapeAndType(SafeGraphHandle c_graph, TF_Output output); + public static extern SafeBufferHandle TF_GetHandleShapeAndType(SafeGraphHandle c_graph, TF_Output output); [DllImport(TensorFlowLibName)] - public static extern void TFC_SetHandleShapeAndType(SafeGraphHandle c_graph, TF_Output output, byte[] data, long proto_len, SafeStatusHandle status); + public static extern void TF_SetHandleShapeAndType(SafeGraphHandle c_graph, TF_Output output, byte[] data, long proto_len, SafeStatusHandle status); } } diff --git a/src/TensorFlowNET.Core/APIs/tf.array.cs b/src/TensorFlowNET.Core/APIs/tf.array.cs index 4d9c3da58..b529cd319 100644 --- a/src/TensorFlowNET.Core/APIs/tf.array.cs +++ b/src/TensorFlowNET.Core/APIs/tf.array.cs @@ -140,6 +140,16 @@ public Tensor identity(Tensor input, string name = null) public Tensor gather(Tensor @params, Tensor indices, string name = null, int axis = 0) => array_ops.gather(@params, indices, name: name, axis: ops.convert_to_tensor(axis)); + /// + /// Gather slices from `params` into a Tensor with shape specified by `indices`. + /// + /// + /// + /// + /// + public Tensor gather_nd(Tensor @params, Tensor indices, string name = null) + => gen_array_ops.gather_nd(@params, indices, name: name); + /// /// Return the elements, either from `x` or `y`, depending on the `condition`. /// diff --git a/src/TensorFlowNET.Core/APIs/tf.image.cs b/src/TensorFlowNET.Core/APIs/tf.image.cs index ac9cbc60d..41ef52967 100644 --- a/src/TensorFlowNET.Core/APIs/tf.image.cs +++ b/src/TensorFlowNET.Core/APIs/tf.image.cs @@ -339,6 +339,13 @@ public Tensor decode_image(Tensor contents, int channels = 0, TF_DataType dtype => image_ops_impl.decode_image(contents, channels: channels, dtype: dtype, name: name, expand_animations: expand_animations); + public Tensor encode_png(Tensor contents, string name = null) + => image_ops_impl.encode_png(contents, name: name); + + public Tensor encode_jpeg(Tensor contents, string name = null) + => image_ops_impl.encode_jpeg(contents, name: name); + + /// /// Convenience function to check if the 'contents' encodes a JPEG image. /// diff --git a/src/TensorFlowNET.Core/APIs/tf.io.cs b/src/TensorFlowNET.Core/APIs/tf.io.cs index be1e86e6c..ea1e44b28 100644 --- a/src/TensorFlowNET.Core/APIs/tf.io.cs +++ b/src/TensorFlowNET.Core/APIs/tf.io.cs @@ -16,6 +16,7 @@ limitations under the License. using System.Collections.Generic; using Tensorflow.IO; +using Tensorflow.Operations; namespace Tensorflow { @@ -46,6 +47,12 @@ public Operation save_v2(Tensor prefix, string[] tensor_names, public Tensor[] restore_v2(Tensor prefix, string[] tensor_names, string[] shape_and_slices, TF_DataType[] dtypes, string name = null) => ops.restore_v2(prefix, tensor_names, shape_and_slices, dtypes, name: name); + + public Operation write_file(string filename, Tensor conentes, string name = null) + => write_file(Tensorflow.ops.convert_to_tensor(filename, TF_DataType.TF_STRING), conentes, name); + + public Operation write_file(Tensor filename, Tensor conentes, string name = null) + => gen_ops.write_file(filename, conentes, name); } public GFile gfile = new GFile(); diff --git a/src/TensorFlowNET.Core/APIs/tf.nn.cs b/src/TensorFlowNET.Core/APIs/tf.nn.cs index 397c68c7c..112c48628 100644 --- a/src/TensorFlowNET.Core/APIs/tf.nn.cs +++ b/src/TensorFlowNET.Core/APIs/tf.nn.cs @@ -101,6 +101,8 @@ public Tensor embedding_lookup(Tensor @params, name: name); public IActivation relu() => new relu(); + + public IActivation swish() => new swish(); public IActivation tanh() => new tanh(); @@ -111,6 +113,9 @@ public Tensor tanh(Tensor x, string name = null) public Tensor relu(Tensor features, string name = null) => gen_nn_ops.relu(features, name); + public Tensor relu6(Tensor features, string name = null) + => gen_nn_ops.relu6(features, name); + public Tensor[] fused_batch_norm(Tensor x, Tensor scale, Tensor offset, diff --git a/src/TensorFlowNET.Core/Eager/EagerRunner.RecordGradient.cs b/src/TensorFlowNET.Core/Eager/EagerRunner.RecordGradient.cs index 59d5fd030..2bdd65f5b 100644 --- a/src/TensorFlowNET.Core/Eager/EagerRunner.RecordGradient.cs +++ b/src/TensorFlowNET.Core/Eager/EagerRunner.RecordGradient.cs @@ -80,6 +80,11 @@ BackwardFunction GetGradientFunction(string op_name, Tensor[] op_outputs) => (out_grads, unneeded_gradients) => { + if(!ops.gradientFunctions.ContainsKey(op_name)) + { + throw new Exception($"gradientFunctions not find op_name: {op_name}"); + } + if (ops.gradientFunctions[op_name] == null) return new Tensor[op_inputs.Length]; diff --git a/src/TensorFlowNET.Core/Gradients/array_grad.cs b/src/TensorFlowNET.Core/Gradients/array_grad.cs index 4b7027992..a4da60eed 100644 --- a/src/TensorFlowNET.Core/Gradients/array_grad.cs +++ b/src/TensorFlowNET.Core/Gradients/array_grad.cs @@ -381,5 +381,48 @@ public static Tensor[] _ReverseV2Grad(Operation op, Tensor[] grads) var axis = op.inputs[1]; return new Tensor[] { array_ops.reverse(grad, axis), null }; } + + [RegisterGradient("Tile")] + public static Tensor[] _TileGrad(Operation op, Tensor[] grads) + { + var grad = grads[0]; + var input_shape = array_ops.shape(op.inputs[0], out_type: op.inputs[1].dtype); + var split_shape = array_ops.reshape(array_ops.transpose(array_ops.stack(new Tensor[] { op.inputs[1], input_shape })), new Shape(-1)); + var axes = math_ops.range(0, array_ops.size(split_shape), 2); + + //# Sum reduces grad along the first dimension for IndexedSlices + //if isinstance(grad, indexed_slices_lib.IndexedSlices): + //input_shape_0 = math_ops.cast(input_shape[0], grad.indices.dtype) + //grad = math_ops.unsorted_segment_sum( + // grad.values, math_ops.mod(grad.indices, input_shape_0), input_shape_0) + //split_shape = array_ops.concat([[1], split_shape[1:]], axis = 0) + + var input_grad = math_ops.reduce_sum(array_ops.reshape(grad, split_shape), axes); + if (!tf.Context.executing_eagerly()) + { + input_grad.set_shape(op.inputs[0].GetShape()); + } + return new Tensor[] { input_grad, null }; + } + + [RegisterGradient("GatherNd")] + public static Tensor[] _GatherNdGrad(Operation op, Tensor[] grads) + { + var @ref = op.inputs[0]; + var indices = op.inputs[1]; + var grad = grads[0]; + var ref_shape = array_ops.shape(@ref, out_type: indices.dtype); + Tensor ref_grad = null; + if (indices.shape.ndim == 2 && indices.shape.dims[indices.shape.Length - 1] == 1) + { + ref_grad = (Tensor)new IndexedSlices(grad, array_ops.squeeze(indices, axis: -1), ref_shape); + } + else + { + ref_grad = gen_array_ops.scatter_nd(indices, grad, ref_shape); + } + return new Tensor[] { ref_grad, null }; + } + } } diff --git a/src/TensorFlowNET.Core/Gradients/nn_grad.cs b/src/TensorFlowNET.Core/Gradients/nn_grad.cs index a43a91b9a..87646a9ea 100644 --- a/src/TensorFlowNET.Core/Gradients/nn_grad.cs +++ b/src/TensorFlowNET.Core/Gradients/nn_grad.cs @@ -229,6 +229,37 @@ public static Tensor[] _Conv2DGrad(Operation op, Tensor[] grads) }; } + /// + /// Gradient function for Conv2D. + /// + /// + /// + /// + [RegisterGradient("DepthwiseConv2dNative")] + public static Tensor[] _DepthwiseConv2DGrad(Operation op, Tensor[] grads) + { + var dilations = op.get_attr_list("dilations"); + var strides = op.get_attr_list("strides"); + var padding = op.get_attr("padding"); + var explicit_paddings = op.get_attr_list("explicit_paddings"); + var data_format = op.get_attr("data_format"); + var shape = gen_array_ops.shape_n(new Tensor[] { op.inputs[0], op.inputs[1] }); + + return new Tensor[] + { + gen_nn_ops.depthwise_conv2d_native_backprop_input( + shape[0], op.inputs[1], grads[0], + strides, padding, explicit_paddings, + dilations: dilations, + data_format: data_format), + gen_nn_ops.depthwise_conv2d_native_backprop_filter(op.inputs[0], shape[1], grads[0], + strides, padding, + dilations: dilations, + explicit_paddings: explicit_paddings, + data_format: data_format) + }; + } + [RegisterGradient("FusedBatchNorm")] public static Tensor[] _FusedBatchNormGrad(Operation op, Tensor[] grads) => _BaseFusedBatchNormGrad(op, 0, grads); diff --git a/src/TensorFlowNET.Core/Keras/Activations/Activations.cs b/src/TensorFlowNET.Core/Keras/Activations/Activations.cs index f0d59ed62..37264104a 100644 --- a/src/TensorFlowNET.Core/Keras/Activations/Activations.cs +++ b/src/TensorFlowNET.Core/Keras/Activations/Activations.cs @@ -32,6 +32,7 @@ public interface IActivationsApi Activation Linear { get; } Activation Relu { get; } + Activation Relu6 { get; } Activation Sigmoid { get; } diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/DataAdapterArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/DataAdapterArgs.cs index 78882e82d..ba0332836 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/DataAdapterArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/DataAdapterArgs.cs @@ -1,5 +1,6 @@ using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; +using Tensorflow.NumPy; namespace Tensorflow.Keras.ArgsDefinition { @@ -16,5 +17,7 @@ public class DataAdapterArgs: IKerasConfig public int Worker { get; set; } public bool UseMultiprocessing { get; set; } public IModel Model { get; set; } + public Dictionary ClassWeight = null; + public NDArray SampleWeight = null; } } diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/DataHandlerArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/DataHandlerArgs.cs index 82530e950..72d0bb811 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/DataHandlerArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/DataHandlerArgs.cs @@ -1,5 +1,6 @@ using Tensorflow.Keras.Engine; using Tensorflow.Keras.Saving; +using Tensorflow.NumPy; namespace Tensorflow.Keras.ArgsDefinition { @@ -18,5 +19,7 @@ public class DataHandlerArgs: IKerasConfig public bool UseMultiprocessing { get; set; } = false; public IModel Model { get; set; } public IVariableV1 StepsPerExecution { get; set; } + public Dictionary ClassWeight = null; + public NDArray SampleWeight = null; } } diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/MergeArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/MergeArgs.cs index 0140b3dd0..9bcf1908e 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/MergeArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/MergeArgs.cs @@ -1,13 +1,15 @@ -using System; +using Newtonsoft.Json; +using System; using System.Collections.Generic; using System.Text; namespace Tensorflow.Keras.ArgsDefinition { // TODO: complete the implementation - public class MergeArgs : LayerArgs + public class MergeArgs : AutoSerializeLayerArgs { public Tensors Inputs { get; set; } + [JsonProperty("axis")] public int Axis { get; set; } } } diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUOptionalArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUOptionalArgs.cs index d441dc828..1d215576f 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUOptionalArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUOptionalArgs.cs @@ -4,10 +4,8 @@ namespace Tensorflow.Keras.ArgsDefinition { - public class GRUOptionalArgs + public class GRUOptionalArgs : RnnOptionalArgs { public string Identifier => "GRU"; - - public Tensor Mask { get; set; } = null; } } diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMOptionalArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMOptionalArgs.cs new file mode 100644 index 000000000..2829927c3 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMOptionalArgs.cs @@ -0,0 +1,11 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition.Rnn +{ + public class LSTMOptionalArgs : RnnOptionalArgs + { + public string Identifier => "LSTM"; + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNOptionalArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNOptionalArgs.cs new file mode 100644 index 000000000..a8b8caf06 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNOptionalArgs.cs @@ -0,0 +1,11 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition.Rnn +{ + public class SimpleRNNOptionalArgs : RnnOptionalArgs + { + public string Identifier => "SimpleRNN"; + } +} diff --git a/src/TensorFlowNET.Core/Keras/Engine/IModel.cs b/src/TensorFlowNET.Core/Keras/Engine/IModel.cs index 19f3df9ba..889c76d91 100644 --- a/src/TensorFlowNET.Core/Keras/Engine/IModel.cs +++ b/src/TensorFlowNET.Core/Keras/Engine/IModel.cs @@ -3,6 +3,7 @@ using Tensorflow.Keras.Metrics; using Tensorflow.Keras.Saving; using Tensorflow.NumPy; +using Tensorflow.Util; namespace Tensorflow.Keras.Engine; @@ -22,8 +23,11 @@ ICallback fit(NDArray x, NDArray y, int verbose = 1, List callbacks = null, float validation_split = 0f, - (NDArray val_x, NDArray val_y)? validation_data = null, + ValidationDataPack validation_data = null, + int validation_step = 10, bool shuffle = true, + Dictionary class_weight = null, + NDArray sample_weight = null, int initial_epoch = 0, int max_queue_size = 10, int workers = 1, @@ -35,8 +39,24 @@ ICallback fit(IEnumerable x, NDArray y, int verbose = 1, List callbacks = null, float validation_split = 0f, - (IEnumerable val_x, NDArray val_y)? validation_data = null, + ValidationDataPack validation_data = null, bool shuffle = true, + Dictionary class_weight = null, + NDArray sample_weight = null, + int initial_epoch = 0, + int max_queue_size = 10, + int workers = 1, + bool use_multiprocessing = false); + + public ICallback fit(IDatasetV2 dataset, + int batch_size = -1, + int epochs = 1, + int verbose = 1, + List callbacks = null, + IDatasetV2 validation_data = null, + int validation_step = 10, // 间隔多少次会进行一次验证 + bool shuffle = true, + Dictionary class_weight = null, int initial_epoch = 0, int max_queue_size = 10, int workers = 1, @@ -63,6 +83,8 @@ void load_weights(string filepath, Dictionary evaluate(NDArray x, NDArray y, int batch_size = -1, int verbose = 1, + NDArray sample_weight = null, + int steps = -1, int max_queue_size = 10, int workers = 1, @@ -78,6 +100,14 @@ Tensors predict(Tensors x, int workers = 1, bool use_multiprocessing = false); + public Tensors predict(IDatasetV2 dataset, + int batch_size = -1, + int verbose = 0, + int steps = -1, + int max_queue_size = 10, + int workers = 1, + bool use_multiprocessing = false); + void summary(int line_length = -1, float[] positions = null); IKerasConfig get_config(); diff --git a/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs b/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs index 5e08eadc4..57273eb08 100644 --- a/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs +++ b/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs @@ -55,6 +55,12 @@ public ILayer Conv1D(int filters, string kernel_initializer = "glorot_uniform", string bias_initializer = "zeros"); + public ILayer Conv2D(int filters, + Shape kernel_size = null, + Shape strides = null, + string padding = "valid" + ); + public ILayer Conv2D(int filters, Shape kernel_size = null, Shape strides = null, @@ -95,6 +101,19 @@ public ILayer Conv2D(int filters, bool use_bias = true, string kernel_initializer = "glorot_uniform", string bias_initializer = "zeros"); + public ILayer DepthwiseConv2D(Shape kernel_size = null, + Shape strides = null, + string padding = "valid", + string data_format = null, + Shape dilation_rate = null, + int groups = 1, + int depth_multiplier = 1, + string activation = null, + bool use_bias = false, + string kernel_initializer = "glorot_uniform", + string bias_initializer = "zeros", + string depthwise_initializer = "glorot_uniform" + ); public ILayer Dense(int units); public ILayer Dense(int units, @@ -161,6 +180,9 @@ public ILayer LayerNormalization(Axis? axis, public ILayer Normalization(Shape? input_shape = null, int? axis = -1, float? mean = null, float? variance = null, bool invert = false); public ILayer LeakyReLU(float alpha = 0.3f); + public ILayer ReLU6(); + + public IRnnCell LSTMCell(int uints, string activation = "tanh", string recurrent_activation = "sigmoid", diff --git a/src/TensorFlowNET.Core/Keras/Regularizers/IRegularizer.cs b/src/TensorFlowNET.Core/Keras/Regularizers/IRegularizer.cs index f4045c7b2..06dbb7c8c 100644 --- a/src/TensorFlowNET.Core/Keras/Regularizers/IRegularizer.cs +++ b/src/TensorFlowNET.Core/Keras/Regularizers/IRegularizer.cs @@ -1,7 +1,25 @@ -namespace Tensorflow.Keras +using Newtonsoft.Json; +using System.Collections.Generic; +using Tensorflow.Keras.Saving.Common; + +namespace Tensorflow.Keras { - public interface IRegularizer - { - Tensor Apply(RegularizerArgs args); - } + [JsonConverter(typeof(CustomizedRegularizerJsonConverter))] + public interface IRegularizer + { + [JsonProperty("class_name")] + string ClassName { get; } + [JsonProperty("config")] + IDictionary Config { get; } + Tensor Apply(RegularizerArgs args); + } + + public interface IRegularizerApi + { + IRegularizer GetRegularizerFromName(string name); + IRegularizer L1 { get; } + IRegularizer L2 { get; } + IRegularizer L1L2 { get; } + } + } diff --git a/src/TensorFlowNET.Core/Keras/Saving/Json/CustomizedRegularizerJsonConverter.cs b/src/TensorFlowNET.Core/Keras/Saving/Json/CustomizedRegularizerJsonConverter.cs new file mode 100644 index 000000000..4b1790aca --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/Saving/Json/CustomizedRegularizerJsonConverter.cs @@ -0,0 +1,57 @@ +using Newtonsoft.Json.Linq; +using Newtonsoft.Json; +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Operations.Regularizers; + +namespace Tensorflow.Keras.Saving.Common +{ + class RegularizerInfo + { + public string class_name { get; set; } + public JObject config { get; set; } + } + + public class CustomizedRegularizerJsonConverter : JsonConverter + { + public override bool CanConvert(Type objectType) + { + return objectType == typeof(IRegularizer); + } + + public override bool CanRead => true; + + public override bool CanWrite => true; + + public override void WriteJson(JsonWriter writer, object? value, JsonSerializer serializer) + { + var regularizer = value as IRegularizer; + if (regularizer is null) + { + JToken.FromObject(null).WriteTo(writer); + return; + } + JToken.FromObject(new RegularizerInfo() + { + class_name = regularizer.ClassName, + config = JObject.FromObject(regularizer.Config) + }, serializer).WriteTo(writer); + } + + public override object? ReadJson(JsonReader reader, Type objectType, object? existingValue, JsonSerializer serializer) + { + var info = serializer.Deserialize(reader); + if (info is null) + { + return null; + } + return info.class_name switch + { + "L1L2" => new L1L2 (info.config["l1"].ToObject(), info.config["l2"].ToObject()), + "L1" => new L1(info.config["l1"].ToObject()), + "L2" => new L2(info.config["l2"].ToObject()), + }; + } + } +} diff --git a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs index fa4ef0191..c0f9e695d 100644 --- a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs +++ b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs @@ -101,9 +101,10 @@ Array ReadValueMatrix(BinaryReader reader, Array matrix, int bytes, Type type, i Array ReadObjectMatrix(BinaryReader reader, Array matrix, int[] shape) { - Stream stream = reader.BaseStream; + Stream deflateStream = reader.BaseStream; + BufferedStream bufferedStream = new BufferedStream(deflateStream); var unpickler = new Unpickler(); - return (MultiArrayPickleWarpper)unpickler.load(stream); + return (MultiArrayPickleWarpper)unpickler.load(bufferedStream); } public (NDArray, NDArray) meshgrid(T[] array, bool copy = true, bool sparse = false) diff --git a/src/TensorFlowNET.Core/NumPy/Numpy.Manipulation.cs b/src/TensorFlowNET.Core/NumPy/Numpy.Manipulation.cs index 940856056..5e2574170 100644 --- a/src/TensorFlowNET.Core/NumPy/Numpy.Manipulation.cs +++ b/src/TensorFlowNET.Core/NumPy/Numpy.Manipulation.cs @@ -30,6 +30,15 @@ public static NDArray concatenate((NDArray, NDArray) tuple, int axis = 0) [AutoNumPy] public static NDArray stack(params NDArray[] arrays) => new NDArray(array_ops.stack(arrays)); + [AutoNumPy] + public static NDArray stack(NDArray[] arrays, int axis = 0) => new NDArray(array_ops.stack(arrays, axis)); + + [AutoNumPy] + public static NDArray stack((NDArray, NDArray) tuple, int axis = 0) => new NDArray(array_ops.stack(new[] { tuple.Item1, tuple.Item2 }, axis)); + + [AutoNumPy] + public static NDArray stack((NDArray, NDArray, NDArray) tuple, int axis = 0) => new NDArray(array_ops.stack(new[] { tuple.Item1, tuple.Item2, tuple.Item3 }, axis)); + [AutoNumPy] public static NDArray moveaxis(NDArray array, Axis source, Axis destination) => new NDArray(array_ops.moveaxis(array, source, destination)); } diff --git a/src/TensorFlowNET.Core/NumPy/Numpy.Math.cs b/src/TensorFlowNET.Core/NumPy/Numpy.Math.cs index 5bc97952b..2559638b3 100644 --- a/src/TensorFlowNET.Core/NumPy/Numpy.Math.cs +++ b/src/TensorFlowNET.Core/NumPy/Numpy.Math.cs @@ -85,5 +85,11 @@ public static NDArray dot(NDArray x1, NDArray x2, NDArray? axes = null, string? [AutoNumPy] public static NDArray add(NDArray x, NDArray y) => new NDArray(math_ops.add(x, y)); + + [AutoNumPy] + public static NDArray greater(NDArray x, NDArray y) => new NDArray(tf.greater(x, y)); + + [AutoNumPy] + public static NDArray less(NDArray x, NDArray y) => new NDArray(tf.less(x, y)); } } diff --git a/src/TensorFlowNET.Core/Operations/Operation.cs b/src/TensorFlowNET.Core/Operations/Operation.cs index e59c381cb..2105c53fa 100644 --- a/src/TensorFlowNET.Core/Operations/Operation.cs +++ b/src/TensorFlowNET.Core/Operations/Operation.cs @@ -437,7 +437,7 @@ internal void _set_attr(string attr_name, AttrValue attr_value) internal void _set_attr_with_buf(string attr_name, Buffer attr_buf) { Status status = new(); - c_api.TFC_SetAttr(graph, _handle, attr_name, attr_buf, status); + c_api.TF_SetAttr(graph, _handle, attr_name, attr_buf, status); status.Check(true); } } diff --git a/src/TensorFlowNET.Core/Operations/Regularizers/L1.cs b/src/TensorFlowNET.Core/Operations/Regularizers/L1.cs new file mode 100644 index 000000000..9e0619454 --- /dev/null +++ b/src/TensorFlowNET.Core/Operations/Regularizers/L1.cs @@ -0,0 +1,33 @@ +using System; + +using Tensorflow.Keras; + +namespace Tensorflow.Operations.Regularizers +{ + public class L1 : IRegularizer + { + float _l1; + private readonly Dictionary _config; + + public string ClassName => "L1"; + public virtual IDictionary Config => _config; + + public L1(float l1 = 0.01f) + { + // l1 = 0.01 if l1 is None else l1 + // validate_float_arg(l1, name = "l1") + // self.l1 = ops.convert_to_tensor(l1) + this._l1 = l1; + + _config = new(); + _config["l1"] = _l1; + } + + + public Tensor Apply(RegularizerArgs args) + { + //return self.l1 * ops.sum(ops.absolute(x)) + return _l1 * math_ops.reduce_sum(math_ops.abs(args.X)); + } + } +} diff --git a/src/TensorFlowNET.Core/Operations/Regularizers/L1L2.cs b/src/TensorFlowNET.Core/Operations/Regularizers/L1L2.cs new file mode 100644 index 000000000..e3af00eb5 --- /dev/null +++ b/src/TensorFlowNET.Core/Operations/Regularizers/L1L2.cs @@ -0,0 +1,48 @@ +using System; + +using Tensorflow.Keras; + +namespace Tensorflow.Operations.Regularizers +{ + public class L1L2 : IRegularizer + { + float _l1; + float _l2; + private readonly Dictionary _config; + + public string ClassName => "L1L2"; + public virtual IDictionary Config => _config; + + public L1L2(float l1 = 0.0f, float l2 = 0.0f) + { + //l1 = 0.0 if l1 is None else l1 + //l2 = 0.0 if l2 is None else l2 + // validate_float_arg(l1, name = "l1") + // validate_float_arg(l2, name = "l2") + + // self.l1 = l1 + // self.l2 = l2 + this._l1 = l1; + this._l2 = l2; + + _config = new(); + _config["l1"] = l1; + _config["l2"] = l2; + } + + public Tensor Apply(RegularizerArgs args) + { + //regularization = ops.convert_to_tensor(0.0, dtype = x.dtype) + //if self.l1: + // regularization += self.l1 * ops.sum(ops.absolute(x)) + //if self.l2: + // regularization += self.l2 * ops.sum(ops.square(x)) + //return regularization + + Tensor regularization = tf.constant(0.0, args.X.dtype); + regularization += _l1 * math_ops.reduce_sum(math_ops.abs(args.X)); + regularization += _l2 * math_ops.reduce_sum(math_ops.square(args.X)); + return regularization; + } + } +} diff --git a/src/TensorFlowNET.Core/Operations/Regularizers/L2.cs b/src/TensorFlowNET.Core/Operations/Regularizers/L2.cs new file mode 100644 index 000000000..6c0e950a9 --- /dev/null +++ b/src/TensorFlowNET.Core/Operations/Regularizers/L2.cs @@ -0,0 +1,33 @@ +using System; + +using Tensorflow.Keras; + +namespace Tensorflow.Operations.Regularizers +{ + public class L2 : IRegularizer + { + float _l2; + private readonly Dictionary _config; + + public string ClassName => "L2"; + public virtual IDictionary Config => _config; + + public L2(float l2 = 0.01f) + { + // l2 = 0.01 if l2 is None else l2 + // validate_float_arg(l2, name = "l2") + // self.l2 = l2 + this._l2 = l2; + + _config = new(); + _config["l2"] = _l2; + } + + + public Tensor Apply(RegularizerArgs args) + { + //return self.l2 * ops.sum(ops.square(x)) + return _l2 * math_ops.reduce_sum(math_ops.square(args.X)); + } + } +} diff --git a/src/TensorFlowNET.Core/Operations/array_ops.cs b/src/TensorFlowNET.Core/Operations/array_ops.cs index f80dcd2c4..548a885ed 100644 --- a/src/TensorFlowNET.Core/Operations/array_ops.cs +++ b/src/TensorFlowNET.Core/Operations/array_ops.cs @@ -166,6 +166,11 @@ public static Tensor boolean_mask(T1 tensor, T2 mask, string name = "boo throw new ValueError("mask cannot be scalar."); var leading_size = gen_math_ops.prod(shape(tensor_tensor)[$"{axis}:{axis + ndims_mask}"], ops.convert_to_tensor(new[] { 0 })); + if (leading_size.rank == 0) + { + leading_size = expand_dims(leading_size, 0); + } + var shape1 = concat(new[] { shape(tensor_tensor)[$":{axis}"], @@ -185,7 +190,7 @@ public static Tensor boolean_mask(T1 tensor, T2 mask, string name = "boo private static Tensor _apply_mask_1d(Tensor reshaped_tensor, Tensor mask, int axis = 0) { - var indices = squeeze(where(mask), axis: new[] { 1 }); + var indices = squeeze(where_v2(mask), axis: new[] { 1 }); return gather(reshaped_tensor, indices, axis: ops.convert_to_tensor(axis)); } @@ -829,7 +834,7 @@ public static Tensor strided_slice_grad(Tensor shape, Tensor begin, Tensor end, /// A `Tensor`. Has the same type as `input`. /// Contains the same data as `input`, but has one or more dimensions of /// size 1 removed. - public static Tensor squeeze(Tensor input, int[] axis = null, string name = null) + public static Tensor squeeze(Tensor input, Axis axis = null, string name = null) => gen_array_ops.squeeze(input, axis, name); public static Tensor identity(Tensor input, string name = null) @@ -990,7 +995,7 @@ public static Tensor gather(ResourceVariable @params, Tensor indices, string nam return @params.sparse_read(indices, name); } - public static Tensor transpose(T1 a, Axis perm, string name = "transpose", bool conjugate = false) + public static Tensor transpose(T1 a, Axis perm = null, string name = "transpose", bool conjugate = false) { return tf_with(ops.name_scope(name, "transpose", new { a }), scope => { @@ -1139,5 +1144,18 @@ public static Tensor placeholder(TF_DataType dtype, Shape shape = null, string n var _op = tf.OpDefLib._apply_op_helper("Placeholder", name: name, args: new { dtype, shape }); return _op.output; } + + public static int get_positive_axis(int axis, int ndims=-100, string axis_name="axis", string ndims_name= "ndims") + { + if(ndims != -100) + { + if (axis >= 0 && axis < ndims) return axis; + else if (-ndims <= axis && axis < 0) return axis + ndims; + else throw new ValueError($"{axis_name}={axis} out of bounds:expected {-ndims}<={axis_name}<{ndims}"); + + } else if(axis < 0) throw new ValueError($"{axis_name}={axis} may only be negative if {ndims_name} is statically known."); + return axis; + } + } } diff --git a/src/TensorFlowNET.Core/Operations/handle_data_util.cs b/src/TensorFlowNET.Core/Operations/handle_data_util.cs index a01efc520..363d3144e 100644 --- a/src/TensorFlowNET.Core/Operations/handle_data_util.cs +++ b/src/TensorFlowNET.Core/Operations/handle_data_util.cs @@ -51,7 +51,7 @@ public static void set_handle_data(Tensor target_t, HandleData handle_data) } Status status = new(); var proto = handle_data.ToByteArray(); - c_api.TFC_SetHandleShapeAndType(target_t.graph.c_graph, target_t._as_tf_output(), proto, proto.Length, status); + c_api.TF_SetHandleShapeAndType(target_t.graph.c_graph, target_t._as_tf_output(), proto, proto.Length, status); status.Check(true); } diff --git a/src/TensorFlowNET.Core/Operations/image_ops_impl.cs b/src/TensorFlowNET.Core/Operations/image_ops_impl.cs index 318b8b142..f1aff28ee 100644 --- a/src/TensorFlowNET.Core/Operations/image_ops_impl.cs +++ b/src/TensorFlowNET.Core/Operations/image_ops_impl.cs @@ -102,7 +102,10 @@ internal static Operation[] _CheckAtLeast3DImage(Tensor image, bool require_stat { throw new ValueError("\'image\' must be fully defined."); } - var dims = image_shape["-3:"]; + var dims = new Shape(new[] { + image_shape.dims[image_shape.dims.Length - 3], + image_shape.dims[image_shape.dims.Length - 2], + image_shape.dims[image_shape.dims.Length - 1]}); foreach (var dim in dims.dims) { if (dim == 0) @@ -112,16 +115,18 @@ internal static Operation[] _CheckAtLeast3DImage(Tensor image, bool require_stat } var image_shape_last_three_elements = new Shape(new[] { - image_shape.dims[image_shape.dims.Length - 1], + image_shape.dims[image_shape.dims.Length - 3], image_shape.dims[image_shape.dims.Length - 2], - image_shape.dims[image_shape.dims.Length - 3]}); + image_shape.dims[image_shape.dims.Length - 1]}); if (!image_shape_last_three_elements.IsFullyDefined) { Tensor image_shape_ = array_ops.shape(image); - var image_shape_return = tf.constant(new[] { - image_shape_.dims[image_shape.dims.Length - 1], - image_shape_.dims[image_shape.dims.Length - 2], - image_shape_.dims[image_shape.dims.Length - 3]}); + var image_shape_return = tf.slice(image_shape_, new[] { Math.Max(image_shape.dims.Length - 3, 0) }, new[] { 3 }); + + //var image_shape_return = tf.constant(new[] { + // image_shape_.dims[image_shape_.dims.Length - 3], + // image_shape_.dims[image_shape_.dims.Length - 2], + // image_shape_.dims[image_shape_.dims.Length - 1]}); return new Operation[] { check_ops.assert_positive( @@ -209,10 +214,10 @@ internal static Tensor _random_flip(Tensor image, int flip_index, int seed, stri } public static Tensor flip_left_right(Tensor image) - => _flip(image, 0, "flip_left_right"); + => _flip(image, 1, "flip_left_right"); public static Tensor flip_up_down(Tensor image) - => _flip(image, 1, "flip_up_down"); + => _flip(image, 0, "flip_up_down"); internal static Tensor _flip(Tensor image, int flip_index, string scope_name) { @@ -223,11 +228,11 @@ internal static Tensor _flip(Tensor image, int flip_index, string scope_name) Shape shape = image.shape; if (shape.ndim == 3 || shape.ndim == Unknown) { - return fix_image_flip_shape(image, gen_array_ops.reverse(image, ops.convert_to_tensor(new int[] { flip_index }))); + return fix_image_flip_shape(image, gen_array_ops.reverse_v2(image, ops.convert_to_tensor(new int[] { flip_index }))); } else if (shape.ndim == 4) { - return gen_array_ops.reverse_v2(image, ops.convert_to_tensor(new[] { (flip_index + 1) % 2 })); + return gen_array_ops.reverse_v2(image, ops.convert_to_tensor(new[] { flip_index + 1 })); } else { @@ -2047,6 +2052,22 @@ internal static (Tensor, Tensor) non_max_suppression_padded_v1(Tensor boxes, Ten }); } + public static Tensor encode_jpeg(Tensor contents, string name = null) + { + return tf_with(ops.name_scope(name, "encode_jpeg"), scope => + { + return gen_ops.encode_jpeg(contents, name:name); + }); + } + + public static Tensor encode_png(Tensor contents, string name = null) + { + return tf_with(ops.name_scope(name, "encode_png"), scope => + { + return gen_ops.encode_png(contents, name: name); + }); + } + public static Tensor is_jpeg(Tensor contents, string name = null) { return tf_with(ops.name_scope(name, "is_jpeg"), scope => diff --git a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj index be714618d..42c0399da 100644 --- a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj +++ b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj @@ -4,8 +4,8 @@ netstandard2.0;net6.0 Tensorflow.Binding Tensorflow - 2.11.0 - 0.110.3 + 2.15.0 + 0.150.0 10.0 enable Haiping Chen, Eli Belash, Yaohui Liu, Meinrad Recheis @@ -20,12 +20,16 @@ Google's TensorFlow full binding in .NET Standard. Building, training and infering deep learning models. https://tensorflownet.readthedocs.io - 0.110.3.0 + 0.150.0.0 + tf.net 0.150.x and above are based on tensorflow native 2.15.0 + * Support BERT model. + tf.net 0.110.x and above are based on tensorflow native 2.11.0 * Support RNN, LSTM model. * Support Transformer model. - + * Added IMDB dataset. + tf.net 0.100.x and above are based on tensorflow native 2.10.0 * Eager Mode is added finally. @@ -42,8 +46,9 @@ https://tensorflownet.readthedocs.io tf.net 0.7x.x aligns with TensorFlow v2.7.x native library. tf.net 0.10x.x aligns with TensorFlow v2.10.x native library. tf.net 0.11x.x aligns with TensorFlow v2.11.x native library. + tf.net 0.15x.x aligns with TensorFlow v2.15.x native library. - 0.110.3.0 + 0.150.0.0 LICENSE true packages @@ -174,8 +179,8 @@ https://tensorflownet.readthedocs.io - - + + diff --git a/src/TensorFlowNET.Core/Tensors/Ragged/RaggedTensor.cs b/src/TensorFlowNET.Core/Tensors/Ragged/RaggedTensor.cs index 4f85e1081..0f09d4128 100644 --- a/src/TensorFlowNET.Core/Tensors/Ragged/RaggedTensor.cs +++ b/src/TensorFlowNET.Core/Tensors/Ragged/RaggedTensor.cs @@ -163,5 +163,38 @@ public static implicit operator RaggedTensor(Tensor tensor) { return tensor.Tag as RaggedTensor; } + public Tensor nrows(TF_DataType out_type, string name = null) + { + tf_with(ops.name_scope(name, "RaggedNRows"), scope => + { + return math_ops.cast(this._row_partition.nrows(), dtype: out_type); + }); + return null; + } + public RaggedTensor row_lengths(int axis=-1, string name=null) + { + if (axis == 0) return this._row_partition.nrows(); + if (axis == 1) return this._row_partition.row_lengths(); + var values = (RaggedTensor)this._values; + axis = array_ops.get_positive_axis( + axis, this.shape.rank, ndims_name: "rank(this)"); + if (axis == 0) return this.nrows(this._row_partition.GetDataType()); + else if (axis == 1) + { + var splits = this._row_partition.row_splits; + return splits[new Slice(start: 1)] - splits[new Slice(stop: -1)]; + + } + else if (this._values is RaggedTensor) + { + return values.row_lengths(axis - 1); + } + else + { + var shape = array_ops.shape(values, out_type: this._row_partition.GetDataType()); + return array_ops.ones(shape[new Slice(stop:axis - 1)], this._row_partition.GetDataType()) * + shape[axis - 1]; + } + } } } diff --git a/src/TensorFlowNET.Core/Tensors/Ragged/RowPartition.cs b/src/TensorFlowNET.Core/Tensors/Ragged/RowPartition.cs index 29dc525df..9e242ff38 100644 --- a/src/TensorFlowNET.Core/Tensors/Ragged/RowPartition.cs +++ b/src/TensorFlowNET.Core/Tensors/Ragged/RowPartition.cs @@ -14,10 +14,15 @@ You may obtain a copy of the License at limitations under the License. ******************************************************************************/ +using Serilog.Debugging; using System; +using System.Collections.Concurrent; using System.Collections.Generic; +//using System.ComponentModel.DataAnnotations; using System.Text; +using System.Xml.Linq; using Tensorflow.Framework; +using Tensorflow.NumPy; using static Tensorflow.Binding; namespace Tensorflow @@ -99,5 +104,55 @@ public static RowPartition from_row_splits(Tensor row_splits, return new RowPartition(row_splits); }); } + + public static RowPartition from_row_lengths(Tensor row_lengths, + bool validate=true, + TF_DataType dtype = TF_DataType.TF_INT32, + TF_DataType dtype_hint= TF_DataType.TF_INT32) + { + row_lengths = _convert_row_partition( + row_lengths, "row_lengths", dtype_hint: dtype_hint, dtype: dtype); + Tensor row_limits = math_ops.cumsum(row_lengths, tf.constant(-1)); + Tensor row_splits = array_ops.concat(new Tensor[] { tf.convert_to_tensor(np.array(new int[] { 0 }, TF_DataType.TF_INT64)), row_limits }, axis:0); + return new RowPartition(row_splits: row_splits, row_lengths: row_lengths); + } + + public static Tensor _convert_row_partition(Tensor partition, string name, TF_DataType dtype, + TF_DataType dtype_hint= TF_DataType.TF_INT64) + { + if (partition is NDArray && partition.GetDataType() == np.int32) partition = ops.convert_to_tensor(partition, name: name); + if (partition.GetDataType() != np.int32 && partition.GetDataType() != np.int64) throw new ValueError($"{name} must have dtype int32 or int64"); + return partition; + } + + public Tensor nrows() + { + /*Returns the number of rows created by this `RowPartition*/ + if (this._nrows != null) return this._nrows; + var nsplits = tensor_shape.dimension_at_index(this._row_splits.shape, 0); + if (nsplits == null) return array_ops.shape(this._row_splits, out_type: this.row_splits.dtype)[0] - 1; + else return constant_op.constant(nsplits.value - 1, dtype: this.row_splits.dtype); + } + + public Tensor row_lengths() + { + + if (this._row_splits != null) + { + int nrows_plus_one = tensor_shape.dimension_value(this._row_splits.shape[0]); + return tf.constant(nrows_plus_one - 1); + + } + if (this._row_lengths != null) + { + var nrows = tensor_shape.dimension_value(this._row_lengths.shape[0]); + return tf.constant(nrows); + } + if(this._nrows != null) + { + return tensor_util.constant_value(this._nrows); + } + return tf.constant(-1); + } } } diff --git a/src/TensorFlowNET.Core/Tensors/tensor_util.cs b/src/TensorFlowNET.Core/Tensors/tensor_util.cs index e65c4850d..6e5024efd 100644 --- a/src/TensorFlowNET.Core/Tensors/tensor_util.cs +++ b/src/TensorFlowNET.Core/Tensors/tensor_util.cs @@ -1,4 +1,4 @@ -/***************************************************************************** +/***************************************************************************** Copyright 2018 The TensorFlow.NET Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); @@ -67,7 +67,7 @@ public static NDArray MakeNdarray(TensorProto tensor) T[] ExpandArrayToSize(IList src) { - if(src.Count == 0) + if (src.Count == 0) { return new T[0]; } @@ -77,7 +77,7 @@ T[] ExpandArrayToSize(IList src) var first_elem = src[0]; var last_elem = src[src.Count - 1]; T[] res = new T[num_elements]; - for(long i = 0; i < num_elements; i++) + for (long i = 0; i < num_elements; i++) { if (i < pre) res[i] = first_elem; else if (i >= num_elements - after) res[i] = last_elem; @@ -121,7 +121,7 @@ T[] ExpandArrayToSize(IList src) $"https://www.tensorflow.org/api_docs/python/tf/dtypes for supported TF dtypes."); } - if(values.size == 0) + if (values.size == 0) { return np.zeros(shape, tensor_dtype); } @@ -135,6 +135,47 @@ T[] ExpandArrayToSize(IList src) TF_DataType.TF_QINT32 }; + private static Array ConvertArray(Array inputArray, Func converter) + { + if (inputArray == null) + throw new ArgumentNullException(nameof(inputArray)); + + var elementType = typeof(TOut); + var lengths = new int[inputArray.Rank]; + for (var i = 0; i < inputArray.Rank; i++) + { + lengths[i] = inputArray.GetLength(i); + } + + var outputArray = Array.CreateInstance(elementType, lengths); + + FillArray(inputArray, outputArray, converter, new int[inputArray.Rank], 0); + + return outputArray; + } + + private static void FillArray(Array inputArray, Array outputArray, Func converter, int[] indices, int dimension) + { + if (dimension == inputArray.Rank - 1) + { + for (int i = 0; i < inputArray.GetLength(dimension); i++) + { + indices[dimension] = i; + var inputValue = (TIn)inputArray.GetValue(indices); + var convertedValue = converter(inputValue); + outputArray.SetValue(convertedValue, indices); + } + } + else + { + for (int i = 0; i < inputArray.GetLength(dimension); i++) + { + indices[dimension] = i; + FillArray(inputArray, outputArray, converter, indices, dimension + 1); + } + } + } + /// /// Create a TensorProto, invoked in graph mode /// @@ -154,22 +195,30 @@ public static TensorProto make_tensor_proto(object values, TF_DataType dtype = T var origin_dtype = values.GetDataType(); if (dtype == TF_DataType.DtInvalid) dtype = origin_dtype; - else if(origin_dtype != dtype) + else if (origin_dtype != dtype) { var new_system_dtype = dtype.as_system_dtype(); - if (values is long[] long_values) + + if (dtype != TF_DataType.TF_STRING && dtype != TF_DataType.TF_VARIANT && dtype != TF_DataType.TF_RESOURCE) { - if (dtype == TF_DataType.TF_INT32) - values = long_values.Select(x => (int)Convert.ChangeType(x, new_system_dtype)).ToArray(); - } - else if (values is double[] double_values) + if (values is Array arrayValues) + { + values = dtype switch + { + TF_DataType.TF_INT32 => ConvertArray(arrayValues, Convert.ToInt32), + TF_DataType.TF_FLOAT => ConvertArray(arrayValues, Convert.ToSingle), + TF_DataType.TF_DOUBLE => ConvertArray(arrayValues, Convert.ToDouble), + _ => values, + }; + } else + { + values = Convert.ChangeType(values, new_system_dtype); + } + + } else { - if (dtype == TF_DataType.TF_FLOAT) - values = double_values.Select(x => (float)Convert.ChangeType(x, new_system_dtype)).ToArray(); - } - else - values = Convert.ChangeType(values, new_system_dtype); + } dtype = values.GetDataType(); } @@ -249,6 +298,9 @@ public static TensorProto make_tensor_proto(object values, TF_DataType dtype = T case sbyte val: tensor_proto.IntVal.AddRange(new[] { (int)val }); break; + case byte val: + tensor_proto.IntVal.AddRange(new[] { (int)val }); + break; case int val: tensor_proto.IntVal.AddRange(new[] { val }); break; @@ -262,7 +314,7 @@ public static TensorProto make_tensor_proto(object values, TF_DataType dtype = T tensor_proto.DoubleVal.AddRange(new[] { val }); break; default: - throw new Exception("make_tensor_proto Not Implemented"); + throw new Exception($"make_tensor_proto Not Implemented {values.GetType().Name}"); } } @@ -284,7 +336,7 @@ bool hasattr(Graph property, string attr) if (tensor is EagerTensor eagerTensor) { - if(tensor.dtype == tf.int64) + if (tensor.dtype == tf.int64) return new Shape(tensor.ToArray()); else return new Shape(tensor.ToArray()); @@ -459,7 +511,7 @@ bool hasattr(Graph property, string attr) var d_ = new int[value.size]; foreach (var (index, d) in enumerate(value.ToArray())) d_[index] = d >= 0 ? d : -1; - + ret = ret.merge_with(new Shape(d_)); } return ret; diff --git a/src/TensorFlowNET.Core/Training/Saving/SavedModel/AugmentedGraphView.cs b/src/TensorFlowNET.Core/Training/Saving/SavedModel/AugmentedGraphView.cs index a91933357..9d0b3f001 100644 --- a/src/TensorFlowNET.Core/Training/Saving/SavedModel/AugmentedGraphView.cs +++ b/src/TensorFlowNET.Core/Training/Saving/SavedModel/AugmentedGraphView.cs @@ -88,11 +88,11 @@ private ConcreteFunction maybe_uncache_variable_captures(ConcreteFunction concre public override (IList, IDictionary>) breadth_first_traversal() { - Trackable get_merged_trackable(Trackable x) + void merged_trackable(Trackable x) { // TODO: complete it with new definitions `Asset` and `TrackableConstant`. - return x; } + var trackable_objects = base.breadth_first_traversal(); foreach(var obj in _children_cache.Keys) @@ -100,7 +100,7 @@ Trackable get_merged_trackable(Trackable x) // skip the deletion of cache (maybe do it later). foreach(var pair in _children_cache[obj]) { - _children_cache[obj][pair.Key] = get_merged_trackable(pair.Value); + merged_trackable(pair.Value); } } @@ -109,15 +109,11 @@ Trackable get_merged_trackable(Trackable x) public List<(string, Trackable)> list_dependencies(Trackable obj) { - IDictionary children; - if (!_children_cache.ContainsKey(obj)) + if (!_children_cache.TryGetValue(obj, out var children)) { children= new Dictionary(); } - else - { - children= _children_cache[obj]; - } + List<(string, Trackable)> res = new(); foreach(var pair in obj.deserialization_dependencies(children)) { diff --git a/src/TensorFlowNET.Core/Util/Data.cs b/src/TensorFlowNET.Core/Util/Data.cs new file mode 100644 index 000000000..fe3466ed0 --- /dev/null +++ b/src/TensorFlowNET.Core/Util/Data.cs @@ -0,0 +1,78 @@ +using OneOf; +using Tensorflow.NumPy; + +namespace Tensorflow.Util +{ + /// + /// ValidationDataPack is used to pass validation data to fit method. + /// It can recive data which could be A tuple `(x_val, xy_val)` or `(x_val, y_val, sample_weight_val)` of Numpy arrays. + /// + public class ValidationDataPack + { + internal OneOf val_x; + internal NDArray val_y; + internal NDArray val_sample_weight = null; + public bool val_x_is_array = false; + public ValidationDataPack((NDArray, NDArray) validation_data) + { + this.val_x = validation_data.Item1; + this.val_y = validation_data.Item2; + } + + public ValidationDataPack((NDArray, NDArray, NDArray) validation_data) + { + this.val_x = validation_data.Item1; + this.val_y = validation_data.Item2; + this.val_sample_weight = validation_data.Item3; + } + + public ValidationDataPack((IEnumerable, NDArray) validation_data) + { + this.val_x = validation_data.Item1.ToArray(); + this.val_y = validation_data.Item2; + val_x_is_array = true; + } + + public ValidationDataPack((IEnumerable, NDArray, NDArray) validation_data) + { + this.val_x = validation_data.Item1.ToArray(); + this.val_y = validation_data.Item2; + this.val_sample_weight = validation_data.Item3; + val_x_is_array = true; + } + + public static implicit operator ValidationDataPack((NDArray, NDArray) validation_data) + => new ValidationDataPack(validation_data); + + public static implicit operator ValidationDataPack((NDArray, NDArray, NDArray) validation_data) + => new ValidationDataPack(validation_data); + + public static implicit operator ValidationDataPack((IEnumerable, NDArray) validation_data) + => new ValidationDataPack(validation_data); + + public static implicit operator ValidationDataPack((IEnumerable, NDArray, NDArray) validation_data) + => new ValidationDataPack(validation_data); + + public void Deconstruct(out NDArray val_x, out NDArray val_y) + { + val_x = this.val_x.AsT0; + val_y = this.val_y; + } + + public void Deconstruct(out NDArray val_x, out NDArray val_y, out NDArray val_sample_weight) + { + val_x = this.val_x.AsT0; + val_y = this.val_y; + val_sample_weight = this.val_sample_weight; + } + + // add a unuse parameter to make it different from Deconstruct(out NDArray val_x, out NDArray val_y, out NDArray val_sample_weight) + public void Deconstruct(out NDArray[] val_x_array, out NDArray val_y, out NDArray val_sample_weight, out NDArray unuse) + { + val_x_array = this.val_x.AsT1; + val_y = this.val_y; + val_sample_weight = this.val_sample_weight; + unuse = null; + } + } +} diff --git a/src/TensorFlowNET.Core/Variables/variables.py.cs b/src/TensorFlowNET.Core/Variables/variables.py.cs index 0c07e0243..91f57e292 100644 --- a/src/TensorFlowNET.Core/Variables/variables.py.cs +++ b/src/TensorFlowNET.Core/Variables/variables.py.cs @@ -72,7 +72,9 @@ public static List global_variables(string scope = null) public static Operation variables_initializer(IVariableV1[] var_list, string name = "init") { if (var_list.Length > 0) + { return control_flow_ops.group(var_list.Select(x => x.Initializer).ToArray(), name); + } else return gen_control_flow_ops.no_op(name: name); } @@ -152,10 +154,5 @@ public static Operation _safe_initial_value_from_op(string name, Operation op, D return op; } - - public static Tensor global_variables_initializer() - { - throw new NotImplementedException(); - } } } diff --git a/src/TensorFlowNET.Core/ops.cs b/src/TensorFlowNET.Core/ops.cs index 351fd18ff..6f51150a2 100644 --- a/src/TensorFlowNET.Core/ops.cs +++ b/src/TensorFlowNET.Core/ops.cs @@ -590,7 +590,7 @@ public static bool inside_function() public static HandleData get_resource_handle_data(Tensor graph_op) { - var handle_data = c_api.TFC_GetHandleShapeAndType(graph_op.graph.c_graph, graph_op._as_tf_output()); + var handle_data = c_api.TF_GetHandleShapeAndType(graph_op.graph.c_graph, graph_op._as_tf_output()); try{ var handle_str = c_api.ByteStringPiece(handle_data.DangerousGetHandle() == IntPtr.Zero ? null : new Buffer(handle_data)); return HandleData.Parser.ParseFrom(handle_str); diff --git a/src/TensorFlowNET.Keras/Activations.cs b/src/TensorFlowNET.Keras/Activations.cs index ce5b4eb13..d3801902f 100644 --- a/src/TensorFlowNET.Keras/Activations.cs +++ b/src/TensorFlowNET.Keras/Activations.cs @@ -20,6 +20,11 @@ public class Activations: IActivationsApi Name = "relu", ActivationFunction = (features, name) => tf.Context.ExecuteOp("Relu", name, new ExecuteOpArgs(features)) }; + private static Activation _relu6 = new Activation() + { + Name = "relu6", + ActivationFunction = (features, name) => tf.Context.ExecuteOp("Relu6", name, new ExecuteOpArgs(features)) + }; private static Activation _sigmoid = new Activation() { Name = "sigmoid", @@ -55,6 +60,7 @@ static Activations() _nameActivationMap = new Dictionary(); RegisterActivation(_relu); + RegisterActivation(_relu6); RegisterActivation(_linear); RegisterActivation(_sigmoid); RegisterActivation(_softmax); @@ -65,6 +71,7 @@ static Activations() public Activation Linear => _linear; public Activation Relu => _relu; + public Activation Relu6 => _relu6; public Activation Sigmoid => _sigmoid; diff --git a/src/TensorFlowNET.Keras/Callbacks/Earlystopping.cs b/src/TensorFlowNET.Keras/Callbacks/Earlystopping.cs index 36993b637..a2a2ecfe2 100644 --- a/src/TensorFlowNET.Keras/Callbacks/Earlystopping.cs +++ b/src/TensorFlowNET.Keras/Callbacks/Earlystopping.cs @@ -19,8 +19,10 @@ public class EarlyStopping: ICallback string _monitor; string _mode; bool _restore_best_weights; - List? _best_weights; + List? _best_weights; CallbackParams _parameters; + Func _monitor_op; + public Dictionary>? history { get; set; } // user need to pass a CallbackParams to EarlyStopping, CallbackParams at least need the model public EarlyStopping(CallbackParams parameters,string monitor = "val_loss", float min_delta = 0f, int patience = 0, @@ -38,17 +40,49 @@ public EarlyStopping(CallbackParams parameters,string monitor = "val_loss", floa _min_delta = Math.Abs(min_delta); _restore_best_weights = restore_best_weights; _mode = mode; - if (mode != "auto" && mode != "min" && mode != "max") + + if (_mode != "auto" && _mode != "min" && _mode != "max") + { + Console.WriteLine($"EarlyStopping mode {_mode} is unknown, fallback to auto mode."); + _mode = "auto"; + } + + if (_mode == "min") + { + _monitor_op = np.less; + } + else if (_mode == "max") + { + _monitor_op = np.greater; + } + else + { + if (_monitor.EndsWith("acc") || _monitor.EndsWith("accuracy") || _monitor.EndsWith("auc")) + { + _monitor_op = np.greater; + } + else + { + _monitor_op = np.less; + } + } + + if (_monitor_op == np.greater) { - Console.WriteLine("EarlyStopping mode %s is unknown, fallback to auto mode.", mode); + _min_delta *= 1; + } + else + { + _min_delta *= -1; } } public void on_train_begin() { _wait = 0; _stopped_epoch = 0; + _best = _monitor_op == np.less ? (float)np.Inf : (float)-np.Inf; + _best_weights = null; _best_epoch = 0; - _best = (float)np.Inf; } public void on_epoch_begin(int epoch) @@ -74,7 +108,7 @@ public void on_epoch_end(int epoch, Dictionary epoch_logs) // Restore the weights after first epoch if no progress is ever made. if (_restore_best_weights && _best_weights == null) { - _best_weights = _parameters.Model.Weights; + _best_weights = _parameters.Model.get_weights(); } _wait += 1; @@ -83,7 +117,7 @@ public void on_epoch_end(int epoch, Dictionary epoch_logs) _best = current; _best_epoch = epoch; if (_restore_best_weights) - _best_weights = _parameters.Model.TrainableWeights; + _best_weights = _parameters.Model.get_weights(); // Only restart wait if we beat both the baseline and our previous best. if (_baseline == 0f || _is_improvement(current, _baseline)) _wait = 0; @@ -99,7 +133,7 @@ public void on_epoch_end(int epoch, Dictionary epoch_logs) { Console.WriteLine($"Restoring model weights from the end of the best epoch: {_best_epoch + 1}"); } - _parameters.Model.Weights = _best_weights; + _parameters.Model.set_weights(_best_weights); } } } @@ -131,21 +165,7 @@ float get_monitor_value(Dictionary logs) } public bool _is_improvement(float monitor_value, float reference_value) { - bool less_op = (monitor_value - _min_delta) < reference_value; - bool greater_op = (monitor_value - _min_delta) >= reference_value; - if (_mode == "min") - return less_op; - else if (_mode == "max") - return greater_op; - else - { - if (_monitor.EndsWith("acc") || _monitor.EndsWith("accuracy") || _monitor.EndsWith("auc")) - { - return greater_op; - } - else - return less_op; - } + return _monitor_op(monitor_value - _min_delta, reference_value); } public void on_test_end(Dictionary logs) diff --git a/src/TensorFlowNET.Keras/Datasets/Imdb.cs b/src/TensorFlowNET.Keras/Datasets/Imdb.cs index 081c26cb9..4d6df913b 100644 --- a/src/TensorFlowNET.Keras/Datasets/Imdb.cs +++ b/src/TensorFlowNET.Keras/Datasets/Imdb.cs @@ -112,35 +112,39 @@ public DatasetPass load_data( if (start_char != null) { - int[,] new_x_train_array = new int[x_train_array.GetLength(0), x_train_array.GetLength(1) + 1]; - for (var i = 0; i < x_train_array.GetLength(0); i++) + var (d1, d2) = (x_train_array.GetLength(0), x_train_array.GetLength(1)); + int[,] new_x_train_array = new int[d1, d2 + 1]; + for (var i = 0; i < d1; i++) { new_x_train_array[i, 0] = (int)start_char; - Array.Copy(x_train_array, i * x_train_array.GetLength(1), new_x_train_array, i * new_x_train_array.GetLength(1) + 1, x_train_array.GetLength(1)); + Array.Copy(x_train_array, i * d2, new_x_train_array, i * (d2 + 1) + 1, d2); } - int[,] new_x_test_array = new int[x_test_array.GetLength(0), x_test_array.GetLength(1) + 1]; - for (var i = 0; i < x_test_array.GetLength(0); i++) + (d1, d2) = (x_test_array.GetLength(0), x_test_array.GetLength(1)); + int[,] new_x_test_array = new int[d1, d2 + 1]; + for (var i = 0; i < d1; i++) { new_x_test_array[i, 0] = (int)start_char; - Array.Copy(x_test_array, i * x_test_array.GetLength(1), new_x_test_array, i * new_x_test_array.GetLength(1) + 1, x_test_array.GetLength(1)); + Array.Copy(x_test_array, i * d2, new_x_test_array, i * (d2 + 1) + 1, d2); } x_train_array = new_x_train_array; x_test_array = new_x_test_array; } else if (index_from != 0) { - for (var i = 0; i < x_train_array.GetLength(0); i++) + var (d1, d2) = (x_train_array.GetLength(0), x_train_array.GetLength(1)); + for (var i = 0; i < d1; i++) { - for (var j = 0; j < x_train_array.GetLength(1); j++) + for (var j = 0; j < d2; j++) { if (x_train_array[i, j] == 0) break; x_train_array[i, j] += index_from; } } - for (var i = 0; i < x_test_array.GetLength(0); i++) + (d1, d2) = (x_test_array.GetLength(0), x_test_array.GetLength(1)); + for (var i = 0; i < d1; i++) { - for (var j = 0; j < x_test_array.GetLength(1); j++) + for (var j = 0; j < d2; j++) { if (x_test_array[i, j] == 0) break; @@ -169,9 +173,10 @@ public DatasetPass load_data( if (num_words == null) { + var (d1, d2) = (xs_array.GetLength(0), xs_array.GetLength(1)); num_words = 0; - for (var i = 0; i < xs_array.GetLength(0); i++) - for (var j = 0; j < xs_array.GetLength(1); j++) + for (var i = 0; i < d1; i++) + for (var j = 0; j < d2; j++) num_words = max((int)num_words, (int)xs_array[i, j]); } @@ -180,10 +185,11 @@ public DatasetPass load_data( // 0 (padding), 1 (start), 2 (OOV) if (oov_char != null) { - int[,] new_xs_array = new int[xs_array.GetLength(0), xs_array.GetLength(1)]; - for (var i = 0; i < xs_array.GetLength(0); i++) + var (d1, d2) = (xs_array.GetLength(0), xs_array.GetLength(1)); + int[,] new_xs_array = new int[d1, d2]; + for (var i = 0; i < d1; i++) { - for (var j = 0; j < xs_array.GetLength(1); j++) + for (var j = 0; j < d2; j++) { if (xs_array[i, j] == 0 || skip_top <= xs_array[i, j] && xs_array[i, j] < num_words) new_xs_array[i, j] = xs_array[i, j]; @@ -195,11 +201,12 @@ public DatasetPass load_data( } else { - int[,] new_xs_array = new int[xs_array.GetLength(0), xs_array.GetLength(1)]; - for (var i = 0; i < xs_array.GetLength(0); i++) + var (d1, d2) = (xs_array.GetLength(0), xs_array.GetLength(1)); + int[,] new_xs_array = new int[d1, d2]; + for (var i = 0; i < d1; i++) { int k = 0; - for (var j = 0; j < xs_array.GetLength(1); j++) + for (var j = 0; j < d2; j++) { if (xs_array[i, j] == 0 || skip_top <= xs_array[i, j] && xs_array[i, j] < num_words) new_xs_array[i, k++] = xs_array[i, j]; diff --git a/src/TensorFlowNET.Keras/Engine/DataAdapters/DataAdapter.cs b/src/TensorFlowNET.Keras/Engine/DataAdapters/DataAdapter.cs index 6c7d53b2f..590f30a78 100644 --- a/src/TensorFlowNET.Keras/Engine/DataAdapters/DataAdapter.cs +++ b/src/TensorFlowNET.Keras/Engine/DataAdapters/DataAdapter.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Text; using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Util; namespace Tensorflow.Keras.Engine.DataAdapters { @@ -34,9 +35,75 @@ public virtual (Tensors, Tensors) Expand1d(Tensors x, Tensors y) return (x, y); } + public virtual (Tensors, Tensors, Tensors) Expand1d(Tensors x, Tensors y, Tensors sample_weight) + { + for (int i = 0; i < x.Length; i++) + { + if (x[i].shape.ndim == 1) + x[i] = array_ops.expand_dims(x[i], axis: -1); + } + for (int i = 0; i < y.Length; i++) + { + if (y[i].shape.ndim == 1) + y[i] = array_ops.expand_dims(y[i], axis: -1); + } + for (int i = 0; i < sample_weight.Length; i++) + { + if (sample_weight[i].shape.ndim == 1) + sample_weight[i] = array_ops.expand_dims(sample_weight[i], axis: -1); + } + return (x, y, sample_weight); + } + public virtual bool ShouldRecreateIterator() { return true; } + + public static ((NDArray, NDArray, NDArray),ValidationDataPack) train_validation_split((NDArray, NDArray, NDArray) x_y_sample_weight, float validation_split) + { + var x = x_y_sample_weight.Item1; + var y = x_y_sample_weight.Item2; + var sample_weight = x_y_sample_weight.Item3; + int train_count = Convert.ToInt32(x.dims[0] * (1 - validation_split)); + var train_x = x[new Slice(0, train_count)]; + var train_y = y[new Slice(0, train_count)]; + ValidationDataPack validation_data; + if (sample_weight != null) + { + validation_data = (x[new Slice(train_count)], y[new Slice(train_count)], sample_weight[new Slice(train_count)]); + sample_weight = sample_weight[new Slice(0, train_count)]; + } + else + { + validation_data = (x[new Slice(train_count)], y[new Slice(train_count)]); + } + + return ((train_x, train_y, sample_weight), validation_data); + } + + public static ((IEnumerable, NDArray, NDArray), ValidationDataPack) train_validation_split((IEnumerable, NDArray, NDArray) x_y_sample_weight, float validation_split) + { + var x = x_y_sample_weight.Item1; + var y = x_y_sample_weight.Item2; + var sample_weight = x_y_sample_weight.Item3; + int train_count = Convert.ToInt32(y.dims[0] * (1 - validation_split)); + var train_x = x.Select(x => x[new Slice(0, train_count)] as NDArray); + var train_y = y[new Slice(0, train_count)]; + var val_x = x.Select(x => x[new Slice(train_count)] as NDArray); + var val_y = y[new Slice(train_count)]; + + ValidationDataPack validation_data; + if (sample_weight != null) + { + validation_data = (val_x, val_y, sample_weight[new Slice(train_count)]); + sample_weight = sample_weight[new Slice(0, train_count)]; + } + else + { + validation_data = (val_x, val_y); + } + return ((train_x, train_y, sample_weight), validation_data); + } } } diff --git a/src/TensorFlowNET.Keras/Engine/DataAdapters/DataHandler.cs b/src/TensorFlowNET.Keras/Engine/DataAdapters/DataHandler.cs index 4723222f2..a305e5033 100644 --- a/src/TensorFlowNET.Keras/Engine/DataAdapters/DataHandler.cs +++ b/src/TensorFlowNET.Keras/Engine/DataAdapters/DataHandler.cs @@ -2,6 +2,9 @@ using System.Collections.Generic; using Tensorflow.Keras.ArgsDefinition; using static Tensorflow.Binding; +using Tensorflow.Keras.Utils; +using Tensorflow.Util; +using Tensorflow.Framework; namespace Tensorflow.Keras.Engine.DataAdapters { @@ -23,11 +26,13 @@ public class DataHandler long _steps_per_execution_value; int _initial_epoch => args.InitialEpoch; int _epochs => args.Epochs; + NDArray _sample_weight => args.SampleWeight; IVariableV1 _steps_per_execution; public DataHandler(DataHandlerArgs args) { this.args = args; + if (args.StepsPerExecution == null) { _steps_per_execution = tf.Variable(1L); @@ -48,6 +53,7 @@ public DataHandler(DataHandlerArgs args) BatchSize = args.BatchSize, Steps = args.StepsPerEpoch, Epochs = args.Epochs - args.InitialEpoch, + SampleWeight = args.SampleWeight, Shuffle = args.Shuffle, MaxQueueSize = args.MaxQueueSize, Worker = args.Workers, @@ -72,10 +78,75 @@ public DataHandler(DataHandlerArgs args) } _dataset = _adapter.GetDataset(); - _inferred_steps = _infer_steps(args.StepsPerEpoch, _dataset); _current_step = 0; _step_increment = _steps_per_execution_value - 1; _insufficient_data = false; + _configure_dataset_and_inferred_steps(args.X, args.ClassWeight); + } + + void _configure_dataset_and_inferred_steps(Tensors x, Dictionary class_weight) + { + if (_dataset == null) + { + _dataset = _adapter.GetDataset(); + _inferred_steps = _infer_steps(args.StepsPerEpoch, _dataset); + } + + if (class_weight != null) + { + _dataset = _dataset.map(_make_class_weight_map_fn(class_weight)); + } + _inferred_steps = _infer_steps(args.StepsPerEpoch, _dataset); + } + + + Func _make_class_weight_map_fn(Dictionary class_weight) + { + var class_ids = class_weight.Keys.OrderBy(key => key).ToList(); + var expected_class_ids = range(class_ids[0], class_ids[class_ids.Count - 1] + 1); + if (!class_ids.SequenceEqual(expected_class_ids)) + { + throw new ValueError("Expected `class_weight` to be a dict with keys from 0 to one less "+ + $"than the number of classes, found {class_weight}"); + } + + var class_weight_list = new List(); + foreach (var class_id in class_ids) + { + class_weight_list.Add(class_weight[class_id]); + } + var class_weight_tensor = tf.convert_to_tensor(class_weight_list.ToArray()); + + Func _class_weight_map_fn = (Tensors data) => + { + var x = data[0]; + var y = data[1]; + var sw = _sample_weight == null ? null : ops.convert_to_tensor(_sample_weight); + + if (y.shape.rank > 2) + { + throw new ValueError("`class_weight` not supported for 3+ dimensional targets."); + } + + var y_classes = smart_module.smart_cond( + y.shape.rank == 2 && y.shape[1] > 1, + () => math_ops.argmax(y, dimension: 1), + () => math_ops.cast(tf.reshape(y, (-1)), TF_DataType.TF_INT64)); + + var cw = array_ops.gather(class_weight_tensor, y_classes); + if (sw != null) + { + cw = tf.cast(cw, sw.dtype); + cw *= sw; + } + else + { + sw = cw; + } + return new Tensors { x, y, sw }; + }; + + return _class_weight_map_fn; } long _infer_steps(int steps_per_epoch, IDatasetV2 dataset) diff --git a/src/TensorFlowNET.Keras/Engine/DataAdapters/IDataAdapter.cs b/src/TensorFlowNET.Keras/Engine/DataAdapters/IDataAdapter.cs index 4bdc49795..bb71b0a2d 100644 --- a/src/TensorFlowNET.Keras/Engine/DataAdapters/IDataAdapter.cs +++ b/src/TensorFlowNET.Keras/Engine/DataAdapters/IDataAdapter.cs @@ -17,6 +17,8 @@ public interface IDataAdapter IDatasetV2 GetDataset(); int GetSize(); (Tensors, Tensors) Expand1d(Tensors x, Tensors y); + (Tensors, Tensors, Tensors) Expand1d(Tensors x, Tensors y, Tensors sample_weight); + bool ShouldRecreateIterator(); } } diff --git a/src/TensorFlowNET.Keras/Engine/DataAdapters/TensorLikeDataAdapter.cs b/src/TensorFlowNET.Keras/Engine/DataAdapters/TensorLikeDataAdapter.cs index 16e646a35..978a3f51c 100644 --- a/src/TensorFlowNET.Keras/Engine/DataAdapters/TensorLikeDataAdapter.cs +++ b/src/TensorFlowNET.Keras/Engine/DataAdapters/TensorLikeDataAdapter.cs @@ -20,7 +20,7 @@ public class TensorLikeDataAdapter : DataAdapter, IDataAdapter public TensorLikeDataAdapter(DataAdapterArgs args) { this.args = args; - _process_tensorlike(); + Tensor sample_weight_tensor = args.SampleWeight != null ? _process_tensorlike(args.SampleWeight) : null; num_samples = (int)args.X.shape[0]; var batch_size = args.BatchSize == -1 ? 32 : args.BatchSize; _batch_size = batch_size; @@ -37,6 +37,8 @@ public TensorLikeDataAdapter(DataAdapterArgs args) inputs.AddRange(args.X); if (args.Y != null) inputs.AddRange(args.Y); + if (sample_weight_tensor != null) + inputs.Add(sample_weight_tensor); dataset = slice_inputs(indices_dataset, inputs); dataset.FirstInputTensorCount = args.X.Length; } @@ -94,8 +96,9 @@ IDatasetV2 slice_inputs(IDatasetV2 indices_dataset, Tensors elements) public override bool ShouldRecreateIterator() => false; - void _process_tensorlike() + Tensor _process_tensorlike(NDArray sample_weights) { + return tf.convert_to_tensor(sample_weights); } } } diff --git a/src/TensorFlowNET.Keras/Engine/Functional.FromConfig.cs b/src/TensorFlowNET.Keras/Engine/Functional.FromConfig.cs index 7b826af8e..375fc9106 100644 --- a/src/TensorFlowNET.Keras/Engine/Functional.FromConfig.cs +++ b/src/TensorFlowNET.Keras/Engine/Functional.FromConfig.cs @@ -30,7 +30,7 @@ public static (Tensors, Tensors, Dictionary) reconstruct_from_co created_layers = created_layers ?? new Dictionary(); var node_index_map = new Dictionary<(string, int), int>(); var node_count_by_layer = new Dictionary(); - var unprocessed_nodes = new Dictionary(); + var unprocessed_nodes = new Dictionary>(); // First, we create all layers and enqueue nodes to be processed foreach (var layer_data in config.Layers) process_layer(created_layers, layer_data, unprocessed_nodes, node_count_by_layer); @@ -79,7 +79,7 @@ public static (Tensors, Tensors, Dictionary) reconstruct_from_co static void process_layer(Dictionary created_layers, LayerConfig layer_data, - Dictionary unprocessed_nodes, + Dictionary> unprocessed_nodes, Dictionary node_count_by_layer) { ILayer layer = null; @@ -92,32 +92,38 @@ static void process_layer(Dictionary created_layers, created_layers[layer_name] = layer; } - node_count_by_layer[layer] = _should_skip_first_node(layer) ? 1 : 0; + node_count_by_layer[layer] = layer_data.InboundNodes.Count - (_should_skip_first_node(layer) ? 1 : 0); var inbound_nodes_data = layer_data.InboundNodes; foreach (var node_data in inbound_nodes_data) { if (!unprocessed_nodes.ContainsKey(layer)) - unprocessed_nodes[layer] = node_data; + unprocessed_nodes[layer] = new List() { node_data }; else - unprocessed_nodes.Add(layer, node_data); + unprocessed_nodes[layer].Add(node_data); } } static void process_node(ILayer layer, - NodeConfig node_data, + List nodes_data, Dictionary created_layers, Dictionary node_count_by_layer, Dictionary<(string, int), int> node_index_map) { + var input_tensors = new List(); - var inbound_layer_name = node_data.Name; - var inbound_node_index = node_data.NodeIndex; - var inbound_tensor_index = node_data.TensorIndex; - var inbound_layer = created_layers[inbound_layer_name]; - var inbound_node = inbound_layer.InboundNodes[inbound_node_index]; - input_tensors.Add(inbound_node.Outputs[inbound_node_index]); + for (int i = 0; i < nodes_data.Count; i++) + { + var node_data = nodes_data[i]; + var inbound_layer_name = node_data.Name; + var inbound_node_index = node_data.NodeIndex; + var inbound_tensor_index = node_data.TensorIndex; + + var inbound_layer = created_layers[inbound_layer_name]; + var inbound_node = inbound_layer.InboundNodes[inbound_node_index]; + input_tensors.Add(inbound_node.Outputs[inbound_node_index]); + } var output_tensors = layer.Apply(input_tensors); diff --git a/src/TensorFlowNET.Keras/Engine/Functional.cs b/src/TensorFlowNET.Keras/Engine/Functional.cs index 7347585f8..75854d82c 100644 --- a/src/TensorFlowNET.Keras/Engine/Functional.cs +++ b/src/TensorFlowNET.Keras/Engine/Functional.cs @@ -180,7 +180,7 @@ void ComputeTensorUsageCount() var (nodes_in_decreasing_depth, layer_indices) = BuildMap(outputs); var network_nodes = nodes_in_decreasing_depth .Select(node => MakeNodeKey(node.Layer.Name, node.Layer.InboundNodes.IndexOf(node))) - .ToArray(); + .ToList(); var nodes_depths = new Dictionary(); var layers_depths = new Dictionary(); @@ -221,7 +221,7 @@ void ComputeTensorUsageCount() layers_depths[input_layer] = 0; layer_indices[input_layer] = -1; nodes_depths[input_layer.InboundNodes[0]] = 0; - network_nodes.add(MakeNodeKey(input_layer.Name, 0)); + network_nodes.Add(MakeNodeKey(input_layer.Name, 0)); } } @@ -231,7 +231,7 @@ void ComputeTensorUsageCount() { if (!nodes_by_depth.ContainsKey(depth)) nodes_by_depth[depth] = new List(); - nodes_by_depth[depth].append(node); + nodes_by_depth[depth].Add(node); } var layers_by_depth = new Dictionary>(); @@ -239,7 +239,7 @@ void ComputeTensorUsageCount() { if (!layers_by_depth.ContainsKey(depth)) layers_by_depth[depth] = new List(); - layers_by_depth[depth].append(layer); + layers_by_depth[depth].Add(layer); } // Get sorted list of layer depths. @@ -260,7 +260,7 @@ void ComputeTensorUsageCount() // Get sorted list of node depths. depth_keys = nodes_by_depth.Keys.OrderBy(x => x).Reverse(); - return (network_nodes, nodes_by_depth, layers, layers_by_depth); + return (network_nodes.ToArray(), nodes_by_depth, layers, layers_by_depth); } string MakeNodeKey(string layer_name, int node_index) diff --git a/src/TensorFlowNET.Keras/Engine/Layer.Serialize.cs b/src/TensorFlowNET.Keras/Engine/Layer.Serialize.cs index ed5c2de0a..49811417e 100644 --- a/src/TensorFlowNET.Keras/Engine/Layer.Serialize.cs +++ b/src/TensorFlowNET.Keras/Engine/Layer.Serialize.cs @@ -27,6 +27,6 @@ public override IDictionary _trackable_children(SaveType save children = new Dictionary(); } - return children.Concat(base._trackable_children(save_type, cache)).ToDictionary(x => x.Key, x => x.Value); + return children.Concat(base._trackable_children(save_type, cache)).GroupBy(x => x.Key).Select(g => g.First()).ToDictionary(x => x.Key, x => x.Value); } } \ No newline at end of file diff --git a/src/TensorFlowNET.Keras/Engine/LossesContainer.cs b/src/TensorFlowNET.Keras/Engine/LossesContainer.cs index 6a91450de..c06fca593 100644 --- a/src/TensorFlowNET.Keras/Engine/LossesContainer.cs +++ b/src/TensorFlowNET.Keras/Engine/LossesContainer.cs @@ -26,11 +26,11 @@ public LossesContainer(ILossFunc losses, string[] output_names = null) /// /// /// - public Tensor Call(Tensor y_true, Tensor y_pred) + public Tensor Call(Tensor y_true, Tensor y_pred, Tensor sample_weight = null) { if (!_built) Build(y_pred); - var loss_value = _losses.Call(y_true, y_pred); + var loss_value = _losses.Call(y_true, y_pred, sample_weight:sample_weight); var loss_metric_value = loss_value; var batch_dim = array_ops.shape(y_true)[0]; diff --git a/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs b/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs index a74a77f18..ec99d7ef9 100644 --- a/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs +++ b/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs @@ -30,6 +30,7 @@ public partial class Model public Dictionary evaluate(NDArray x, NDArray y, int batch_size = -1, int verbose = 1, + NDArray sample_weight = null, int steps = -1, int max_queue_size = 10, int workers = 1, @@ -51,6 +52,7 @@ public Dictionary evaluate(NDArray x, NDArray y, StepsPerEpoch = steps, InitialEpoch = 0, Epochs = 1, + SampleWeight = sample_weight, MaxQueueSize = max_queue_size, Workers = workers, UseMultiprocessing = use_multiprocessing, @@ -68,13 +70,19 @@ public Dictionary evaluate(NDArray x, NDArray y, return evaluate(data_handler, callbacks, is_val, test_function); } - public Dictionary evaluate(IEnumerable x, Tensor y, int verbose = 1, bool is_val = false) + public Dictionary evaluate( + IEnumerable x, + Tensor y, + int verbose = 1, + NDArray sample_weight = null, + bool is_val = false) { var data_handler = new DataHandler(new DataHandlerArgs { X = new Tensors(x.ToArray()), Y = y, Model = this, + SampleWeight = sample_weight, StepsPerExecution = _steps_per_execution }); @@ -104,7 +112,19 @@ public Dictionary evaluate(IDatasetV2 x, int verbose = 1, bool is Steps = data_handler.Inferredsteps }); - return evaluate(data_handler, callbacks, is_val, test_function); + Func> testFunction; + + if (data_handler.DataAdapter.GetDataset().structure.Length > 2 || + data_handler.DataAdapter.GetDataset().FirstInputTensorCount > 1) + { + testFunction = test_step_multi_inputs_function; + } + else + { + testFunction = test_function; + } + + return evaluate(data_handler, callbacks, is_val, testFunction); } /// @@ -130,6 +150,7 @@ Dictionary evaluate(DataHandler data_handler, CallbackList callba var end_step = step + data_handler.StepIncrement; if (!is_val) callbacks.on_test_batch_end(end_step, logs); + GC.Collect(); } } callbacks.on_test_end(logs); @@ -140,7 +161,8 @@ Dictionary evaluate(DataHandler data_handler, CallbackList callba Dictionary test_function(DataHandler data_handler, OwnedIterator iterator) { var data = iterator.next(); - var outputs = test_step(data_handler, data[0], data[1]); + var outputs = data.Length == 2 ? test_step(data_handler, data[0], data[1]) : + test_step(data_handler, data[0], data[1], data[2]); tf_with(ops.control_dependencies(new object[0]), ctl => _test_counter.assign_add(1)); return outputs; } @@ -149,7 +171,13 @@ Dictionary test_step_multi_inputs_function(DataHandler data_handl { var data = iterator.next(); var x_size = data_handler.DataAdapter.GetDataset().FirstInputTensorCount; - var outputs = test_step(data_handler, data.Take(x_size).ToArray(), data.Skip(x_size).ToArray()); + var outputs = data.Length == 2 ? + test_step(data_handler, new Tensors(data.Take(x_size).ToArray()), new Tensors(data.Skip(x_size).ToArray())) : + test_step( + data_handler, + new Tensors(data.Take(x_size).ToArray()), + new Tensors(data.Skip(x_size).Take(x_size).ToArray()), + new Tensors(data.Skip(2 * x_size).ToArray())); tf_with(ops.control_dependencies(new object[0]), ctl => _test_counter.assign_add(1)); return outputs; } @@ -157,11 +185,22 @@ Dictionary test_step_multi_inputs_function(DataHandler data_handl Dictionary test_step(DataHandler data_handler, Tensors x, Tensors y) { - (x, y) = data_handler.DataAdapter.Expand1d(x, y); + (x,y) = data_handler.DataAdapter.Expand1d(x, y); + var y_pred = Apply(x, training: false); + var loss = compiled_loss.Call(y, y_pred); compiled_metrics.update_state(y, y_pred); return metrics.Select(x => (x.Name, x.result())).ToDictionary(x => x.Item1, x => (float)x.Item2); } + + Dictionary test_step(DataHandler data_handler, Tensors x, Tensors y, Tensors sample_weight) + { + (x, y, sample_weight) = data_handler.DataAdapter.Expand1d(x, y, sample_weight); + var y_pred = Apply(x, training: false); + var loss = compiled_loss.Call(y, y_pred, sample_weight: sample_weight); + compiled_metrics.update_state(y, y_pred); + return metrics.Select(x => (x.Name, x.result())).ToDictionary(x => x.Item1, x => (float)x.Item2); + } } } diff --git a/src/TensorFlowNET.Keras/Engine/Model.Fit.cs b/src/TensorFlowNET.Keras/Engine/Model.Fit.cs index d6f89d8be..e1303513e 100644 --- a/src/TensorFlowNET.Keras/Engine/Model.Fit.cs +++ b/src/TensorFlowNET.Keras/Engine/Model.Fit.cs @@ -6,10 +6,13 @@ using Tensorflow.Keras.Engine.DataAdapters; using System.Diagnostics; using Tensorflow.Keras.Callbacks; -using System.Data; +using Tensorflow.Util; +using OneOf; namespace Tensorflow.Keras.Engine { + + public partial class Model { /// @@ -19,19 +22,30 @@ public partial class Model /// /// /// - /// /// + /// /// /// /// + /// + /// + /// + /// + /// + /// + /// + /// public ICallback fit(NDArray x, NDArray y, int batch_size = -1, int epochs = 1, int verbose = 1, List callbacks = null, float validation_split = 0f, - (NDArray val_x, NDArray val_y)? validation_data = null, + ValidationDataPack validation_data = null, + int validation_step = 10, bool shuffle = true, + Dictionary class_weight = null, + NDArray sample_weight = null, int initial_epoch = 0, int max_queue_size = 10, int workers = 1, @@ -43,25 +57,24 @@ public ICallback fit(NDArray x, NDArray y, $"The array x and y should have same value at dim 0, but got {x.dims[0]} and {y.dims[0]}"); } - var train_x = x; - var train_y = y; + // The default dtype in NDArray is double, so we need to cast sample_weight to float to mul with loss which's dtype is float. + sample_weight = sample_weight?.astype(TF_DataType.TF_FLOAT); if (validation_split != 0f && validation_data == null) { - int train_count = Convert.ToInt32(x.dims[0] * (1 - validation_split)); - train_x = x[new Slice(0, train_count)]; - train_y = y[new Slice(0, train_count)]; - validation_data = (val_x: x[new Slice(train_count)], val_y: y[new Slice(train_count)]); + ((x, y, sample_weight), validation_data) = DataAdapter.train_validation_split((x, y, sample_weight), validation_split); } var data_handler = new DataHandler(new DataHandlerArgs { - X = train_x, - Y = train_y, + X = x, + Y = y, + SampleWeight = sample_weight, BatchSize = batch_size, InitialEpoch = initial_epoch, Epochs = epochs, Shuffle = shuffle, + ClassWeight = class_weight, MaxQueueSize = max_queue_size, Workers = workers, UseMultiprocessing = use_multiprocessing, @@ -73,14 +86,17 @@ public ICallback fit(NDArray x, NDArray y, train_step_func: train_step_function); } + public ICallback fit(IEnumerable x, NDArray y, int batch_size = -1, int epochs = 1, int verbose = 1, List callbacks = null, float validation_split = 0f, - (IEnumerable val_x, NDArray val_y)? validation_data = null, + ValidationDataPack validation_data = null, bool shuffle = true, + Dictionary class_weight = null, + NDArray sample_weight = null, int initial_epoch = 0, int max_queue_size = 10, int workers = 1, @@ -95,27 +111,24 @@ public ICallback fit(IEnumerable x, NDArray y, } } - var train_x = x; - var train_y = y; + sample_weight = sample_weight?.astype(TF_DataType.TF_FLOAT); + if (validation_split != 0f && validation_data == null) { - int train_count = Convert.ToInt32(y.dims[0] * (1 - validation_split)); - train_x = x.Select(x => x[new Slice(0, train_count)] as NDArray); - train_y = y[new Slice(0, train_count)]; - var val_x = x.Select(x => x[new Slice(train_count)] as NDArray); - var val_y = y[new Slice(train_count)]; - validation_data = (val_x, val_y); + ((x, y, sample_weight), validation_data) = DataAdapter.train_validation_split((x, y, sample_weight), validation_split); } var data_handler = new DataHandler(new DataHandlerArgs { - X = new Tensors(train_x.ToArray()), - Y = train_y, + X = new Tensors(x.ToArray()), + Y = y, + SampleWeight = sample_weight, BatchSize = batch_size, InitialEpoch = initial_epoch, Epochs = epochs, Shuffle = shuffle, + ClassWeight = class_weight, MaxQueueSize = max_queue_size, Workers = workers, UseMultiprocessing = use_multiprocessing, @@ -136,14 +149,15 @@ public ICallback fit(IEnumerable x, NDArray y, } } - public History fit(IDatasetV2 dataset, + public ICallback fit(IDatasetV2 dataset, int batch_size = -1, int epochs = 1, int verbose = 1, List callbacks = null, IDatasetV2 validation_data = null, - int validation_step = 10, // 间隔多少次会进行一次验证 + int validation_step = 10, bool shuffle = true, + Dictionary class_weight = null, int initial_epoch = 0, int max_queue_size = 10, int workers = 1, @@ -157,6 +171,7 @@ public History fit(IDatasetV2 dataset, InitialEpoch = initial_epoch, Epochs = epochs, Shuffle = shuffle, + ClassWeight = class_weight, MaxQueueSize = max_queue_size, Workers = workers, UseMultiprocessing = use_multiprocessing, @@ -164,9 +179,20 @@ public History fit(IDatasetV2 dataset, StepsPerExecution = _steps_per_execution }); + Func> trainStepFunction; + + if (data_handler.DataAdapter.GetDataset().structure.Length > 2 || + data_handler.DataAdapter.GetDataset().FirstInputTensorCount > 1) + { + trainStepFunction = train_step_multi_inputs_function; + } + else + { + trainStepFunction = train_step_function; + } return FitInternal(data_handler, epochs, validation_step, verbose, callbacks, validation_data: validation_data, - train_step_func: train_step_function); + train_step_func: trainStepFunction); } History FitInternal(DataHandler data_handler, int epochs, int validation_step, int verbose, List callbackList, IDatasetV2 validation_data, @@ -204,13 +230,14 @@ History FitInternal(DataHandler data_handler, int epochs, int validation_step, i var end_step = step + data_handler.StepIncrement; End_step = end_step; callbacks.on_train_batch_end(end_step, logs); + GC.Collect(); } if (validation_data != null) { if (validation_step > 0 && epoch ==0 || (epoch) % validation_step != 0) continue; - + var val_logs = evaluate(validation_data); foreach(var log in val_logs) { @@ -219,11 +246,10 @@ History FitInternal(DataHandler data_handler, int epochs, int validation_step, i callbacks.on_train_batch_end(End_step, logs); } + GC.Collect(); callbacks.on_epoch_end(epoch, logs); - GC.Collect(); - GC.WaitForPendingFinalizers(); if (stop_training) { break; @@ -233,7 +259,7 @@ History FitInternal(DataHandler data_handler, int epochs, int validation_step, i return callbacks.History; } - History FitInternal(DataHandler data_handler, int epochs, int verbose, List callbackList, (NDArray, NDArray)? validation_data, + History FitInternal(DataHandler data_handler, int epochs, int verbose, List callbackList, ValidationDataPack validation_data, Func> train_step_func) { stop_training = false; @@ -268,13 +294,29 @@ History FitInternal(DataHandler data_handler, int epochs, int verbose, List val_logs; + if (!validation_data.val_x_is_array) + { + (val_x, val_y, val_sample_weight) = validation_data; + // Because evaluate calls call_test_batch_end, this interferes with our output on the screen + // so we need to pass a is_val parameter to stop on_test_batch_end + val_logs = evaluate(val_x, val_y, sample_weight: val_sample_weight, is_val: true); + + } + else + { + (val_x_array, val_y, val_sample_weight, _) = validation_data; + val_logs = evaluate(val_x_array, val_y, sample_weight: val_sample_weight, is_val: true); + } foreach (var log in val_logs) { logs["val_" + log.Key] = log.Value; @@ -286,7 +328,6 @@ History FitInternal(DataHandler data_handler, int epochs, int verbose, List callbackList, (IEnumerable, NDArray)? validation_data, - Func> train_step_func) - { - stop_training = false; - _train_counter.assign(0); - var callbacks = new CallbackList(new CallbackParams - { - Model = this, - Verbose = verbose, - Epochs = epochs, - Steps = data_handler.Inferredsteps - }); - - if (callbackList != null) - { - foreach (var callback in callbackList) - callbacks.callbacks.add(callback); - } - - callbacks.on_train_begin(); - - foreach (var (epoch, iterator) in data_handler.enumerate_epochs()) - { - reset_metrics(); - callbacks.on_epoch_begin(epoch); - // data_handler.catch_stop_iteration(); - var logs = new Dictionary(); - long End_step = 0; - foreach (var step in data_handler.steps()) - { - callbacks.on_train_batch_begin(step); - logs = train_step_func(data_handler, iterator); - var end_step = step + data_handler.StepIncrement; - End_step = end_step; - callbacks.on_train_batch_end(end_step, logs); - } - - if (validation_data != null) - { - var val_logs = evaluate(validation_data.Value.Item1, validation_data.Value.Item2); - foreach (var log in val_logs) - { - logs["val_" + log.Key] = log.Value; - callbacks.on_train_batch_end(End_step, logs); - } - } - - callbacks.on_epoch_end(epoch, logs); - - GC.Collect(); - GC.WaitForPendingFinalizers(); - if (stop_training) - { - break; - } - } - - return callbacks.History; - } } } diff --git a/src/TensorFlowNET.Keras/Engine/Model.Predict.cs b/src/TensorFlowNET.Keras/Engine/Model.Predict.cs index cbe4a7295..e3a5aba68 100644 --- a/src/TensorFlowNET.Keras/Engine/Model.Predict.cs +++ b/src/TensorFlowNET.Keras/Engine/Model.Predict.cs @@ -102,9 +102,9 @@ Tensors PredictInternal(DataHandler data_handler, int verbose) for (int i = 0; i < batch_outputs.Length; i++) batch_outputs[i] = tf.concat(new Tensor[] { batch_outputs[i], tmp_batch_outputs[i] }, axis: 0); } - var end_step = step + data_handler.StepIncrement; callbacks.on_predict_batch_end(end_step, new Dictionary { { "outputs", batch_outputs } }); + GC.Collect(); } } diff --git a/src/TensorFlowNET.Keras/Engine/Model.Train.cs b/src/TensorFlowNET.Keras/Engine/Model.Train.cs index ad3c70d2d..8f1ec808c 100644 --- a/src/TensorFlowNET.Keras/Engine/Model.Train.cs +++ b/src/TensorFlowNET.Keras/Engine/Model.Train.cs @@ -12,7 +12,9 @@ public partial class Model Dictionary train_step_function(DataHandler data_handler, OwnedIterator iterator) { var data = iterator.next(); - var outputs = train_step(data_handler, data[0], data[1]); + // whether have sample_weight + var outputs = data.Length == 2 ? train_step(data_handler, data[0], data[1]) : + train_step(data_handler, data[0], data[1], data[2]); tf_with(ops.control_dependencies(new object[0]), ctl => _train_counter.assign_add(1)); return outputs; } @@ -21,7 +23,13 @@ Dictionary train_step_multi_inputs_function(DataHandler data_hand { var data = iterator.next(); var x_size = data_handler.DataAdapter.GetDataset().FirstInputTensorCount; - var outputs = train_step(data_handler, new Tensors(data.Take(x_size).ToArray()), new Tensors(data.Skip(x_size).ToArray())); + var outputs = data.Length == 2 ? + train_step(data_handler, new Tensors(data.Take(x_size).ToArray()), new Tensors(data.Skip(x_size).ToArray())) : + train_step( + data_handler, + new Tensors(data.Take(x_size).ToArray()), + new Tensors(data.Skip(x_size).Take(x_size).ToArray()), + new Tensors(data.Skip(2 * x_size).ToArray())); tf_with(ops.control_dependencies(new object[0]), ctl => _train_counter.assign_add(1)); return outputs; } @@ -61,6 +69,34 @@ Dictionary train_step(DataHandler data_handler, Tensors x, Tensor }); return dict; } + Dictionary train_step(DataHandler data_handler, Tensors x, Tensors y, Tensors sample_weight = null) + { + (x, y, sample_weight) = data_handler.DataAdapter.Expand1d(x, y, sample_weight); + using var tape = tf.GradientTape(); + var y_pred = Apply(x, training: true); + var loss = compiled_loss.Call(y, y_pred, sample_weight:sample_weight); + + // For custom training steps, users can just write: + // trainable_variables = self.trainable_variables + // gradients = tape.gradient(loss, trainable_variables) + // self.optimizer.apply_gradients(zip(gradients, trainable_variables)) + // The _minimize call does a few extra steps unnecessary in most cases, + // such as loss scaling and gradient clipping. + _minimize(tape, optimizer, loss, TrainableVariables); + compiled_metrics.update_state(y, y_pred); + + var dict = new Dictionary(); + metrics.ToList().ForEach(x => + { + var r = x.result(); + if (r.ndim > 0) + { + r = tf.reduce_mean(r); + } + dict[x.Name] = (float)r; + }); + return dict; + } void _minimize(GradientTape tape, IOptimizer optimizer, Tensor loss, List trainable_variables) { diff --git a/src/TensorFlowNET.Keras/Engine/Model.Training.cs b/src/TensorFlowNET.Keras/Engine/Model.Training.cs index 50d934d9d..457b3d694 100644 --- a/src/TensorFlowNET.Keras/Engine/Model.Training.cs +++ b/src/TensorFlowNET.Keras/Engine/Model.Training.cs @@ -10,8 +10,38 @@ namespace Tensorflow.Keras.Engine { public partial class Model { + static Dictionary> weightsCache + = new Dictionary>(); + public void load_weights(string filepath, bool by_name = false, bool skip_mismatch = false, object options = null) { + // Get from cache + if (weightsCache.ContainsKey(filepath)) + { + var filtered_layers = new List(); + foreach (var layer in Layers) + { + var weights = hdf5_format._legacy_weights(layer); + if (weights.Count > 0) + filtered_layers.append(layer); + } + + var weight_value_tuples = new List<(IVariableV1, NDArray)>(); + filtered_layers.Select((layer, i) => + { + var symbolic_weights = hdf5_format._legacy_weights(layer); + foreach(var weight in symbolic_weights) + { + var weight_value = weightsCache[filepath].First(x => x.Item1 == weight.Name).Item2; + weight_value_tuples.Add((weight, weight_value)); + } + return layer; + }).ToList(); + + keras.backend.batch_set_value(weight_value_tuples); + return; + } + long fileId = Hdf5.OpenFile(filepath, true); if(fileId < 0) { @@ -29,8 +59,11 @@ public void load_weights(string filepath, bool by_name = false, bool skip_mismat throw new NotImplementedException(""); else { - hdf5_format.load_weights_from_hdf5_group(fileId, Layers); + var weight_value_tuples = hdf5_format.load_weights_from_hdf5_group(fileId, Layers); Hdf5.CloseFile(fileId); + + weightsCache[filepath] = weight_value_tuples.Select(x => (x.Item1.Name, x.Item2)).ToList(); + keras.backend.batch_set_value(weight_value_tuples); } } diff --git a/src/TensorFlowNET.Keras/Layers/Activation/ReLu6.cs b/src/TensorFlowNET.Keras/Layers/Activation/ReLu6.cs new file mode 100644 index 000000000..5af3f7677 --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Activation/ReLu6.cs @@ -0,0 +1,25 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Keras.Engine; +using Tensorflow.Common.Types; +using static Tensorflow.Binding; + +namespace Tensorflow.Keras.Layers +{ + /// + /// Leaky version of a Rectified Linear Unit. + /// + public class ReLu6 : Layer + { + public ReLu6() : base(new LayerArgs { }) + { + } + + protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null) + { + return tf.nn.relu6(inputs); + } + } +} diff --git a/src/TensorFlowNET.Keras/Layers/Convolution/DepthwiseConv2D.cs b/src/TensorFlowNET.Keras/Layers/Convolution/DepthwiseConv2D.cs new file mode 100644 index 000000000..dae4a4036 --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Convolution/DepthwiseConv2D.cs @@ -0,0 +1,167 @@ +using System; +using System.Collections.Generic; +using System.Text; +using System; +using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Keras.Saving; +using Tensorflow.Common.Types; +using Tensorflow.Keras.Utils; +using Tensorflow.Operations; +using Newtonsoft.Json; +using System.Security.Cryptography; + +namespace Tensorflow.Keras.Layers +{ + public class DepthwiseConv2DArgs: Conv2DArgs + { + /// + /// depth_multiplier: The number of depthwise convolution output channels for + /// each input channel.The total number of depthwise convolution output + /// channels will be equal to `filters_in* depth_multiplier`. + /// + [JsonProperty("depth_multiplier")] + public int DepthMultiplier { get; set; } = 1; + + [JsonProperty("depthwise_initializer")] + public IInitializer DepthwiseInitializer { get; set; } + } + + public class DepthwiseConv2D : Conv2D + { + /// + /// depth_multiplier: The number of depthwise convolution output channels for + /// each input channel.The total number of depthwise convolution output + /// channels will be equal to `filters_in* depth_multiplier`. + /// + int DepthMultiplier = 1; + + IInitializer DepthwiseInitializer; + + int[] strides; + + int[] dilation_rate; + + string getDataFormat() + { + return data_format == "channels_first" ? "NCHW" : "NHWC"; + } + + static int _id = 1; + + public DepthwiseConv2D(DepthwiseConv2DArgs args):base(args) + { + args.Padding = args.Padding.ToUpper(); + + if(string.IsNullOrEmpty(args.Name)) + name = "DepthwiseConv2D_" + _id; + + this.DepthMultiplier = args.DepthMultiplier; + this.DepthwiseInitializer = args.DepthwiseInitializer; + + } + + public override void build(KerasShapesWrapper input_shape) + { + //base.build(input_shape); + + var shape = input_shape.ToSingleShape(); + + int channel_axis = data_format == "channels_first" ? 1 : -1; + var input_channel = channel_axis < 0 ? + shape.dims[shape.ndim + channel_axis] : + shape.dims[channel_axis]; + + var arg = args as DepthwiseConv2DArgs; + + if (arg.Strides.ndim != shape.ndim) + { + if (arg.Strides.ndim == 2) + { + this.strides = new int[] { 1, (int)arg.Strides[0], (int)arg.Strides[1], 1 }; + } + else + { + this.strides = conv_utils.normalize_tuple(new int[] { (int)arg.Strides[0] }, shape.ndim, "strides"); + } + } + else + { + this.strides = arg.Strides.dims.Select(o=>(int)(o)).ToArray(); + } + + if (arg.DilationRate.ndim != shape.ndim) + { + this.dilation_rate = conv_utils.normalize_tuple(new int[] { (int)arg.DilationRate[0] }, shape.ndim, "dilation_rate"); + } + + long channel_data = data_format == "channels_first" ? shape[0] : shape[shape.Length - 1]; + + var depthwise_kernel_shape = this.kernel_size.dims.concat(new long[] { + channel_data, + this.DepthMultiplier + }); + + this.kernel = this.add_weight( + shape: depthwise_kernel_shape, + initializer: this.DepthwiseInitializer != null ? this.DepthwiseInitializer : this.kernel_initializer, + name: "depthwise_kernel", + trainable: true, + dtype: DType, + regularizer: this.kernel_regularizer + ); + + var axes = new Dictionary(); + axes.Add(-1, (int)input_channel); + inputSpec = new InputSpec(min_ndim: rank + 2, axes: axes); + + + if (use_bias) + { + bias = add_weight(name: "bias", + shape: ((int)channel_data), + initializer: bias_initializer, + trainable: true, + dtype: DType); + } + + built = true; + _buildInputShape = input_shape; + } + + protected override Tensors Call(Tensors inputs, Tensors state = null, + bool? training = false, IOptionalArgs? optional_args = null) + { + Tensor outputs = null; + + outputs = gen_nn_ops.depthwise_conv2d_native( + inputs, + filter: this.kernel.AsTensor(), + strides: this.strides, + padding: this.padding, + dilations: this.dilation_rate, + data_format: this.getDataFormat(), + name: name + ); + + if (use_bias) + { + if (data_format == "channels_first") + { + throw new NotImplementedException("call channels_first"); + } + else + { + outputs = gen_nn_ops.bias_add(outputs, ops.convert_to_tensor(bias), + data_format: this.getDataFormat(), name: name); + } + } + + if (activation != null) + outputs = activation.Apply(outputs); + + + return outputs; + } + + } +} \ No newline at end of file diff --git a/src/TensorFlowNET.Keras/Layers/LayersApi.cs b/src/TensorFlowNET.Keras/Layers/LayersApi.cs index 928e7e337..a1e4c11b1 100644 --- a/src/TensorFlowNET.Keras/Layers/LayersApi.cs +++ b/src/TensorFlowNET.Keras/Layers/LayersApi.cs @@ -112,7 +112,28 @@ public ILayer Conv1D(int filters, KernelInitializer = GetInitializerByName(kernel_initializer), BiasInitializer = GetInitializerByName(bias_initializer) }); - + public ILayer Conv2D(int filters, + Shape kernel_size = null, + Shape strides = null, + string padding = "valid") + => new Conv2D(new Conv2DArgs + { + Rank = 2, + Filters = filters, + KernelSize = (kernel_size == null) ? (5, 5) : kernel_size, + Strides = strides == null ? (1, 1) : strides, + Padding = padding, + DataFormat = null, + DilationRate = (1, 1), + Groups = 1, + UseBias = false, + KernelRegularizer = null, + KernelInitializer =tf.glorot_uniform_initializer, + BiasInitializer = tf.zeros_initializer, + BiasRegularizer = null, + ActivityRegularizer = null, + Activation = keras.activations.Linear, + }); /// /// 2D convolution layer (e.g. spatial convolution over images). /// This layer creates a convolution kernel that is convolved with the layer input to produce a tensor of outputs. @@ -210,6 +231,38 @@ public ILayer Conv2D(int filters, Activation = keras.activations.GetActivationFromName(activation) }); + public ILayer DepthwiseConv2D(Shape kernel_size = null, + Shape strides = null, + string padding = "valid", + string data_format = null, + Shape dilation_rate = null, + int groups = 1, + int depth_multiplier = 1, + string activation = null, + bool use_bias = false, + string kernel_initializer = "glorot_uniform", + string bias_initializer = "zeros", + string depthwise_initializer = "glorot_uniform" + ) + => new DepthwiseConv2D(new DepthwiseConv2DArgs + { + Rank = 2, + Filters = 1, + KernelSize = (kernel_size == null) ? (5, 5) : kernel_size, + Strides = strides == null ? (1) : strides, + Padding = padding, + DepthMultiplier = depth_multiplier, + DataFormat = data_format, + DilationRate = dilation_rate == null ? (1) : dilation_rate, + Groups = groups, + UseBias = use_bias, + KernelInitializer = GetInitializerByName(kernel_initializer), + DepthwiseInitializer = GetInitializerByName(depthwise_initializer == null ? kernel_initializer : depthwise_initializer), + BiasInitializer = GetInitializerByName(bias_initializer), + Activation = keras.activations.GetActivationFromName(activation), + }); + + /// /// Transposed convolution layer (sometimes called Deconvolution). /// @@ -234,7 +287,7 @@ public ILayer Conv2DTranspose(int filters, string data_format = null, Shape dilation_rate = null, string activation = null, - bool use_bias = true, + bool use_bias = false, string kernel_initializer = null, string bias_initializer = null, string kernel_regularizer = null, @@ -682,6 +735,15 @@ public ILayer LeakyReLU(float alpha = 0.3f) }); + /// + /// Leaky version of a Rectified Linear Unit. + /// + /// Negative slope coefficient. + /// + public ILayer ReLU6() + => new ReLu6(); + + public IRnnCell SimpleRNNCell( int units, string activation = "tanh", diff --git a/src/TensorFlowNET.Keras/Layers/Merging/Concatenate.cs b/src/TensorFlowNET.Keras/Layers/Merging/Concatenate.cs index a2a8286ba..fa82426ce 100644 --- a/src/TensorFlowNET.Keras/Layers/Merging/Concatenate.cs +++ b/src/TensorFlowNET.Keras/Layers/Merging/Concatenate.cs @@ -39,6 +39,7 @@ public override void build(KerasShapesWrapper input_shape) shape_set.Add(shape); }*/ _buildInputShape = input_shape; + built = true; } protected override Tensors _merge_function(Tensors inputs) diff --git a/src/TensorFlowNET.Keras/Regularizers.cs b/src/TensorFlowNET.Keras/Regularizers.cs index 98da27a7f..73b72a051 100644 --- a/src/TensorFlowNET.Keras/Regularizers.cs +++ b/src/TensorFlowNET.Keras/Regularizers.cs @@ -1,8 +1,51 @@ -namespace Tensorflow.Keras +using Tensorflow.Operations.Regularizers; + +namespace Tensorflow.Keras { - public class Regularizers + public class Regularizers: IRegularizerApi + { + private static Dictionary _nameActivationMap; + + public IRegularizer l1(float l1 = 0.01f) + => new L1(l1); + public IRegularizer l2(float l2 = 0.01f) + => new L2(l2); + + //From TF source + //# The default value for l1 and l2 are different from the value in l1_l2 + //# for backward compatibility reason. Eg, L1L2(l2=0.1) will only have l2 + //# and no l1 penalty. + public IRegularizer l1l2(float l1 = 0.00f, float l2 = 0.00f) + => new L1L2(l1, l2); + + static Regularizers() { - public IRegularizer l2(float l2 = 0.01f) - => new L2(l2); + _nameActivationMap = new Dictionary(); + _nameActivationMap["L1"] = new L1(); + _nameActivationMap["L1"] = new L2(); + _nameActivationMap["L1"] = new L1L2(); } + + public IRegularizer L1 => l1(); + + public IRegularizer L2 => l2(); + + public IRegularizer L1L2 => l1l2(); + + public IRegularizer GetRegularizerFromName(string name) + { + if (name == null) + { + throw new Exception($"Regularizer name cannot be null"); + } + if (!_nameActivationMap.TryGetValue(name, out var res)) + { + throw new Exception($"Regularizer {name} not found"); + } + else + { + return res; + } + } + } } diff --git a/src/TensorFlowNET.Keras/Regularizers/L1.cs b/src/TensorFlowNET.Keras/Regularizers/L1.cs deleted file mode 100644 index 0f904b6f9..000000000 --- a/src/TensorFlowNET.Keras/Regularizers/L1.cs +++ /dev/null @@ -1,19 +0,0 @@ -using System; - -namespace Tensorflow.Keras -{ - public class L1 : IRegularizer - { - float l1; - - public L1(float l1 = 0.01f) - { - this.l1 = l1; - } - - public Tensor Apply(RegularizerArgs args) - { - return l1 * math_ops.reduce_sum(math_ops.abs(args.X)); - } - } -} diff --git a/src/TensorFlowNET.Keras/Regularizers/L1L2.cs b/src/TensorFlowNET.Keras/Regularizers/L1L2.cs deleted file mode 100644 index f619f1582..000000000 --- a/src/TensorFlowNET.Keras/Regularizers/L1L2.cs +++ /dev/null @@ -1,24 +0,0 @@ -using System; -using static Tensorflow.Binding; -namespace Tensorflow.Keras -{ - public class L1L2 : IRegularizer - { - float l1; - float l2; - - public L1L2(float l1 = 0.0f, float l2 = 0.0f) - { - this.l1 = l1; - this.l2 = l2; - - } - public Tensor Apply(RegularizerArgs args) - { - Tensor regularization = tf.constant(0.0, args.X.dtype); - regularization += l1 * math_ops.reduce_sum(math_ops.abs(args.X)); - regularization += l2 * math_ops.reduce_sum(math_ops.square(args.X)); - return regularization; - } - } -} diff --git a/src/TensorFlowNET.Keras/Regularizers/L2.cs b/src/TensorFlowNET.Keras/Regularizers/L2.cs deleted file mode 100644 index 034bbd236..000000000 --- a/src/TensorFlowNET.Keras/Regularizers/L2.cs +++ /dev/null @@ -1,17 +0,0 @@ -namespace Tensorflow.Keras -{ - public class L2 : IRegularizer - { - float l2; - - public L2(float l2 = 0.01f) - { - this.l2 = l2; - } - - public Tensor Apply(RegularizerArgs args) - { - return l2 * math_ops.reduce_sum(math_ops.square(args.X)); - } - } -} diff --git a/src/TensorFlowNET.Keras/Saving/hdf5_format.cs b/src/TensorFlowNET.Keras/Saving/hdf5_format.cs index bab0efecf..68b73953d 100644 --- a/src/TensorFlowNET.Keras/Saving/hdf5_format.cs +++ b/src/TensorFlowNET.Keras/Saving/hdf5_format.cs @@ -82,7 +82,7 @@ public static void load_optimizer_weights_from_hdf5_group(long filepath = -1, Di } - public static void load_weights_from_hdf5_group(long f, List layers) + public static List<(IVariableV1, NDArray)> load_weights_from_hdf5_group(long f, List layers) { string original_keras_version = "2.5.0"; string original_backend = null; @@ -152,7 +152,7 @@ public static void load_weights_from_hdf5_group(long f, List layers) weight_value_tuples.AddRange(zip(symbolic_weights, weight_values)); } - keras.backend.batch_set_value(weight_value_tuples); + return weight_value_tuples; } public static void toarrayf4(long filepath = -1, Dictionary custom_objects = null, bool compile = false) diff --git a/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj b/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj index 36d1bc1d4..eb8ebf93c 100644 --- a/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj +++ b/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj @@ -7,7 +7,7 @@ enable Tensorflow.Keras AnyCPU;x64 - 0.11.3 + 0.15.0 Haiping Chen Keras for .NET Apache 2.0, Haiping Chen since 2018 @@ -30,6 +30,7 @@ * Fixed memory leak for YOLOv3 model. * Support RNN and LSTM models * Support Transformer model + * Support BERT model Keras for .NET @@ -42,8 +43,8 @@ Keras is an API designed for human beings, not machines. Keras follows best prac Git False Open.snk - 0.11.3.0 - 0.11.3.0 + 0.15.0.0 + 0.15.0.0 LICENSE Debug;Release;GPU @@ -143,7 +144,7 @@ Keras is an API designed for human beings, not machines. Keras follows best prac - + diff --git a/src/TensorFlowNET.Keras/Utils/data_utils.cs b/src/TensorFlowNET.Keras/Utils/data_utils.cs index e6db0ef72..b0bc15540 100644 --- a/src/TensorFlowNET.Keras/Utils/data_utils.cs +++ b/src/TensorFlowNET.Keras/Utils/data_utils.cs @@ -53,15 +53,17 @@ public static (int[,], long[]) _remove_long_seq(int maxlen, int[,] seq, long[] l new_seq, new_label: shortened lists for `seq` and `label`. */ + var nRow = seq.GetLength(0); + var nCol = seq.GetLength(1); List new_seq = new List(); List new_label = new List(); - for (var i = 0; i < seq.GetLength(0); i++) + for (var i = 0; i < nRow; i++) { - if (maxlen < seq.GetLength(1) && seq[i, maxlen] != 0) + if (maxlen < nCol && seq[i, maxlen] != 0) continue; int[] sentence = new int[maxlen]; - for (var j = 0; j < maxlen && j < seq.GetLength(1); j++) + for (var j = 0; j < maxlen && j < nCol; j++) { sentence[j] = seq[i, j]; } diff --git a/src/TensorFlowNET.Keras/Utils/generic_utils.cs b/src/TensorFlowNET.Keras/Utils/generic_utils.cs index 5402f4995..20937e2e5 100644 --- a/src/TensorFlowNET.Keras/Utils/generic_utils.cs +++ b/src/TensorFlowNET.Keras/Utils/generic_utils.cs @@ -112,12 +112,23 @@ public static FunctionalConfig deserialize_model_config(JToken json) foreach (var token in layersToken) { var args = deserialize_layer_args(token["class_name"].ToObject(), token["config"]); + + List nodeConfig = null; //python tensorflow sometimes exports inbound nodes in an extra nested array + if (token["inbound_nodes"].Count() > 0 && token["inbound_nodes"][0].Count() > 0 && token["inbound_nodes"][0][0].Count() > 0) + { + nodeConfig = token["inbound_nodes"].ToObject>>().FirstOrDefault() ?? new List(); + } + else + { + nodeConfig = token["inbound_nodes"].ToObject>(); + } + config.Layers.Add(new LayerConfig() { Config = args, Name = token["name"].ToObject(), ClassName = token["class_name"].ToObject(), - InboundNodes = token["inbound_nodes"].ToObject>() + InboundNodes = nodeConfig, }); } config.InputLayers = json["input_layers"].ToObject>(); diff --git a/src/TensorflowNET.Hub/Tensorflow.Hub.csproj b/src/TensorflowNET.Hub/Tensorflow.Hub.csproj index 3c09f808e..efa37598d 100644 --- a/src/TensorflowNET.Hub/Tensorflow.Hub.csproj +++ b/src/TensorflowNET.Hub/Tensorflow.Hub.csproj @@ -26,7 +26,7 @@ - + diff --git a/test/TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj b/test/TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj new file mode 100644 index 000000000..461993408 --- /dev/null +++ b/test/TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj @@ -0,0 +1,24 @@ + + + + net6.0 + enable + enable + + false + true + + + + + + + + + + + + + + + diff --git a/test/TensorFlow.Kernel.UnitTest/array_ops/concat_op_test.cs b/test/TensorFlow.Kernel.UnitTest/array_ops/concat_op_test.cs new file mode 100644 index 000000000..67d0aa602 --- /dev/null +++ b/test/TensorFlow.Kernel.UnitTest/array_ops/concat_op_test.cs @@ -0,0 +1,63 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Tensorflow; +using Tensorflow.NumPy; +using static Tensorflow.Binding; + +namespace TensorFlow.Kernel.UnitTest +{ + [TestClass] + public class concat_op_test + { + [TestMethod] + public void testConcatEmpty() + { + var t1 = tf.constant(new int[] { }); + var t2 = tf.constant(new int[] { }); + var c = array_ops.concat(new[] { t1, t2 }, 0); + var expected = np.array(new int[] { }); + Assert.IsTrue(Enumerable.SequenceEqual(expected.ToArray(), c.numpy().ToArray())); + } + + [TestMethod] + public void testConcatNegativeAxis() + { + var t1 = tf.constant(new int[,] { { 1, 2, 3 }, { 4, 5, 6 } }); + var t2 = tf.constant(new int[,] { { 7, 8, 9 }, { 10, 11, 12 } }); + var c = array_ops.concat(new[] { t1, t2 }, -2); + var expected = np.array(new int[,,] { { { 1, 2, 3 }, { 4, 5, 6 } }, { { 7, 8, 9 }, { 10, 11, 12 } } }); + Assert.IsTrue(Enumerable.SequenceEqual(expected.ToArray(), c.numpy().ToArray())); + + c = array_ops.concat(new[] { t1, t2 }, -1); + expected = np.array(new int[,] { { 1, 2, 3, 7, 8, 9 }, { 4, 5, 6, 10, 11, 12 } }); + Assert.IsTrue(Enumerable.SequenceEqual(expected.ToArray(), c.numpy().ToArray())); + } + + [TestMethod] + [DataRow(TF_DataType.TF_INT32)] + [DataRow(TF_DataType.TF_INT64)] + [DataRow(TF_DataType.TF_UINT32)] + [DataRow(TF_DataType.TF_UINT64)] + public void testConcatDtype(TF_DataType dtype) + { + var t1 = tf.constant(new int[,] { { 1, 2, 3 }, { 4, 5, 6 } }, dtype: dtype); + var t2 = tf.constant(new int[,] { { 7, 8, 9 }, { 10, 11, 12 } }, dtype: dtype); + var c = array_ops.concat(new[] { t1, t2 }, 1); + var expected = np.array(new int[,] { { 1, 2, 3, 7, 8, 9 }, { 4, 5, 6, 10, 11, 12 } }); + Assert.IsTrue(Enumerable.SequenceEqual(expected.ToArray(), tf.cast(c, TF_DataType.TF_INT32).numpy().ToArray())); + + } + + [TestMethod] + [DataRow(TF_DataType.TF_INT32)] + [DataRow(TF_DataType.TF_INT64)] + public void testConcatAxisType(TF_DataType dtype) + { + var t1 = tf.constant(new int[,] { { 1, 2, 3 }, { 4, 5, 6 } }); + var t2 = tf.constant(new int[,] { { 7, 8, 9 }, { 10, 11, 12 } }); + var c = array_ops.concat(new[] { t1, t2 }, tf.constant(1, dtype: dtype)); + var expected = np.array(new int[,] { { 1, 2, 3, 7, 8, 9 }, { 4, 5, 6, 10, 11, 12 } }); + Assert.IsTrue(Enumerable.SequenceEqual(expected.ToArray(), tf.cast(c, TF_DataType.TF_INT32).numpy().ToArray())); + } + + } +} \ No newline at end of file diff --git a/test/TensorFlowNET.Graph.UnitTest/Basics/TensorTest.cs b/test/TensorFlowNET.Graph.UnitTest/Basics/TensorTest.cs index 90de78743..8093c1f23 100644 --- a/test/TensorFlowNET.Graph.UnitTest/Basics/TensorTest.cs +++ b/test/TensorFlowNET.Graph.UnitTest/Basics/TensorTest.cs @@ -3,6 +3,7 @@ using System; using System.Linq; using static Tensorflow.Binding; +using Tensorflow; namespace TensorFlowNET.UnitTest.Basics { @@ -60,14 +61,14 @@ public void batch_to_space_nd() Assert.IsTrue(Enumerable.SequenceEqual(new int[] { 15, 21, 16, 22, 17, 23 }, result[0, 3].ToArray())); } - [TestMethod, Ignore] + [TestMethod] public void boolean_mask() { + if (!tf.executing_eagerly()) + tf.enable_eager_execution(); var tensor = new[] { 0, 1, 2, 3 }; var mask = np.array(new[] { true, false, true, false }); var masked = tf.boolean_mask(tensor, mask); - var sess = tf.Session(); - var result = sess.run(masked); Assert.IsTrue(Enumerable.SequenceEqual(new int[] { 0, 2 }, masked.ToArray())); } } diff --git a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs index fc2280051..cea6de172 100644 --- a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs +++ b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs @@ -625,25 +625,6 @@ public void testPartialDerivatives() } } - // TODO: remove when np.testing.assert_allclose(a, b) is implemented - private class CollectionComparer : System.Collections.IComparer - { - private readonly double _epsilon = 1e-07; - - public int Compare(object x, object y) - { - var a = (double)x; - var b = (double)y; - - double delta = Math.Abs(a - b); - if (delta < _epsilon) - { - return 0; - } - return a.CompareTo(b); - } - } - private struct Case { public Tensor[] grad1; @@ -748,8 +729,7 @@ Tensor[] gradients(Tensor[] ys, Tensor[] xs, Tensor[] stop_gradients = null) var npgrad2 = result[1]; foreach (var (a, b) in npgrad1.Zip(npgrad2)) { - // TODO: np.testing.assert_allclose(a, b); - CollectionAssert.AreEqual(a.ToArray(), b.ToArray(), new CollectionComparer()); + self.assertAllClose(a, b); } } } @@ -776,8 +756,6 @@ public void testUnconnectedGradientsNoneUnconnectedGradients() [TestMethod] public void testUnconnectedGradientsZerosUnconnectedGradients() { - - //def testUnconnectedGradientsZerosUnconnectedGradients(self): // with ops.Graph().as_default(): // x = constant(1.0, shape=[2, 2]) diff --git a/test/TensorFlowNET.Graph.UnitTest/ImageTest.cs b/test/TensorFlowNET.Graph.UnitTest/ImageTest.cs index d671b6096..127b65bf6 100644 --- a/test/TensorFlowNET.Graph.UnitTest/ImageTest.cs +++ b/test/TensorFlowNET.Graph.UnitTest/ImageTest.cs @@ -4,6 +4,7 @@ using Tensorflow; using static Tensorflow.Binding; using System; +using System.IO; namespace TensorFlowNET.UnitTest { @@ -164,5 +165,94 @@ public void TestCropAndResize() Assert.AreEqual(result.size, 16ul); Assert.AreEqual(result[0, 0, 0, 0], 12f); } + + [TestMethod] + public void ImageSaveTest() + { + var imgPath = TestHelper.GetFullPathFromDataDir("img001.bmp"); + var jpegImgPath = TestHelper.GetFullPathFromDataDir("img001.jpeg"); + var pngImgPath = TestHelper.GetFullPathFromDataDir("img001.png"); + + File.Delete(jpegImgPath); + File.Delete(pngImgPath); + + var contents = tf.io.read_file(imgPath); + var bmp = tf.image.decode_image(contents); + Assert.AreEqual(bmp.name, "decode_image/DecodeImage:0"); + + var jpeg = tf.image.encode_jpeg(bmp); + var op1 = tf.io.write_file(jpegImgPath, jpeg); + + var png = tf.image.encode_png(bmp); + var op2 = tf.io.write_file(pngImgPath, png); + + this.session().run(op1); + this.session().run(op2); + + Assert.IsTrue(File.Exists(jpegImgPath), "not find file:" + jpegImgPath); + Assert.IsTrue(File.Exists(pngImgPath), "not find file:" + pngImgPath); + + // 如果要测试图片正确性,需要注释下面两行代码 + File.Delete(jpegImgPath); + File.Delete(pngImgPath); + } + + [TestMethod] + public void ImageFlipTest() + { + var imgPath = TestHelper.GetFullPathFromDataDir("img001.bmp"); + + var contents = tf.io.read_file(imgPath); + var bmp = tf.image.decode_image(contents); + + // 左右翻转 + var lrImgPath = TestHelper.GetFullPathFromDataDir("img001_lr.png"); + File.Delete(lrImgPath); + + var lr = tf.image.flip_left_right(bmp); + var png = tf.image.encode_png(lr); + var op = tf.io.write_file(lrImgPath, png); + this.session().run(op); + + Assert.IsTrue(File.Exists(lrImgPath), "not find file:" + lrImgPath); + + // 上下翻转 + var updownImgPath = TestHelper.GetFullPathFromDataDir("img001_updown.png"); + File.Delete(updownImgPath); + + var updown = tf.image.flip_up_down(bmp); + var pngupdown = tf.image.encode_png(updown); + var op2 = tf.io.write_file(updownImgPath, pngupdown); + this.session().run(op2); + Assert.IsTrue(File.Exists(updownImgPath)); + + + // 暂时先人工观测图片是否翻转,观测时需要删除下面这两行代码 + File.Delete(lrImgPath); + File.Delete(updownImgPath); + + // 多图翻转 + // 目前直接通过 bmp 拿到 shape ,这里先用默认定义图片大小来构建了 + var mImg = tf.stack(new[] { bmp, lr }, axis:0); + print(mImg.shape); + + var up2 = tf.image.flip_up_down(mImg); + + var updownImgPath_m1 = TestHelper.GetFullPathFromDataDir("img001_m_ud.png"); // 直接上下翻转 + File.Delete(updownImgPath_m1); + + var img001_updown_m2 = TestHelper.GetFullPathFromDataDir("img001_m_lr_ud.png"); // 先左右再上下 + File.Delete(img001_updown_m2); + + var png2 = tf.image.encode_png(up2[0]); + tf.io.write_file(updownImgPath_m1, png2); + + png2 = tf.image.encode_png(up2[1]); + tf.io.write_file(img001_updown_m2, png2); + + // 如果要测试图片正确性,需要注释下面两行代码 + File.Delete(updownImgPath_m1); + File.Delete(img001_updown_m2); + } } } diff --git a/test/TensorFlowNET.Graph.UnitTest/TensorFlowNET.Graph.UnitTest.csproj b/test/TensorFlowNET.Graph.UnitTest/TensorFlowNET.Graph.UnitTest.csproj index 78a0938c5..40dd53f74 100644 --- a/test/TensorFlowNET.Graph.UnitTest/TensorFlowNET.Graph.UnitTest.csproj +++ b/test/TensorFlowNET.Graph.UnitTest/TensorFlowNET.Graph.UnitTest.csproj @@ -24,7 +24,7 @@ - + @@ -36,6 +36,7 @@ + diff --git a/test/TensorFlowNET.Keras.UnitTest/EagerModeTestBase.cs b/test/TensorFlowNET.Keras.UnitTest/EagerModeTestBase.cs index c7eab364c..635f13a54 100644 --- a/test/TensorFlowNET.Keras.UnitTest/EagerModeTestBase.cs +++ b/test/TensorFlowNET.Keras.UnitTest/EagerModeTestBase.cs @@ -33,6 +33,40 @@ public bool Equal(float[] f1, float[] f2) return ret; } + + public void AssertArray(int[] f1, int[] f2) + { + bool ret = false; + for (var i = 0; i < f1.Length; i++) + { + ret = f1[i] == f2[i]; + if (!ret) + break; + } + + if (!ret) + { + Assert.Fail($"Array not Equal:[{string.Join(",", f1)}] [{string.Join(",", f2)}]"); + } + } + + public void AssertArray(float[] f1, float[] f2) + { + bool ret = false; + var tolerance = .00001f; + for (var i = 0; i < f1.Length; i++) + { + ret = Math.Abs(f1[i] - f2[i]) <= tolerance; + if (!ret) + break; + } + + if (!ret) + { + Assert.Fail($"Array float not Equal:[{string.Join(",", f1)}] [{string.Join(",", f2)}]"); + } + } + public bool Equal(double[] d1, double[] d2) { bool ret = false; diff --git a/test/TensorFlowNET.Keras.UnitTest/Layers/Layers.Convolution.Test.cs b/test/TensorFlowNET.Keras.UnitTest/Layers/Layers.Convolution.Test.cs index 997dcb4f6..15c6e80fe 100644 --- a/test/TensorFlowNET.Keras.UnitTest/Layers/Layers.Convolution.Test.cs +++ b/test/TensorFlowNET.Keras.UnitTest/Layers/Layers.Convolution.Test.cs @@ -1,6 +1,8 @@ using Microsoft.VisualStudio.TestTools.UnitTesting; +using System.Linq; using Tensorflow.NumPy; using static Tensorflow.KerasApi; +using static Tensorflow.Binding; namespace Tensorflow.Keras.UnitTest.Layers { @@ -193,5 +195,128 @@ public void BasicConv2D_ksize_dilation_same() Assert.AreEqual(x.dims[2], y.shape[2]); Assert.AreEqual(filters, y.shape[3]); } + + + [TestMethod] + public void BasicDepthwiseConv2D() + { + var conv = keras.layers.DepthwiseConv2D(kernel_size:3, strides:1, activation: null, + padding:"same", depthwise_initializer: "ones"); + + var x = np.arange(2 * 9* 9* 3).reshape((2, 9, 9, 3)); + var x2 = ops.convert_to_tensor(x, TF_DataType.TF_FLOAT); + + var y = conv.Apply(x2); + + print($"input:{x2.shape} DepthwiseConv2D.out: {y.shape}"); + + + Assert.AreEqual(4, y.shape.ndim); + var arr = y.numpy().reshape((2, 9, 9, 3)); + + AssertArray(x[new int[] { 1, 1, 1 }].ToArray(), new int[] { 273, 274, 275 }); + AssertArray(arr[new int[] { 1, 1, 1 }].ToArray(), new float[] { 2457f, 2466f, 2475f }); + + var bn = keras.layers.BatchNormalization(); + var y2 = bn.Apply(y); + arr = y2.numpy().ToArray(); + + double delta = 0.0001; // 误差范围 + + Assert.AreEqual(arr[0], 59.97002f, delta); + Assert.AreEqual(arr[1], 63.96802f, delta); + } + + + [TestMethod] + public void BasicDepthwiseConv2D_strides_2() + { + var conv = keras.layers.DepthwiseConv2D(kernel_size: 3, strides: (1, 2, 2, 1), activation: null, + padding: "same", depthwise_initializer: "ones"); + + var x = np.arange(2 * 9 * 9 * 3).reshape((2, 9, 9, 3)); + var x2 = ops.convert_to_tensor(x, TF_DataType.TF_FLOAT); + + var y = conv.Apply(x2); + + print($"input:{x2.shape} DepthwiseConv2D.out: {y.shape}"); + + Assert.AreEqual(4, y.shape.ndim); + var arr = y.numpy().reshape((2, 5, 5, 3)); + + AssertArray(x[new int[] { 1, 1, 1 }].ToArray(), new int[] { 273, 274, 275 }); + AssertArray(arr[new int[] { 1, 1, 1 }].ToArray(), new float[] { 2727f, 2736f, 2745f }); + + var bn = keras.layers.BatchNormalization(); + var y2 = bn.Apply(y); + arr = y2.numpy().ToArray(); + + double delta = 0.0001; // 误差范围 + + Assert.AreEqual(arr[0], 59.97002f, delta); + Assert.AreEqual(arr[1], 63.96802f, delta); + } + + + + [TestMethod] + public void BasicDepthwiseConv2D_strides_3() + { + var conv = keras.layers.DepthwiseConv2D(kernel_size: 3, strides: 3, activation: null, + padding: "same", depthwise_initializer: "ones"); + + var x = np.arange(2 * 9 * 9 * 3).reshape((2, 9, 9, 3)); + var x2 = ops.convert_to_tensor(x, TF_DataType.TF_FLOAT); + + var y = conv.Apply(x2); + + print($"input:{x2.shape} DepthwiseConv2D.out: {y.shape}"); + + Assert.AreEqual(4, y.shape.ndim); + var arr = y.numpy().reshape((2, 3, 3, 3)); + + AssertArray(x[new int[] { 1, 1, 1 }].ToArray(), new int[] { 273, 274, 275 }); + AssertArray(arr[new int[] { 1, 1, 1 }].ToArray(), new float[] { 3267f, 3276f, 3285f }); + + var bn = keras.layers.BatchNormalization(); + var y2 = bn.Apply(y); + arr = y2.numpy().ToArray(); + + double delta = 0.0001; // 误差范围 + + Assert.AreEqual(arr[0], 269.86508f, delta); + Assert.AreEqual(arr[1], 278.8606f, delta); + + } + [TestMethod] + public void BasicDepthwiseConv2D_UseBias() + { + var conv = keras.layers.DepthwiseConv2D(kernel_size: 3, strides: 1, activation: null, + use_bias: true, padding: "same", + depthwise_initializer: "ones", + bias_initializer:"ones" + ); + + var weight = conv.get_weights(); + + var x = np.arange(9 * 9 * 3).reshape((1, 9, 9, 3)); + var x2 = ops.convert_to_tensor(x, TF_DataType.TF_FLOAT); + var y = conv.Apply(x2); + + Assert.AreEqual(4, y.shape.ndim); + var arr = y.numpy().ToArray(); + + Assert.AreEqual(arr[0], 61f); + Assert.AreEqual(arr[1], 65f); + + var bn = keras.layers.BatchNormalization(); + var y2 = bn.Apply(y); + arr = y2.numpy().ToArray(); + + double delta = 0.0001; // 误差范围 + + Assert.AreEqual(arr[0], 60.96952f, delta); + Assert.AreEqual(arr[1], 64.96752f, delta); + } } } diff --git a/test/TensorFlowNET.Keras.UnitTest/Layers/Layers.Merging.Test.cs b/test/TensorFlowNET.Keras.UnitTest/Layers/Layers.Merging.Test.cs index 36e44e482..9bc2fa767 100644 --- a/test/TensorFlowNET.Keras.UnitTest/Layers/Layers.Merging.Test.cs +++ b/test/TensorFlowNET.Keras.UnitTest/Layers/Layers.Merging.Test.cs @@ -1,4 +1,5 @@ using Microsoft.VisualStudio.TestTools.UnitTesting; +using System.Collections.Generic; using Tensorflow.NumPy; using static Tensorflow.KerasApi; @@ -8,12 +9,16 @@ namespace Tensorflow.Keras.UnitTest.Layers public class LayersMergingTest : EagerModeTestBase { [TestMethod] - public void Concatenate() + [DataRow(1, 4, 1, 5)] + [DataRow(2, 2, 2, 5)] + [DataRow(3, 2, 1, 10)] + public void Concatenate(int axis, int shapeA, int shapeB, int shapeC) { - var x = np.arange(20).reshape((2, 2, 5)); - var y = np.arange(20, 30).reshape((2, 1, 5)); - var z = keras.layers.Concatenate(axis: 1).Apply(new Tensors(x, y)); - Assert.AreEqual((2, 3, 5), z.shape); + var x = np.arange(10).reshape((1, 2, 1, 5)); + var y = np.arange(10, 20).reshape((1, 2, 1, 5)); + var z = keras.layers.Concatenate(axis: axis).Apply(new Tensors(x, y)); + Assert.AreEqual((1, shapeA, shapeB, shapeC), z.shape); } + } } diff --git a/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs b/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs index dbf5cae1e..67e2b0464 100644 --- a/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs +++ b/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs @@ -74,8 +74,8 @@ public void TrainLSTMWithMnist() OneHot = true, ValidationSize = 55000, }).Result; - - model.fit(dataset.Train.Data, dataset.Train.Labels, batch_size: 16, epochs: 1); + var sample_weight = np.ones(((int)dataset.Train.Data.shape[0])); + model.fit(dataset.Train.Data, dataset.Train.Labels, batch_size: 16, epochs: 1, sample_weight:sample_weight); } [TestMethod] diff --git a/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs b/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs index cb570fc0c..c733537e7 100644 --- a/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs +++ b/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs @@ -1,10 +1,14 @@ using Microsoft.VisualStudio.TestPlatform.Utilities; using Microsoft.VisualStudio.TestTools.UnitTesting; +using Newtonsoft.Json.Linq; +using System.Collections.Generic; using System.Linq; +using System.Xml.Linq; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Optimizers; using Tensorflow.Keras.UnitTest.Helpers; using Tensorflow.NumPy; +using static HDF.PInvoke.H5Z; using static Tensorflow.Binding; using static Tensorflow.KerasApi; @@ -124,4 +128,91 @@ public void TestModelBeforeTF2_5() var model = tf.saved_model.load(@"D:\development\temp\saved_model") as Tensorflow.Keras.Engine.Model; model.summary(); } + + + [TestMethod] + public void BiasRegularizerSaveAndLoad() + { + var savemodel = keras.Sequential(new List() + { + tf.keras.layers.InputLayer((227, 227, 3)), + tf.keras.layers.Conv2D(96, (11, 11), (4, 4), activation:"relu", padding:"valid"), + tf.keras.layers.BatchNormalization(), + tf.keras.layers.MaxPooling2D((3, 3), strides:(2, 2)), + + tf.keras.layers.Conv2D(256, (5, 5), (1, 1), "same", activation: keras.activations.Relu, bias_regularizer:keras.regularizers.L1L2), + tf.keras.layers.BatchNormalization(), + + tf.keras.layers.Conv2D(256, (5, 5), (1, 1), "same", activation: keras.activations.Relu, bias_regularizer:keras.regularizers.L2), + tf.keras.layers.BatchNormalization(), + + tf.keras.layers.Conv2D(256, (5, 5), (1, 1), "same", activation: keras.activations.Relu, bias_regularizer:keras.regularizers.L1), + tf.keras.layers.BatchNormalization(), + tf.keras.layers.MaxPooling2D((3, 3), (2, 2)), + + tf.keras.layers.Flatten(), + + tf.keras.layers.Dense(1000, activation: "linear"), + tf.keras.layers.Softmax(1) + }); + + savemodel.compile(tf.keras.optimizers.Adam(), tf.keras.losses.SparseCategoricalCrossentropy(from_logits: true), new string[] { "accuracy" }); + + var num_epochs = 1; + var batch_size = 8; + + var trainDataset = new RandomDataSet(new Shape(227, 227, 3), 16); + + savemodel.fit(trainDataset.Data, trainDataset.Labels, batch_size, num_epochs); + + savemodel.save(@"./bias_regularizer_save_and_load", save_format: "tf"); + + var loadModel = tf.keras.models.load_model(@"./bias_regularizer_save_and_load"); + loadModel.summary(); + + loadModel.compile(tf.keras.optimizers.Adam(), tf.keras.losses.SparseCategoricalCrossentropy(from_logits: true), new string[] { "accuracy" }); + + var fitDataset = new RandomDataSet(new Shape(227, 227, 3), 16); + + loadModel.fit(fitDataset.Data, fitDataset.Labels, batch_size, num_epochs); + } + + + [TestMethod] + public void CreateConcatenateModelSaveAndLoad() + { + // a small demo model that is just here to see if the axis value for the concatenate method is saved and loaded. + var input_layer = tf.keras.layers.Input((8, 8, 5)); + + var conv1 = tf.keras.layers.Conv2D(2, kernel_size: 3, activation: "relu", padding: "same"/*, data_format: "_conv_1"*/).Apply(input_layer); + conv1.Name = "conv1"; + + var conv2 = tf.keras.layers.Conv2D(2, kernel_size: 3, activation: "relu", padding: "same"/*, data_format: "_conv_2"*/).Apply(input_layer); + conv2.Name = "conv2"; + + var concat1 = tf.keras.layers.Concatenate(axis: 3).Apply((conv1, conv2)); + concat1.Name = "concat1"; + + var model = tf.keras.Model(input_layer, concat1); + model.compile(tf.keras.optimizers.Adam(), tf.keras.losses.CategoricalCrossentropy()); + + model.save(@"Assets/concat_axis3_model"); + + + var tensorInput = np.arange(320).reshape((1, 8, 8, 5)).astype(TF_DataType.TF_FLOAT); + + var tensors1 = model.predict(tensorInput); + + Assert.AreEqual((1, 8, 8, 4), tensors1.shape); + + model = null; + keras.backend.clear_session(); + + var model2 = tf.keras.models.load_model(@"Assets/concat_axis3_model"); + + var tensors2 = model2.predict(tensorInput); + + Assert.AreEqual(tensors1.shape, tensors2.shape); + } + } diff --git a/test/TensorFlowNET.Keras.UnitTest/MultiInputModelTest.cs b/test/TensorFlowNET.Keras.UnitTest/MultiInputModelTest.cs index dd8ef8f91..54b76d41a 100644 --- a/test/TensorFlowNET.Keras.UnitTest/MultiInputModelTest.cs +++ b/test/TensorFlowNET.Keras.UnitTest/MultiInputModelTest.cs @@ -2,6 +2,7 @@ using System; using Tensorflow.Keras.Optimizers; using Tensorflow.NumPy; +using static Tensorflow.Binding; using static Tensorflow.KerasApi; namespace Tensorflow.Keras.UnitTest @@ -54,10 +55,91 @@ public void LeNetModel() var x = new NDArray[] { x1, x2 }; model.fit(x, dataset.Train.Labels, batch_size: 8, epochs: 3); + x1 = x1["0:8"]; + x2 = x1; + + x = new NDArray[] { x1, x2 }; + var y = dataset.Train.Labels["0:8"]; + (model as Engine.Model).evaluate(x, y); + x1 = np.ones((1, 28, 28, 1), TF_DataType.TF_FLOAT); x2 = np.zeros((1, 28, 28, 1), TF_DataType.TF_FLOAT); var pred = model.predict((x1, x2)); Console.WriteLine(pred); } + + [TestMethod] + public void LeNetModelDataset() + { + var inputs = keras.Input((28, 28, 1)); + var conv1 = keras.layers.Conv2D(16, (3, 3), activation: "relu", padding: "same").Apply(inputs); + var pool1 = keras.layers.MaxPooling2D((2, 2), 2).Apply(conv1); + var conv2 = keras.layers.Conv2D(32, (3, 3), activation: "relu", padding: "same").Apply(pool1); + var pool2 = keras.layers.MaxPooling2D((2, 2), 2).Apply(conv2); + var flat1 = keras.layers.Flatten().Apply(pool2); + + var inputs_2 = keras.Input((28, 28, 1)); + var conv1_2 = keras.layers.Conv2D(16, (3, 3), activation: "relu", padding: "same").Apply(inputs_2); + var pool1_2 = keras.layers.MaxPooling2D((4, 4), 4).Apply(conv1_2); + var conv2_2 = keras.layers.Conv2D(32, (1, 1), activation: "relu", padding: "same").Apply(pool1_2); + var pool2_2 = keras.layers.MaxPooling2D((2, 2), 2).Apply(conv2_2); + var flat1_2 = keras.layers.Flatten().Apply(pool2_2); + + var concat = keras.layers.Concatenate().Apply((flat1, flat1_2)); + var dense1 = keras.layers.Dense(512, activation: "relu").Apply(concat); + var dense2 = keras.layers.Dense(128, activation: "relu").Apply(dense1); + var dense3 = keras.layers.Dense(10, activation: "relu").Apply(dense2); + var output = keras.layers.Softmax(-1).Apply(dense3); + + var model = keras.Model((inputs, inputs_2), output); + model.summary(); + + var data_loader = new MnistModelLoader(); + + var dataset = data_loader.LoadAsync(new ModelLoadSetting + { + TrainDir = "mnist", + OneHot = false, + ValidationSize = 59900, + }).Result; + + var loss = keras.losses.SparseCategoricalCrossentropy(); + var optimizer = new Adam(0.001f); + model.compile(optimizer, loss, new string[] { "accuracy" }); + + NDArray x1 = np.reshape(dataset.Train.Data, (dataset.Train.Data.shape[0], 28, 28, 1)); + + var multiInputDataset = tf.data.Dataset.zip( + tf.data.Dataset.from_tensor_slices(x1), + tf.data.Dataset.from_tensor_slices(x1), + tf.data.Dataset.from_tensor_slices(dataset.Train.Labels) + ).batch(8); + multiInputDataset.FirstInputTensorCount = 2; + + model.fit(multiInputDataset, epochs: 3); + + x1 = x1["0:8"]; + + multiInputDataset = tf.data.Dataset.zip( + tf.data.Dataset.from_tensor_slices(x1), + tf.data.Dataset.from_tensor_slices(x1), + tf.data.Dataset.from_tensor_slices(dataset.Train.Labels["0:8"]) + ).batch(8); + multiInputDataset.FirstInputTensorCount = 2; + + (model as Engine.Model).evaluate(multiInputDataset); + + x1 = np.ones((1, 28, 28, 1), TF_DataType.TF_FLOAT); + var x2 = np.zeros((1, 28, 28, 1), TF_DataType.TF_FLOAT); + + multiInputDataset = tf.data.Dataset.zip( + tf.data.Dataset.from_tensor_slices(x1), + tf.data.Dataset.from_tensor_slices(x2) + ).batch(8); + multiInputDataset.FirstInputTensorCount = 2; + + var pred = model.predict(multiInputDataset); + Console.WriteLine(pred); + } } } diff --git a/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj b/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj index 3910eba1c..edac1c2ff 100644 --- a/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj +++ b/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj @@ -13,7 +13,7 @@ - + @@ -25,6 +25,7 @@ + diff --git a/test/TensorFlowNET.Native.UnitTest/Tensorflow.Native.UnitTest.csproj b/test/TensorFlowNET.Native.UnitTest/Tensorflow.Native.UnitTest.csproj index a4f1ec567..c054a8707 100644 --- a/test/TensorFlowNET.Native.UnitTest/Tensorflow.Native.UnitTest.csproj +++ b/test/TensorFlowNET.Native.UnitTest/Tensorflow.Native.UnitTest.csproj @@ -44,7 +44,7 @@ - + diff --git a/test/TensorFlowNET.UnitTest/EagerModeTestBase.cs b/test/TensorFlowNET.UnitTest/EagerModeTestBase.cs index d08f4e505..b7b9ae128 100644 --- a/test/TensorFlowNET.UnitTest/EagerModeTestBase.cs +++ b/test/TensorFlowNET.UnitTest/EagerModeTestBase.cs @@ -20,6 +20,20 @@ public bool Equal(float f1, float f2) return Math.Abs(f1 - f2) <= tolerance; } + public bool Equal(long[] l1, long[] l2) + { + if (l1.Length != l2.Length) + return false; + + for (var i = 0; i < l1.Length; i++) + { + if (l1[i] != l2[i]) + return false; + } + + return true; + } + public bool Equal(float[] f1, float[] f2) { bool ret = false; diff --git a/test/TensorFlowNET.UnitTest/GradientTest/GradientEagerTest.cs b/test/TensorFlowNET.UnitTest/GradientTest/GradientEagerTest.cs index e41e1d617..1cfceb3e3 100644 --- a/test/TensorFlowNET.UnitTest/GradientTest/GradientEagerTest.cs +++ b/test/TensorFlowNET.UnitTest/GradientTest/GradientEagerTest.cs @@ -62,7 +62,7 @@ public void SquaredDifference_1D() // Calcute the gradient of (x1-x2)^2 // by Automatic Differentiation in Eager mode // Expected is 2*(abs(x1-x2)) - Tensor x1 = new NDArray( new float[] { 1, 3, 5, 21, 19, 17 }); + Tensor x1 = new NDArray(new float[] { 1, 3, 5, 21, 19, 17 }); Tensor x2 = new NDArray(new float[] { 29, 27, 23, 7, 11, 13 }); float[] expected = new float[] { @@ -173,5 +173,34 @@ public void ConditionalMultiply() var result = grad(x, 4); Assert.AreEqual((float)result, 4.0f); } + + [TestMethod] + public void Tile() + { + var a = tf.constant(new int[] { 1 }, TF_DataType.TF_FLOAT); + var b = tf.constant(new int[] { 2 }); + using (var tape = tf.GradientTape()) + { + tape.watch(a); + var y = tf.tile(a, b); + var grad = tape.gradient(y, a); + Assert.AreEqual((float)grad.numpy(), 2.0f); + } + } + + [TestMethod] + public void GatherNdTest() + { + var x = tf.constant(new float[,] { { 1.0f, 2.0f, 3.0f }, { 1.0f, 2.0f, 3.0f }, { 1.0f, 2.0f, 3.0f } }, dtype: TF_DataType.TF_FLOAT); + var indices = tf.constant(new int[,] { { 0, 1 }, { 1, 1 }, { 2, 1 } }, dtype: TF_DataType.TF_INT32); + using (var tape = tf.GradientTape()) + { + tape.watch(x); + var res = tf.gather_nd(x, indices); + var grad = tape.gradient(res, x); + var expected = np.array(new float[,] { { 0f, 1f, 0f }, { 0f, 1f, 0f }, { 0f, 1f, 0f } }); + Assert.IsTrue(Enumerable.SequenceEqual(grad.ToArray(), expected.ToArray())); + } + } } } diff --git a/test/TensorFlowNET.UnitTest/ManagedAPI/ArrayOpsTest.cs b/test/TensorFlowNET.UnitTest/ManagedAPI/ArrayOpsTest.cs index 675689bb1..e25c9779d 100644 --- a/test/TensorFlowNET.UnitTest/ManagedAPI/ArrayOpsTest.cs +++ b/test/TensorFlowNET.UnitTest/ManagedAPI/ArrayOpsTest.cs @@ -3,6 +3,7 @@ using Tensorflow; using static Tensorflow.Binding; using System.Linq; +using Tensorflow.Operations; namespace TensorFlowNET.UnitTest.ManagedAPI { @@ -105,5 +106,321 @@ public void ReverseArray() Assert.IsTrue(Equal(a[0].ToArray().Reverse().ToArray(), b[0].ToArray())); Assert.IsTrue(Equal(a[1].ToArray().Reverse().ToArray(), b[1].ToArray())); } + + [TestMethod] + public void ReverseImgArray3D() + { + // 创建 sourceImg 数组 + var sourceImgArray = new float[,,] { + { + { 237, 28, 36 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + }; + var sourceImg = ops.convert_to_tensor(sourceImgArray); + + // 创建 lrImg 数组 + var lrImgArray = new float[,,] { + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 237, 28, 36 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + }; + var lrImg = ops.convert_to_tensor(lrImgArray); + + var lr = tf.image.flip_left_right(sourceImg); + Assert.IsTrue(Equal(lrImg.numpy().ToArray(), lr.numpy().ToArray()), "tf.image.flip_left_right fail."); + + var lr2 = tf.reverse(sourceImg, 1); + Assert.IsTrue(Equal(lrImg.numpy().ToArray(), lr2.numpy().ToArray()), "tf.reverse (axis=1) fail."); + + var lr3 = gen_array_ops.reverse_v2(sourceImg, ops.convert_to_tensor(new[] { 1 })); + Assert.IsTrue(Equal(lrImg.numpy().ToArray(), lr3.numpy().ToArray()), "gen_array_ops.reverse_v2 axis=1 fail."); + + // 创建 udImg 数组 + var udImgArray = new float[,,] { + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 237, 28, 36 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + }; + var udImg = ops.convert_to_tensor(udImgArray); + + var ud = tf.image.flip_up_down(sourceImg); + Assert.IsTrue(Equal(udImg.numpy().ToArray(), ud.numpy().ToArray()), "tf.image.flip_up_down fail."); + + var ud2 = tf.reverse(sourceImg, new Axis(0)); + Assert.IsTrue(Equal(udImg.numpy().ToArray(), ud2.numpy().ToArray()), "tf.reverse (axis=0) fail."); + + var ud3 = gen_array_ops.reverse_v2(sourceImg, ops.convert_to_tensor(new[] { 0 })); + Assert.IsTrue(Equal(udImg.numpy().ToArray(), ud3.numpy().ToArray()), "gen_array_ops.reverse_v2 axis=0 fail."); + } + + [TestMethod] + public void ReverseImgArray4D() + { + // 原图左上角,加一张左右翻转后的图片 + var m = new float[,,,] { + { + { + { 237, 28, 36 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + }, + { + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 237, 28, 36 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + } + }; + var sourceImg = ops.convert_to_tensor(m); + + var lrArray = new float[,,,] { + { + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 237, 28, 36 }, + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + }, + { + { + { 237, 28, 36 }, + { 255, 255, 255 }, + { 255, 255, 255 }, + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + } + }; + var lrImg = ops.convert_to_tensor(lrArray); + + // 创建 ud 数组 + var udArray = new float[,,,] { + { + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 237, 28, 36 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + }, + { + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 237, 28, 36 } + } + } + }; + var udImg = ops.convert_to_tensor(udArray); + + var ud3 = gen_array_ops.reverse_v2(sourceImg, ops.convert_to_tensor(new[] { 1 })); + Assert.IsTrue(Equal(udImg.numpy().ToArray(), ud3.numpy().ToArray()), "gen_array_ops.reverse_v2 axis=1 fail."); + + var ud2 = tf.reverse(sourceImg, new Axis(1)); + Assert.IsTrue(Equal(udImg.numpy().ToArray(), ud2.numpy().ToArray()), "tf.reverse (axis=1) fail."); + + var ud = tf.image.flip_up_down(sourceImg); + Assert.IsTrue(Equal(udImg.numpy().ToArray(), ud.numpy().ToArray()), "tf.image.flip_up_down fail."); + + // 左右翻转 + var lr = tf.image.flip_left_right(sourceImg); + Assert.IsTrue(Equal(lrImg.numpy().ToArray(), lr.numpy().ToArray()), "tf.image.flip_left_right fail."); + + var lr2 = tf.reverse(sourceImg, 0); + Assert.IsTrue(Equal(lrImg.numpy().ToArray(), lr2.numpy().ToArray()), "tf.reverse (axis=1) fail."); + + var lr3 = gen_array_ops.reverse_v2(sourceImg, ops.convert_to_tensor(new[] { 0 })); + Assert.IsTrue(Equal(lrImg.numpy().ToArray(), lr3.numpy().ToArray()), "gen_array_ops.reverse_v2 axis=1 fail."); + + } + + [TestMethod] + public void ReverseImgArray4D_3x3() + { + // 原图左上角,加一张左右翻转后的图片 + var m = new float[,,,] { + { + { + { 237, 28, 36 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + }, + { + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 237, 28, 36 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + } + }; + var sourceImg = ops.convert_to_tensor(m); + + var lrArray = new float[,,,] { + { + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 237, 28, 36 }, + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + }, + { + { + { 237, 28, 36 }, + { 255, 255, 255 }, + { 255, 255, 255 }, + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + } + }; + var lrImg = ops.convert_to_tensor(lrArray); + + // 创建 ud 数组 + var udArray = new float[,,,] { + { + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 237, 28, 36 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + }, + { { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 237, 28, 36 } + } + } + }; + var udImg = ops.convert_to_tensor(udArray); + + var ud3 = gen_array_ops.reverse_v2(sourceImg, ops.convert_to_tensor(new[] { 1 })); + Assert.IsTrue(Equal(udImg.numpy().ToArray(), ud3.numpy().ToArray()), "gen_array_ops.reverse_v2 axis=1 fail."); + + var ud2 = tf.reverse(sourceImg, new Axis(1)); + Assert.IsTrue(Equal(udImg.numpy().ToArray(), ud2.numpy().ToArray()), "tf.reverse (axis=1) fail."); + + var ud = tf.image.flip_up_down(sourceImg); + Assert.IsTrue(Equal(udImg.numpy().ToArray(), ud.numpy().ToArray()), "tf.image.flip_up_down fail."); + + // 左右翻转 + var lr = tf.image.flip_left_right(sourceImg); + Assert.IsTrue(Equal(lrImg.numpy().ToArray(), lr.numpy().ToArray()), "tf.image.flip_left_right fail."); + + var lr2 = tf.reverse(sourceImg, 0); + Assert.IsTrue(Equal(lrImg.numpy().ToArray(), lr2.numpy().ToArray()), "tf.reverse (axis=1) fail."); + + var lr3 = gen_array_ops.reverse_v2(sourceImg, ops.convert_to_tensor(new[] { 0 })); + Assert.IsTrue(Equal(lrImg.numpy().ToArray(), lr3.numpy().ToArray()), "gen_array_ops.reverse_v2 axis=1 fail."); + + } } } diff --git a/test/TensorFlowNET.UnitTest/ManagedAPI/RaggedTensorTest.cs b/test/TensorFlowNET.UnitTest/ManagedAPI/RaggedTensorTest.cs new file mode 100644 index 000000000..7a3de882e --- /dev/null +++ b/test/TensorFlowNET.UnitTest/ManagedAPI/RaggedTensorTest.cs @@ -0,0 +1,26 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Tensorflow; +using Tensorflow.NumPy; +using static Tensorflow.Binding; + +namespace TensorFlowNET.UnitTest.ManagedAPI +{ + public class RaggedTensorTest :EagerModeTestBase + { + [TestMethod] + public void Test_from_row_lengths() + { + var row_lengths = tf.convert_to_tensor(np.array(new int[] { 2, 0, 3, 1, 1 }, TF_DataType.TF_INT64)); + var rp = RowPartition.from_row_lengths(row_lengths, validate: false); + var rp_row_lengths = rp.row_lengths(); + var rp_nrows = rp.nrows(); + Assert.IsTrue(rp_nrows.ToArray()[0] == rp.nrows().ToArray()[0]); + + } + } +} diff --git a/test/TensorFlowNET.UnitTest/NumPy/ShapeTest.cs b/test/TensorFlowNET.UnitTest/NumPy/ShapeTest.cs new file mode 100644 index 000000000..f5a8685be --- /dev/null +++ b/test/TensorFlowNET.UnitTest/NumPy/ShapeTest.cs @@ -0,0 +1,44 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Tensorflow.NumPy; +using System; +using System.Linq; +using static Tensorflow.Binding; +using Tensorflow; + +namespace TensorFlowNET.UnitTest.NumPy +{ + [TestClass] + public class ShapeTest : EagerModeTestBase + { + [Ignore] + [TestMethod] + public unsafe void ShapeGetLastElements() + { + // test code from function _CheckAtLeast3DImage + // 之前的 _CheckAtLeast3DImage 有bug,现在通过测试,下面的代码是正确的 + // todo: shape["-3:"] 的写法,目前有bug,需要修复,单元测试等修复后再放开,暂时先忽略测试 + + var image_shape = new Shape(new[] { 32, 64, 3 }); + var image_shape_4d = new Shape(new[] { 4, 64, 32, 3 }); + + var image_shape_last_three_elements = new Shape(new[] { + image_shape.dims[image_shape.dims.Length - 3], + image_shape.dims[image_shape.dims.Length - 2], + image_shape.dims[image_shape.dims.Length - 1]}); + + var image_shape_last_three_elements2 = image_shape["-3:"]; + + Assert.IsTrue(Equal(image_shape_last_three_elements.dims, image_shape_last_three_elements2.dims), "3dims get fail."); + + var image_shape_last_three_elements_4d = new Shape(new[] { + image_shape_4d.dims[image_shape_4d.dims.Length - 3], + image_shape_4d.dims[image_shape_4d.dims.Length - 2], + image_shape_4d.dims[image_shape_4d.dims.Length - 1]}); + + var image_shape_last_three_elements2_4d = image_shape_4d["-3:"]; + + Assert.IsTrue(Equals(image_shape_last_three_elements_4d.dims, image_shape_last_three_elements2_4d.dims), "4dims get fail."); + } + + } +} \ No newline at end of file diff --git a/test/TensorFlowNET.UnitTest/PythonTest.cs b/test/TensorFlowNET.UnitTest/PythonTest.cs deleted file mode 100644 index 50cc2b328..000000000 --- a/test/TensorFlowNET.UnitTest/PythonTest.cs +++ /dev/null @@ -1,322 +0,0 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; -using Newtonsoft.Json.Linq; -using Tensorflow.NumPy; -using System; -using System.Collections; -using System.Linq; -using Tensorflow; -using static Tensorflow.Binding; - -namespace TensorFlowNET.UnitTest -{ - /// - /// Use as base class for test classes to get additional assertions - /// - public class PythonTest - { - #region python compatibility layer - protected PythonTest self { get => this; } - protected int None => -1; - #endregion - - #region pytest assertions - - public void assertItemsEqual(ICollection given, ICollection expected) - { - if (given is Hashtable && expected is Hashtable) - { - Assert.AreEqual(JObject.FromObject(expected).ToString(), JObject.FromObject(given).ToString()); - return; - } - Assert.IsNotNull(expected); - Assert.IsNotNull(given); - var e = expected.OfType().ToArray(); - var g = given.OfType().ToArray(); - Assert.AreEqual(e.Length, g.Length, $"The collections differ in length expected {e.Length} but got {g.Length}"); - for (int i = 0; i < e.Length; i++) - { - /*if (g[i] is NDArray && e[i] is NDArray) - assertItemsEqual((g[i] as NDArray).GetData(), (e[i] as NDArray).GetData()); - else*/ - if (e[i] is ICollection && g[i] is ICollection) - assertEqual(g[i], e[i]); - else - Assert.AreEqual(e[i], g[i], $"Items differ at index {i}, expected {e[i]} but got {g[i]}"); - } - } - - public void assertAllEqual(ICollection given, ICollection expected) - { - assertItemsEqual(given, expected); - } - - public void assertFloat32Equal(float expected, float actual, string msg) - { - float eps = 1e-6f; - Assert.IsTrue(Math.Abs(expected - actual) < eps * Math.Max(1.0f, Math.Abs(expected)), $"{msg}: expected {expected} vs actual {actual}"); - } - - public void assertFloat64Equal(double expected, double actual, string msg) - { - double eps = 1e-16f; - Assert.IsTrue(Math.Abs(expected - actual) < eps * Math.Max(1.0f, Math.Abs(expected)), $"{msg}: expected {expected} vs actual {actual}"); - } - - public void AssetSequenceEqual(float[] expected, float[] actual) - { - float eps = 1e-5f; - for (int i = 0; i < expected.Length; i++) - Assert.IsTrue(Math.Abs(expected[i] - actual[i]) < eps * Math.Max(1.0f, Math.Abs(expected[i])), $"expected {expected} vs actual {actual}"); - } - - public void AssetSequenceEqual(double[] expected, double[] actual) - { - double eps = 1e-5f; - for (int i = 0; i < expected.Length; i++) - Assert.IsTrue(Math.Abs(expected[i] - actual[i]) < eps * Math.Max(1.0f, Math.Abs(expected[i])), $"expected {expected} vs actual {actual}"); - } - - public void assertEqual(object given, object expected) - { - /*if (given is NDArray && expected is NDArray) - { - assertItemsEqual((given as NDArray).GetData(), (expected as NDArray).GetData()); - return; - }*/ - if (given is Hashtable && expected is Hashtable) - { - Assert.AreEqual(JObject.FromObject(expected).ToString(), JObject.FromObject(given).ToString()); - return; - } - if (given is ICollection && expected is ICollection) - { - assertItemsEqual(given as ICollection, expected as ICollection); - return; - } - if (given is float && expected is float) - { - assertFloat32Equal((float)expected, (float)given, ""); - return; - } - if (given is double && expected is double) - { - assertFloat64Equal((double)expected, (double)given, ""); - return; - } - Assert.AreEqual(expected, given); - } - - public void assertEquals(object given, object expected) - { - assertEqual(given, expected); - } - - public void assert(object given) - { - if (given is bool) - Assert.IsTrue((bool)given); - Assert.IsNotNull(given); - } - - public void assertIsNotNone(object given) - { - Assert.IsNotNull(given); - } - - public void assertFalse(bool cond) - { - Assert.IsFalse(cond); - } - - public void assertTrue(bool cond) - { - Assert.IsTrue(cond); - } - - public void assertAllClose(NDArray array1, NDArray array2, double eps = 1e-5) - { - Assert.IsTrue(np.allclose(array1, array2, rtol: eps)); - } - - public void assertAllClose(double value, NDArray array2, double eps = 1e-5) - { - var array1 = np.ones_like(array2) * value; - Assert.IsTrue(np.allclose(array1, array2, rtol: eps)); - } - - public void assertProtoEquals(object toProto, object o) - { - throw new NotImplementedException(); - } - - #endregion - - #region tensor evaluation and test session - - //protected object _eval_helper(Tensor[] tensors) - //{ - // if (tensors == null) - // return null; - // return nest.map_structure(self._eval_tensor, tensors); - //} - - protected object _eval_tensor(object tensor) - { - if (tensor == null) - return None; - //else if (callable(tensor)) - // return self._eval_helper(tensor()) - else - { - try - { - //TODO: - // if sparse_tensor.is_sparse(tensor): - // return sparse_tensor.SparseTensorValue(tensor.indices, tensor.values, - // tensor.dense_shape) - //return (tensor as Tensor).numpy(); - } - catch (Exception) - { - throw new ValueError("Unsupported type: " + tensor.GetType()); - } - return null; - } - } - - /// - /// This function is used in many original tensorflow unit tests to evaluate tensors - /// in a test session with special settings (for instance constant folding off) - /// - /// - public T evaluate(Tensor tensor) - { - object result = null; - // if context.executing_eagerly(): - // return self._eval_helper(tensors) - // else: - { - var sess = tf.Session(); - var ndarray = tensor.eval(sess); - if (typeof(T) == typeof(double)) - { - double x = ndarray; - result = x; - } - else if (typeof(T) == typeof(int)) - { - int x = ndarray; - result = x; - } - else - { - result = ndarray; - } - - return (T)result; - } - } - - - public Session cached_session() - { - throw new NotImplementedException(); - } - - //Returns a TensorFlow Session for use in executing tests. - public Session session(Graph graph = null, object config = null, bool use_gpu = false, bool force_gpu = false) - { - //Note that this will set this session and the graph as global defaults. - - //Use the `use_gpu` and `force_gpu` options to control where ops are run.If - //`force_gpu` is True, all ops are pinned to `/device:GPU:0`. Otherwise, if - //`use_gpu` is True, TensorFlow tries to run as many ops on the GPU as - //possible.If both `force_gpu and `use_gpu` are False, all ops are pinned to - //the CPU. - - //Example: - //```python - //class MyOperatorTest(test_util.TensorFlowTestCase): - // def testMyOperator(self): - // with self.session(use_gpu= True): - // valid_input = [1.0, 2.0, 3.0, 4.0, 5.0] - // result = MyOperator(valid_input).eval() - // self.assertEqual(result, [1.0, 2.0, 3.0, 5.0, 8.0] - // invalid_input = [-1.0, 2.0, 7.0] - // with self.assertRaisesOpError("negative input not supported"): - // MyOperator(invalid_input).eval() - //``` - - //Args: - // graph: Optional graph to use during the returned session. - // config: An optional config_pb2.ConfigProto to use to configure the - // session. - // use_gpu: If True, attempt to run as many ops as possible on GPU. - // force_gpu: If True, pin all ops to `/device:GPU:0`. - - //Yields: - // A Session object that should be used as a context manager to surround - // the graph building and execution code in a test case. - - Session s = null; - //if (context.executing_eagerly()) - // yield None - //else - //{ - s = self._create_session(graph, config, force_gpu); - //} - return s.as_default(); - } - - // See session() for details. - private Session _create_session(Graph graph, object cfg, bool forceGpu) - { - var prepare_config = new Func((config) => - { - // """Returns a config for sessions. - // Args: - // config: An optional config_pb2.ConfigProto to use to configure the - // session. - // Returns: - // A config_pb2.ConfigProto object. - - //TODO: config - - // # use_gpu=False. Currently many tests rely on the fact that any device - // # will be used even when a specific device is supposed to be used. - // allow_soft_placement = not force_gpu - // if config is None: - // config = config_pb2.ConfigProto() - // config.allow_soft_placement = allow_soft_placement - // config.gpu_options.per_process_gpu_memory_fraction = 0.3 - // elif not allow_soft_placement and config.allow_soft_placement: - // config_copy = config_pb2.ConfigProto() - // config_copy.CopyFrom(config) - // config = config_copy - // config.allow_soft_placement = False - // # Don't perform optimizations for tests so we don't inadvertently run - // # gpu ops on cpu - // config.graph_options.optimizer_options.opt_level = -1 - // # Disable Grappler constant folding since some tests & benchmarks - // # use constant input and become meaningless after constant folding. - // # DO NOT DISABLE GRAPPLER OPTIMIZERS WITHOUT CONSULTING WITH THE - // # GRAPPLER TEAM. - // config.graph_options.rewrite_options.constant_folding = ( - // rewriter_config_pb2.RewriterConfig.OFF) - // config.graph_options.rewrite_options.pin_to_host_optimization = ( - // rewriter_config_pb2.RewriterConfig.OFF) - return config; - }); - //TODO: use this instead of normal session - //return new ErrorLoggingSession(graph = graph, config = prepare_config(config)) - return new Session(graph);//, config = prepare_config(config)) - } - - #endregion - - public void AssetSequenceEqual(T[] a, T[] b) - { - Assert.IsTrue(Enumerable.SequenceEqual(a, b)); - } - } -} diff --git a/test/TensorFlowNET.UnitTest/Tensorflow.Binding.UnitTest.csproj b/test/TensorFlowNET.UnitTest/Tensorflow.Binding.UnitTest.csproj index 7a6a7f92c..5264cb104 100644 --- a/test/TensorFlowNET.UnitTest/Tensorflow.Binding.UnitTest.csproj +++ b/test/TensorFlowNET.UnitTest/Tensorflow.Binding.UnitTest.csproj @@ -51,6 +51,7 @@ + diff --git a/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs new file mode 100644 index 000000000..3b53ff9cd --- /dev/null +++ b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs @@ -0,0 +1,232 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using System; +using System.Linq; +using Tensorflow; +using Tensorflow.NumPy; +using static Tensorflow.Binding; + +namespace TensorFlowNET.UnitTest.Training +{ + [TestClass] + public class GradientDescentOptimizerTest : PythonTest + { + private static TF_DataType GetTypeForNumericType() where T : struct + { + return Type.GetTypeCode(typeof(T)) switch + { + TypeCode.Single => np.float32, + TypeCode.Double => np.float64, + _ => throw new NotImplementedException(), + }; + } + + private void TestBasic() where T : struct + { + var dtype = GetTypeForNumericType(); + + // train.GradientDescentOptimizer is V1 only API. + tf.Graph().as_default(); + using (var sess = self.cached_session()) + { + var var0 = tf.Variable(new[] { 1.0, 2.0 }, dtype: dtype); + var var1 = tf.Variable(new[] { 3.0, 4.0 }, dtype: dtype); + var grads0 = tf.constant(new[] { 0.1, 0.1 }, dtype: dtype); + var grads1 = tf.constant(new[] { 0.01, 0.01 }, dtype: dtype); + var optimizer = tf.train.GradientDescentOptimizer(3.0f); + var grads_and_vars = new[] { + Tuple.Create(grads0, var0 as IVariableV1), + Tuple.Create(grads1, var1 as IVariableV1) + }; + var sgd_op = optimizer.apply_gradients(grads_and_vars); + + var global_variables = tf.global_variables_initializer(); + sess.run(global_variables); + + var initialVar0 = sess.run(var0); + var initialVar1 = sess.run(var1); + // Fetch params to validate initial values + self.assertAllCloseAccordingToType(new[] { 1.0, 2.0 }, self.evaluate(var0)); + self.assertAllCloseAccordingToType(new[] { 3.0, 4.0 }, self.evaluate(var1)); + // Run 1 step of sgd + sgd_op.run(); + // Validate updated params + self.assertAllCloseAccordingToType( + new[] { 1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1 }, + self.evaluate(var0)); + self.assertAllCloseAccordingToType( + new[] { 3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01 }, + self.evaluate(var1)); + // TODO: self.assertEqual(0, len(optimizer.variables())); + } + } + + [TestMethod] + public void TestBasic() + { + //TODO: add np.half + TestBasic(); + TestBasic(); + } + + private void TestMinimizeResourceVariable() where T : struct + { + var dtype = GetTypeForNumericType(); + + // train.GradientDescentOptimizer is V1 only API. + tf.Graph().as_default(); + using (var sess = self.cached_session()) + { + var var0 = tf.Variable(new[,] { { 1.0f, 2.0f } }, dtype: dtype); + var var1 = tf.Variable(new[] { 3.0 }, dtype: dtype); + var x = tf.constant(new[,] { { 4.0f }, { 5.0f } }, dtype: dtype); + + var pred = math_ops.matmul(var0, x) + var1; + var loss = pred * pred; + var sgd_op = tf.train.GradientDescentOptimizer(1.0f).minimize(loss); + + var global_variables = tf.global_variables_initializer(); + sess.run(global_variables); + + sess.run(new[] { var0, var1 }); + // Fetch params to validate initial values + self.assertAllCloseAccordingToType(new[,] { { 1.0, 2.0 } }, self.evaluate(var0)); + self.assertAllCloseAccordingToType(new[] { 3.0 }, self.evaluate(var1)); + // Run 1 step of sgd + sgd_op.run(); + // Validate updated params + var np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0; + var np_grad = 2 * np_pred; + self.assertAllCloseAccordingToType( + new[,] { { 1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0 } }, + self.evaluate(var0)); + self.assertAllCloseAccordingToType( + new[] { 3.0 - np_grad }, + self.evaluate(var1)); + } + } + + [TestMethod] + public void TestMinimizeResourceVariable() + { + //TODO: add np.half + TestMinimizeResourceVariable(); + TestMinimizeResourceVariable(); + } + + private void TestTensorLearningRate() where T : struct + { + var dtype = GetTypeForNumericType(); + + // train.GradientDescentOptimizer is V1 only API. + tf.Graph().as_default(); + using (var sess = self.cached_session()) + { + var var0 = tf.Variable(new[] { 1.0, 2.0 }, dtype: dtype); + var var1 = tf.Variable(new[] { 3.0, 4.0 }, dtype: dtype); + var grads0 = tf.constant(new[] { 0.1, 0.1 }, dtype: dtype); + var grads1 = tf.constant(new[] { 0.01, 0.01 }, dtype: dtype); + var lrate = constant_op.constant(3.0); + var grads_and_vars = new[] { + Tuple.Create(grads0, var0 as IVariableV1), + Tuple.Create(grads1, var1 as IVariableV1) + }; + var sgd_op = tf.train.GradientDescentOptimizer(lrate) + .apply_gradients(grads_and_vars); + + var global_variables = tf.global_variables_initializer(); + sess.run(global_variables); + + var initialVar0 = sess.run(var0); + var initialVar1 = sess.run(var1); + // Fetch params to validate initial values + self.assertAllCloseAccordingToType(new[] { 1.0, 2.0 }, self.evaluate(var0)); + self.assertAllCloseAccordingToType(new[] { 3.0, 4.0 }, self.evaluate(var1)); + // Run 1 step of sgd + sgd_op.run(); + // Validate updated params + self.assertAllCloseAccordingToType( + new[] { 1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1 }, + self.evaluate(var0)); + self.assertAllCloseAccordingToType( + new[] { 3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01 }, + self.evaluate(var1)); + // TODO: self.assertEqual(0, len(optimizer.variables())); + } + } + + [TestMethod] + public void TestTensorLearningRate() + { + //TODO: add np.half + TestTensorLearningRate(); + TestTensorLearningRate(); + } + + public void TestGradWrtRef() where T : struct + { + var dtype = GetTypeForNumericType(); + + var graph = tf.Graph().as_default(); + using (var sess = self.cached_session()) + { + var opt = tf.train.GradientDescentOptimizer(3.0f); + var values = new[] { 1.0, 3.0 }; + var vars_ = values.Select( + v => tf.Variable(new[] { v }, dtype: dtype) as IVariableV1 + ).ToList(); + var grads_and_vars = opt.compute_gradients(tf.add(vars_[0], vars_[1]), vars_); + sess.run(tf.global_variables_initializer()); + foreach (var (grad, _) in grads_and_vars) + self.assertAllCloseAccordingToType(new[] { 1.0 }, self.evaluate(grad)); + + } + } + + [TestMethod] + public void TestGradWrtRef() + { + TestGradWrtRef(); + TestGradWrtRef(); + } + + public void TestWithGlobalStep() where T : struct + { + var dtype = GetTypeForNumericType(); + + tf.Graph().as_default(); + using (var sess = self.cached_session()) + { + var global_step = tf.Variable(0, trainable: false); + var var0 = tf.Variable(new[] { 1.0, 2.0 }, dtype: dtype); + var var1 = tf.Variable(new[] { 3.0, 4.0 }, dtype: dtype); + var grads0 = tf.constant(new[] { 0.1, 0.1 }, dtype: dtype); + var grads1 = tf.constant(new[] { 0.01, 0.01 }, dtype: dtype); + var grads_and_vars = new[] { + Tuple.Create(grads0, var0 as IVariableV1), + Tuple.Create(grads1, var1 as IVariableV1) + }; + var sgd_op = tf.train.GradientDescentOptimizer(3.0f) + .apply_gradients(grads_and_vars, global_step: global_step); + + sess.run(tf.global_variables_initializer()); + // Fetch params to validate initial values + self.assertAllCloseAccordingToType(new[] { 1.0, 2.0 }, self.evaluate(var0)); + self.assertAllCloseAccordingToType(new[] { 3.0, 4.0 }, self.evaluate(var1)); + // Run 1 step of sgd + sgd_op.run(); + // Validate updated params and global_step + self.assertAllCloseAccordingToType(new[] { 1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1 }, self.evaluate(var0)); + self.assertAllCloseAccordingToType(new[] { 3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01 }, self.evaluate(var1)); + Assert.AreEqual(1, self.evaluate(global_step)); + } + + } + + [TestMethod] + public void TestWithGlobalStep() + { + TestWithGlobalStep(); + TestWithGlobalStep(); + } + } +} diff --git a/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs b/test/Tensorflow.UnitTest/PythonTest.cs similarity index 73% rename from test/TensorFlowNET.Graph.UnitTest/PythonTest.cs rename to test/Tensorflow.UnitTest/PythonTest.cs index ccf59f5ae..1ccd39f02 100644 --- a/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs +++ b/test/Tensorflow.UnitTest/PythonTest.cs @@ -1,13 +1,9 @@ using Microsoft.VisualStudio.TestTools.UnitTesting; using Newtonsoft.Json.Linq; using Tensorflow.NumPy; -using System; using System.Collections; -using System.Linq; using Tensorflow; using static Tensorflow.Binding; -using OneOf.Types; -using System.Collections.Generic; namespace TensorFlowNET.UnitTest { @@ -64,6 +60,20 @@ public void assertFloat64Equal(double expected, double actual, string msg) Assert.IsTrue(Math.Abs(expected - actual) < eps * Math.Max(1.0f, Math.Abs(expected)), $"{msg}: expected {expected} vs actual {actual}"); } + public void AssetSequenceEqual(float[] expected, float[] actual) + { + float eps = 1e-5f; + for (int i = 0; i < expected.Length; i++) + Assert.IsTrue(Math.Abs(expected[i] - actual[i]) < eps * Math.Max(1.0f, Math.Abs(expected[i])), $"expected {expected} vs actual {actual}"); + } + + public void AssetSequenceEqual(double[] expected, double[] actual) + { + double eps = 1e-5f; + for (int i = 0; i < expected.Length; i++) + Assert.IsTrue(Math.Abs(expected[i] - actual[i]) < eps * Math.Max(1.0f, Math.Abs(expected[i])), $"expected {expected} vs actual {actual}"); + } + public void assertEqual(object given, object expected) { /*if (given is NDArray && expected is NDArray) @@ -76,9 +86,9 @@ public void assertEqual(object given, object expected) Assert.AreEqual(JObject.FromObject(expected).ToString(), JObject.FromObject(given).ToString()); return; } - if (given is ICollection && expected is ICollection) + if (given is ICollection collectionGiven && expected is ICollection collectionExpected) { - assertItemsEqual(given as ICollection, expected as ICollection); + assertItemsEqual(collectionGiven, collectionExpected); return; } if (given is float && expected is float) @@ -123,13 +133,83 @@ public void assertTrue(bool cond) public void assertAllClose(NDArray array1, NDArray array2, double eps = 1e-5) { - Assert.IsTrue(np.allclose(array1, array2, rtol: eps)); + CollectionAssert.AreEqual(array1.ToArray(), array2.ToArray(), new CollectionComparer(eps)); + + //TODO: Assert.IsTrue(np.allclose(array1, array2, rtol: eps)); } public void assertAllClose(double value, NDArray array2, double eps = 1e-5) { + if (array2.shape.IsScalar) + { + double value2 = array2; + Assert.AreEqual(value, value2, eps); + return; + } var array1 = np.ones_like(array2) * value; - // Assert.IsTrue(np.allclose(array1, array2, rtol: eps)); + CollectionAssert.AreEqual(array1.ToArray(), array2.ToArray(), new CollectionComparer(eps)); + + //TODO: Assert.IsTrue(np.allclose(array1, array2, rtol: eps)); + } + + private class CollectionComparer : IComparer + { + private readonly double _epsilon; + + public CollectionComparer(double eps = 1e-06) + { + _epsilon = eps; + } + public int Compare(object? x, object? y) + { + if (x == null && y == null) + { + return 0; + } + else if (x == null) + { + return -1; + } + else if (y == null) + { + return 1; + } + + var a = Convert.ToDouble(x); + var b = Convert.ToDouble(y); + + double delta = Math.Abs(a - b); + if (delta < _epsilon) + { + return 0; + } + return a.CompareTo(b); + } + } + + public void assertAllCloseAccordingToType( + double[,] expected, + T[,] given, + double eps = 1e-6, + float float_eps = 1e-6f) + { + Assert.AreEqual(expected.GetLength(0), given.GetLength(0)); + Assert.AreEqual(expected.GetLength(1), given.GetLength(1)); + + var flattenGiven = given.Cast().ToArray(); + assertAllCloseAccordingToType(expected, flattenGiven, eps, float_eps); + } + + public void assertAllCloseAccordingToType( + ICollection expected, + ICollection given, + double eps = 1e-6, + float float_eps = 1e-6f) + { + // TODO: check if any of arguments is not double and change toletance + // remove givenAsDouble and cast expected instead + var givenAsDouble = given.Select(x => Convert.ToDouble(x)).ToArray(); + CollectionAssert.AreEqual(expected, givenAsDouble, new CollectionComparer(eps)); } public void assertProtoEquals(object toProto, object o) @@ -141,9 +221,9 @@ public void assertProtoEquals(object toProto, object o) #region tensor evaluation and test session - private Session _cached_session = null; - private Graph _cached_graph = null; - private object _cached_config = null; + private Session? _cached_session = null; + private Graph? _cached_graph = null; + private object? _cached_config = null; private bool _cached_force_gpu = false; private void _ClearCachedSession() @@ -155,7 +235,6 @@ private void _ClearCachedSession() } } - //protected object _eval_helper(Tensor[] tensors) //{ // if (tensors == null) @@ -163,7 +242,7 @@ private void _ClearCachedSession() // return nest.map_structure(self._eval_tensor, tensors); //} - protected object _eval_tensor(object tensor) + protected object? _eval_tensor(object tensor) { if (tensor == null) return None; @@ -194,22 +273,44 @@ protected object _eval_tensor(object tensor) /// public T evaluate(Tensor tensor) { - object result = null; + object? result = null; // if context.executing_eagerly(): // return self._eval_helper(tensors) // else: { - var sess = tf.Session(); + var sess = tf.get_default_session(); var ndarray = tensor.eval(sess); - if (typeof(T) == typeof(double)) + + if (typeof(T) == typeof(int)) + { + int i = ndarray; + result = i; + } + else if (typeof(T) == typeof(float)) + { + float f = ndarray; + result = f; + } + else if (typeof(T) == typeof(double)) { - double x = ndarray; - result = x; + double d = ndarray; + result = d; } - else if (typeof(T) == typeof(int)) + else if ( + typeof(T) == typeof(double[]) + || typeof(T) == typeof(double[,])) { - int x = ndarray; - result = x; + result = ndarray.ToMultiDimArray(); + } + else if (typeof(T) == typeof(float[]) + || typeof(T) == typeof(float[,])) + { + result = ndarray.ToMultiDimArray(); + } + else if (typeof(T) == typeof(int[]) + || typeof(T) == typeof(int[,])) + { + result = ndarray.ToMultiDimArray(); } else { @@ -220,9 +321,10 @@ public T evaluate(Tensor tensor) } } + ///Returns a TensorFlow Session for use in executing tests. - public Session cached_session( - Graph graph = null, object config = null, bool use_gpu = false, bool force_gpu = false) + public Session? cached_session( + Graph? graph = null, object? config = null, bool use_gpu = false, bool force_gpu = false) { // This method behaves differently than self.session(): for performance reasons // `cached_session` will by default reuse the same session within the same @@ -268,12 +370,12 @@ public Session cached_session( var sess = self._get_cached_session( graph, config, force_gpu, crash_if_inconsistent_args: true); using var cached = self._constrain_devices_and_set_default(sess, use_gpu, force_gpu); - return cached; + return cached; } } //Returns a TensorFlow Session for use in executing tests. - public Session session(Graph graph = null, object config = null, bool use_gpu = false, bool force_gpu = false) + public Session session(Graph? graph = null, object? config = null, bool use_gpu = false, bool force_gpu = false) { //Note that this will set this session and the graph as global defaults. @@ -307,7 +409,7 @@ public Session session(Graph graph = null, object config = null, bool use_gpu = // A Session object that should be used as a context manager to surround // the graph building and execution code in a test case. - Session s = null; + Session? s = null; //if (context.executing_eagerly()) // yield None //else @@ -317,12 +419,13 @@ public Session session(Graph graph = null, object config = null, bool use_gpu = return s.as_default(); } - private Session _constrain_devices_and_set_default(Session sess, bool use_gpu, bool force_gpu) + private Session? _constrain_devices_and_set_default(Session sess, bool use_gpu, bool force_gpu) { // Set the session and its graph to global default and constrain devices.""" if (tf.executing_eagerly()) return null; - else { + else + { sess.graph.as_default(); sess.as_default(); { @@ -342,16 +445,16 @@ private Session _constrain_devices_and_set_default(Session sess, bool use_gpu, b } else if (use_gpu) return sess; - else + else using (sess.graph.device("/device:CPU:0")) return sess; } - + } } // See session() for details. - private Session _create_session(Graph graph, object cfg, bool forceGpu) + private Session _create_session(Graph? graph, object? cfg, bool forceGpu) { var prepare_config = new Func((config) => { @@ -395,8 +498,8 @@ private Session _create_session(Graph graph, object cfg, bool forceGpu) } private Session _get_cached_session( - Graph graph = null, - object config = null, + Graph? graph = null, + object? config = null, bool force_gpu = false, bool crash_if_inconsistent_args = true) { @@ -409,26 +512,30 @@ private Session _get_cached_session( self._cached_config = config; self._cached_force_gpu = force_gpu; return sess; - } else { + } + else + { - if (crash_if_inconsistent_args && !self._cached_graph.Equals(graph)) + if (crash_if_inconsistent_args && self._cached_graph != null && !self._cached_graph.Equals(graph)) throw new ValueError(@"The graph used to get the cached session is different than the one that was used to create the session. Maybe create a new session with self.session()"); - if (crash_if_inconsistent_args && !self._cached_config.Equals(config)) { + if (crash_if_inconsistent_args && self._cached_config != null && !self._cached_config.Equals(config)) + { throw new ValueError(@"The config used to get the cached session is different than the one that was used to create the session. Maybe create a new session with self.session()"); } - if (crash_if_inconsistent_args && !self._cached_force_gpu.Equals(force_gpu)) { + if (crash_if_inconsistent_args && !self._cached_force_gpu.Equals(force_gpu)) + { throw new ValueError(@"The force_gpu value used to get the cached session is different than the one that was used to create the session. Maybe create a new session with self.session()"); } - return _cached_session; + return self._cached_session; } } diff --git a/test/Tensorflow.UnitTest/Tensorflow.UnitTest.csproj b/test/Tensorflow.UnitTest/Tensorflow.UnitTest.csproj new file mode 100644 index 000000000..9ad6bc7a5 --- /dev/null +++ b/test/Tensorflow.UnitTest/Tensorflow.UnitTest.csproj @@ -0,0 +1,24 @@ + + + + net6.0 + enable + enable + + false + true + + + + + + + + + + + + + + + diff --git a/test/TensorflowNET.Hub.Unittest/Tensorflow.Hub.Unittest.csproj b/test/TensorflowNET.Hub.Unittest/Tensorflow.Hub.Unittest.csproj index 4c3918e4a..c93b89256 100644 --- a/test/TensorflowNET.Hub.Unittest/Tensorflow.Hub.Unittest.csproj +++ b/test/TensorflowNET.Hub.Unittest/Tensorflow.Hub.Unittest.csproj @@ -9,7 +9,7 @@ - + diff --git a/tools/TensorFlowNET.Console/Tensorflow.Console.csproj b/tools/TensorFlowNET.Console/Tensorflow.Console.csproj index ecc2d30b5..bb60b6b63 100644 --- a/tools/TensorFlowNET.Console/Tensorflow.Console.csproj +++ b/tools/TensorFlowNET.Console/Tensorflow.Console.csproj @@ -19,13 +19,10 @@ AnyCPU - - - - + diff --git a/tools/Tensorflow.CodeGen/Tensorflow.CodeGen.csproj b/tools/Tensorflow.CodeGen/Tensorflow.CodeGen.csproj index 03195e6ac..2afc68a3c 100644 --- a/tools/Tensorflow.CodeGen/Tensorflow.CodeGen.csproj +++ b/tools/Tensorflow.CodeGen/Tensorflow.CodeGen.csproj @@ -9,7 +9,6 @@ - diff --git a/tools/Tensorflow.UnitTest.RedistHolder/Tensorflow.UnitTest.RedistHolder.csproj b/tools/Tensorflow.UnitTest.RedistHolder/Tensorflow.UnitTest.RedistHolder.csproj index 1ca387dbb..0d1018cab 100644 --- a/tools/Tensorflow.UnitTest.RedistHolder/Tensorflow.UnitTest.RedistHolder.csproj +++ b/tools/Tensorflow.UnitTest.RedistHolder/Tensorflow.UnitTest.RedistHolder.csproj @@ -5,7 +5,7 @@ - +