From 2a3960228ba79655e515b030116b8b2b2f84c9c9 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Mon, 24 Feb 2020 20:13:02 +0100 Subject: [PATCH 01/18] Update getting_started.md --- docs/getting_started.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index a93b9fa5..e6e7b97f 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -195,7 +195,7 @@ specified). Example ML pipelines using R have a single step to train a model. Th * The third stage of the pipeline, **Deploy to ACI**, deploys the model to the QA environment in [Azure Container Instances](https://azure.microsoft.com/en-us/services/container-instances/). It then runs a *smoke test* to validate the deployment, i.e. sends a sample query to the scoring web service and verifies that it returns a response in the expected format. -The pipeline uses a Docker container on the Azure Pipelines agents to accomplish the pipeline steps. The image of the container ***mcr.microsoft.com/mlops/python:latest*** is built with this [Dockerfile](./environment_setup/Dockerfile) and it has all necessary dependencies installed for the purposes of this repository. This image serves as an example of using a custom Docker image that provides a pre-baked environment. This environment is guaranteed to be the same on any building agent, VM or local machine. In your project you will want to build your own Docker image that only contains the dependencies and tools required for your use case. This image will be more likely smaller and therefore faster, and it will be totally maintained by your team. +The pipeline uses a Docker container on the Azure Pipelines agents to accomplish the pipeline steps. The image of the container ***mcr.microsoft.com/mlops/python:latest*** is built with this [Dockerfile](../environment_setup/Dockerfile) and it has all necessary dependencies installed for the purposes of this repository. 
This image serves as an example of using a custom Docker image that provides a pre-baked environment. This environment is guaranteed to be the same on any building agent, VM or local machine. In your project you will want to build your own Docker image that only contains the dependencies and tools required for your use case. This image will be more likely smaller and therefore faster, and it will be totally maintained by your team. Wait until the pipeline finishes and verify that there is a new model in the **ML Workspace**: From 9919c067fcb1f1f4ab1be6e8cf8c05aa1abc6009 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Mon, 24 Feb 2020 20:13:16 +0100 Subject: [PATCH 02/18] Update diabetes_regression-ci-build-train.yml --- .../diabetes_regression-ci-build-train.yml | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/.pipelines/diabetes_regression-ci-build-train.yml b/.pipelines/diabetes_regression-ci-build-train.yml index ce9ced1d..00201a5c 100644 --- a/.pipelines/diabetes_regression-ci-build-train.yml +++ b/.pipelines/diabetes_regression-ci-build-train.yml @@ -1,4 +1,12 @@ # Continuous Integration (CI) pipeline that orchestrates the training, evaluation, registration, deployment, and testing of the diabetes_regression model. 
+ +name: Diabetes regression model training and deployment + +resources: + containers: + - container: mlops + image: mcr.microsoft.com/mlops/python:latest + pr: none trigger: branches: @@ -24,7 +32,7 @@ stages: jobs: - job: "Model_CI_Pipeline" displayName: "Model CI Pipeline" - container: mcr.microsoft.com/mlops/python:latest + container: mlops timeoutInMinutes: 0 steps: - template: azdo-base-pipeline.yml @@ -46,7 +54,7 @@ stages: - job: "Get_Pipeline_ID" condition: and(succeeded(), eq(coalesce(variables['auto-trigger-training'], 'true'), 'true')) displayName: "Get Pipeline ID for execution" - container: mcr.microsoft.com/mlops/python:latest + container: mlops timeoutInMinutes: 0 steps: - task: AzureCLI@1 @@ -81,7 +89,7 @@ stages: dependsOn: "Run_ML_Pipeline" condition: always() displayName: "Determine if evaluation succeeded and new model is registered" - container: mcr.microsoft.com/mlops/python:latest + container: mlops timeoutInMinutes: 0 steps: - template: diabetes_regression-template-get-model-version.yml @@ -93,7 +101,7 @@ stages: jobs: - job: "Deploy_ACI" displayName: "Deploy to ACI" - container: mcr.microsoft.com/mlops/python:latest + container: mlops timeoutInMinutes: 0 steps: - template: diabetes_regression-template-get-model-version.yml @@ -126,7 +134,7 @@ stages: jobs: - job: "Deploy_AKS" displayName: "Deploy to AKS" - container: mcr.microsoft.com/mlops/python:latest + container: mlops timeoutInMinutes: 0 steps: - template: diabetes_regression-template-get-model-version.yml @@ -160,7 +168,7 @@ stages: jobs: - job: "Deploy_Webapp" displayName: "Deploy to Webapp" - container: mcr.microsoft.com/mlops/python:latest + container: mlops timeoutInMinutes: 0 steps: - template: diabetes_regression-template-get-model-version.yml From 2d7f427c4adb909d8f844f8707c5e3a004e80bda Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Mon, 24 Feb 2020 20:37:33 +0100 Subject: [PATCH 03/18] . 
--- .pipelines/azdo-abtest-pipeline.yml | 8 +++++++- .pipelines/azdo-pr-build-train.yml | 10 ++++++++-- .pipelines/diabetes_regression-ci-build-train.yml | 4 +--- .pipelines/diabetes_regression-ci-image.yml | 8 +++++++- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/.pipelines/azdo-abtest-pipeline.yml b/.pipelines/azdo-abtest-pipeline.yml index 2a395594..f532c33c 100644 --- a/.pipelines/azdo-abtest-pipeline.yml +++ b/.pipelines/azdo-abtest-pipeline.yml @@ -1,4 +1,10 @@ # Pipeline for the canary deployment workflow. + +resources: + containers: + - container: mlops + image: mcr.microsoft.com/mlops/python:latest + pr: none trigger: branches: @@ -31,7 +37,7 @@ stages: timeoutInMinutes: 0 pool: vmImage: 'ubuntu-latest' - container: mcr.microsoft.com/mlops/python:latest + container: mlops steps: - task: AzureCLI@1 inputs: diff --git a/.pipelines/azdo-pr-build-train.yml b/.pipelines/azdo-pr-build-train.yml index ef00f2eb..9c48202a 100644 --- a/.pipelines/azdo-pr-build-train.yml +++ b/.pipelines/azdo-pr-build-train.yml @@ -1,4 +1,10 @@ # Pipeline to run basic code quality tests as part of pull requests to the master branch. 
+ +resources: + containers: + - container: mlops + image: mcr.microsoft.com/mlops/python:latest + trigger: none pr: branches: @@ -8,11 +14,11 @@ pr: pool: vmImage: 'ubuntu-latest' -container: mcr.microsoft.com/mlops/python:latest +container: mlops variables: - template: diabetes_regression-variables.yml - group: devopsforai-aml-vg steps: -- template: azdo-base-pipeline.yml \ No newline at end of file +- template: azdo-base-pipeline.yml diff --git a/.pipelines/diabetes_regression-ci-build-train.yml b/.pipelines/diabetes_regression-ci-build-train.yml index 00201a5c..538037b1 100644 --- a/.pipelines/diabetes_regression-ci-build-train.yml +++ b/.pipelines/diabetes_regression-ci-build-train.yml @@ -1,11 +1,9 @@ # Continuous Integration (CI) pipeline that orchestrates the training, evaluation, registration, deployment, and testing of the diabetes_regression model. -name: Diabetes regression model training and deployment - resources: containers: - container: mlops - image: mcr.microsoft.com/mlops/python:latest + image: mcr.microsoft.com/mlops/python/$(Build.BuildId):latest pr: none trigger: diff --git a/.pipelines/diabetes_regression-ci-image.yml b/.pipelines/diabetes_regression-ci-image.yml index 3b112d5a..a3ab2937 100644 --- a/.pipelines/diabetes_regression-ci-image.yml +++ b/.pipelines/diabetes_regression-ci-image.yml @@ -1,4 +1,10 @@ # Builds the container image that is used by other pipelines for scoring. 
+ +resources: + containers: + - container: mlops + image: mcr.microsoft.com/mlops/python:latest + pr: none trigger: branches: @@ -16,7 +22,7 @@ trigger: pool: vmImage: 'ubuntu-latest' -container: mcr.microsoft.com/mlops/python:latest +container: mlops variables: - group: devopsforai-aml-vg From 299c9bf5eef6c47d3d4fbbde43b4ea5ecf427be1 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Mon, 24 Feb 2020 20:38:52 +0100 Subject: [PATCH 04/18] Update diabetes_regression-ci-build-train.yml --- .pipelines/diabetes_regression-ci-build-train.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pipelines/diabetes_regression-ci-build-train.yml b/.pipelines/diabetes_regression-ci-build-train.yml index 538037b1..419d50d9 100644 --- a/.pipelines/diabetes_regression-ci-build-train.yml +++ b/.pipelines/diabetes_regression-ci-build-train.yml @@ -3,7 +3,7 @@ resources: containers: - container: mlops - image: mcr.microsoft.com/mlops/python/$(Build.BuildId):latest + image: mcr.microsoft.com/mlops/python/$(Build.SourceBranch):latest pr: none trigger: From d9a2b4e0c9e9ead000c93a2527f14eb1cc324d71 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Mon, 24 Feb 2020 20:40:47 +0100 Subject: [PATCH 05/18] Update diabetes_regression-ci-build-train.yml --- .pipelines/diabetes_regression-ci-build-train.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.pipelines/diabetes_regression-ci-build-train.yml b/.pipelines/diabetes_regression-ci-build-train.yml index 419d50d9..4c8b13f7 100644 --- a/.pipelines/diabetes_regression-ci-build-train.yml +++ b/.pipelines/diabetes_regression-ci-build-train.yml @@ -4,6 +4,7 @@ resources: containers: - container: mlops image: mcr.microsoft.com/mlops/python/$(Build.SourceBranch):latest + endpoint: acrconnection pr: none trigger: From e7097f92d987e1edc2d5a2679ba1a20772d88d00 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Mon, 24 Feb 2020 20:42:06 +0100 Subject: [PATCH 06/18] Update diabetes_regression-ci-build-train.yml --- 
.pipelines/diabetes_regression-ci-build-train.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.pipelines/diabetes_regression-ci-build-train.yml b/.pipelines/diabetes_regression-ci-build-train.yml index 4c8b13f7..28f3f01f 100644 --- a/.pipelines/diabetes_regression-ci-build-train.yml +++ b/.pipelines/diabetes_regression-ci-build-train.yml @@ -3,8 +3,7 @@ resources: containers: - container: mlops - image: mcr.microsoft.com/mlops/python/$(Build.SourceBranch):latest - endpoint: acrconnection + image: mcr.microsoft.com/mlops/python/$(Build.SourceBranchName):latest pr: none trigger: From 4db36b8bda057b9d22fa094b433860c2418d9edb Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Mon, 24 Feb 2020 21:15:19 +0100 Subject: [PATCH 07/18] . --- .../diabetes_regression-ci-build-train.yml | 2 +- docs/custom_container.md | 85 +++++++++++++++++++ docs/getting_started.md | 8 +- 3 files changed, 91 insertions(+), 4 deletions(-) create mode 100644 docs/custom_container.md diff --git a/.pipelines/diabetes_regression-ci-build-train.yml b/.pipelines/diabetes_regression-ci-build-train.yml index 28f3f01f..c22ba590 100644 --- a/.pipelines/diabetes_regression-ci-build-train.yml +++ b/.pipelines/diabetes_regression-ci-build-train.yml @@ -3,7 +3,7 @@ resources: containers: - container: mlops - image: mcr.microsoft.com/mlops/python/$(Build.SourceBranchName):latest + image: mcr.microsoft.com/mlops/python:latest pr: none trigger: diff --git a/docs/custom_container.md b/docs/custom_container.md new file mode 100644 index 00000000..4d55ebe3 --- /dev/null +++ b/docs/custom_container.md @@ -0,0 +1,85 @@ +# Customizing the Azure DevOps job container + +The Model training and deployment pipeline uses a Docker container +on the Azure Pipelines agents to provide a reproducible environment +to run test and deployment code. + The image of the container +`mcr.microsoft.com/mlops/python:latest` is built with this +[Dockerfile](../environment_setup/Dockerfile). 
+ +In your project you will want to build your own +Docker image that only contains the dependencies and tools required for your +use case. This image will most likely be smaller and therefore faster, and it +will be totally maintained by your team. + +## Provision an Azure Container Registry + +An Azure Container Registry is deployed alongside your Azure ML Workspace to manage models. +You can use that registry instance to store your MLOps container image as well, or +provision a separate instance. + +## Create a Registry Service Connection + +Create a service connection to your Azure Container Registry: +- As *Connection type*, select *Docker Registry* +- As *Registry type*, select *Azure Container Registry* +- As *Azure container registry*, select your Container registry instance. + +As *Service connection name*, enter `acrconnection`. + +## Update the environment definition + +Modify the [Dockerfile](../environment_setup/Dockerfile) and/or the +[ci_dependencies.yml](../diabetes_regression/ci_dependencies.yml) Conda +environment definition to tailor your environment. + +If a package is available in a Conda package repository, then we recommend that +you use the Conda installation rather than the pip installation. Conda packages +typically come with prebuilt binaries that make installation more reliable. + +## Create a container build pipeline + +In your [Azure DevOps](https://dev.azure.com) project create a new build +pipeline referring to the +[docker-image-pipeline.yml](environment_setup/docker-image-pipeline.yml) +pipeline definition in your forked repository. + +Create a pipeline variable named `agentImageName` and give it an appropriate +value to name your image with, e.g. `mlops/diabetes_regression`. + +Run the pipeline. 
+ +## Modify the model pipeline + +Modify the model pipeline file [diabetes_regression-ci-build-train.yml](../.pipelines/diabetes_regression-ci-build-train.yml) by replacing this section: + +``` +resources: + containers: + - container: mlops + image: mcr.microsoft.com/mlops/python:latest +``` + +with (using the image name previously defined): + +``` +resources: + containers: + - container: mlops + image: mlops/diabetes_regression + endpoint: acrconnection +``` + +Run the pipeline and ensure your container has been used. + +## Dealing with branch concurrency + +Especially when working in a team, it's possible that multiple team members + +For example, if the master branch is using scikit-learn and Alice creates a branch to use Tensorflow instead, and she removes scikit-learn from the +[ci_dependencies.yml](../diabetes_regression/ci_dependencies.yml) Conda environment definition +and runs the [docker-image-pipeline.yml](environment_setup/docker-image-pipeline.yml) Docker image, the master branch will stop building. + +Alice could leave scikit-learn in addition to Tensorflow in the environment, but that is not ideal, as she would have to take an extra step to remove scikit-learn after merging her branch to master. + +A better approach would be for Alice to use a distinct name for her environment, such as mlops/diabetes_regression_tensorflow. diff --git a/docs/getting_started.md b/docs/getting_started.md index e6e7b97f..de4267eb 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -158,7 +158,7 @@ performs linting, unit testing and publishes a training pipeline. 
### Set up the Pipeline In your [Azure DevOps](https://dev.azure.com) project create and run a new build -pipeline referring to the [diabetes_regression-ci-build-train.yml](./.pipelines/azdo-ci-build-train.yml) +pipeline referring to the [diabetes_regression-ci-build-train.yml](../.pipelines/azdo-ci-build-train.yml) pipeline definition in your forked repository: ![configure ci build pipeline](./images/ci-build-pipeline-configure.png) @@ -195,8 +195,6 @@ specified). Example ML pipelines using R have a single step to train a model. Th * The third stage of the pipeline, **Deploy to ACI**, deploys the model to the QA environment in [Azure Container Instances](https://azure.microsoft.com/en-us/services/container-instances/). It then runs a *smoke test* to validate the deployment, i.e. sends a sample query to the scoring web service and verifies that it returns a response in the expected format. -The pipeline uses a Docker container on the Azure Pipelines agents to accomplish the pipeline steps. The image of the container ***mcr.microsoft.com/mlops/python:latest*** is built with this [Dockerfile](../environment_setup/Dockerfile) and it has all necessary dependencies installed for the purposes of this repository. This image serves as an example of using a custom Docker image that provides a pre-baked environment. This environment is guaranteed to be the same on any building agent, VM or local machine. In your project you will want to build your own Docker image that only contains the dependencies and tools required for your use case. This image will be more likely smaller and therefore faster, and it will be totally maintained by your team. 
- Wait until the pipeline finishes and verify that there is a new model in the **ML Workspace**: ![trained model](./images/trained-model.png) @@ -254,6 +252,10 @@ Make sure your webapp has the credentials to pull the image from the Azure Conta ![build](./images/multi-stage-webapp.png) +# Customizing the Azure DevOps job container + +The pipeline uses a Docker container on the Azure Pipelines agents to accomplish the pipeline steps. The image of the container ***mcr.microsoft.com/mlops/python:latest*** is built with this [Dockerfile](../environment_setup/Dockerfile) and it has all necessary dependencies installed for the purposes of this repository. This image serves as an example of using a custom Docker image that provides a pre-baked environment. This environment is guaranteed to be the same on any building agent, VM or local machine. In your project you will want to build your own Docker image that only contains the dependencies and tools required for your use case. This image will be more likely smaller and therefore faster, and it will be totally maintained by your team. See [instructions for Customizing the Azure DevOps job container](./custom_container.md). + # Next steps * You may wish to follow the [bootstrap instructions](../bootstrap/README.md) to create a starting point for your project use case. 
From cb4c4c238daadce06103fb4afd32f53bb48488a1 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Mon, 24 Feb 2020 21:18:29 +0100 Subject: [PATCH 08/18] Update custom_container.md --- docs/custom_container.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/custom_container.md b/docs/custom_container.md index 4d55ebe3..2b77d6d2 100644 --- a/docs/custom_container.md +++ b/docs/custom_container.md @@ -24,8 +24,7 @@ Create a service connection to your Azure Container Registry: - As *Connection type*, select *Docker Registry* - As *Registry type*, select *Azure Container Registry* - As *Azure container registry*, select your Container registry instance. - -As *Service connection name*, enter `acrconnection`. +- As *Service connection name*, enter `acrconnection`. ## Update the environment definition From 510ea8e188677f5c35d5aa1ec16aa7f9664c967c Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Mon, 24 Feb 2020 21:19:51 +0100 Subject: [PATCH 09/18] Update custom_container.md --- docs/custom_container.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/custom_container.md b/docs/custom_container.md index 2b77d6d2..b8ec14d6 100644 --- a/docs/custom_container.md +++ b/docs/custom_container.md @@ -40,13 +40,13 @@ typically come with prebuilt binaries that make installation more reliable. In your [Azure DevOps](https://dev.azure.com) project create a new build pipeline referring to the -[docker-image-pipeline.yml](environment_setup/docker-image-pipeline.yml) +[./environment_setup/docker-image-pipeline.yml](environment_setup/docker-image-pipeline.yml) pipeline definition in your forked repository. Create a pipeline variable named `agentImageName` and give it an appropriate value to name your image with, e.g. `mlops/diabetes_regression`. -Run the pipeline. +Save and run the pipeline. 
## Modify the model pipeline From 85a8d7577622c04d36d4af41c1a597ff49b559c3 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Mon, 24 Feb 2020 21:24:25 +0100 Subject: [PATCH 10/18] Update custom_container.md --- docs/custom_container.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/custom_container.md b/docs/custom_container.md index b8ec14d6..071a72a5 100644 --- a/docs/custom_container.md +++ b/docs/custom_container.md @@ -73,12 +73,12 @@ Run the pipeline and ensure your container has been used. ## Dealing with branch concurrency -Especially when working in a team, it's possible that multiple team members +Especially when working in a team, it's possible for environment changes across branches to interfere with one another. -For example, if the master branch is using scikit-learn and Alice creates a branch to use Tensorflow instead, and she removes scikit-learn from the +For example, if the master branch is using scikit-learn and you create a branch to use Tensorflow instead, and you remove scikit-learn from the [ci_dependencies.yml](../diabetes_regression/ci_dependencies.yml) Conda environment definition and runs the [docker-image-pipeline.yml](environment_setup/docker-image-pipeline.yml) Docker image, the master branch will stop building. -Alice could leave scikit-learn in addition to Tensorflow in the environment, but that is not ideal, as she would have to take an extra step to remove scikit-learn after merging her branch to master. +You could leave scikit-learn in addition to Tensorflow in the environment, but that is not ideal, as you would have to take an extra step to remove scikit-learn after merging your branch to master. -A better approach would be for Alice to use a distinct name for her environment, such as mlops/diabetes_regression_tensorflow. +A better approach would be to use a distinct name for your modified environment, such as `mlops/diabetes_regression/tensorflow`. 
From b4412f4bbee70e5f8e243d95275ee96f15b9c4b7 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Mon, 24 Feb 2020 21:30:20 +0100 Subject: [PATCH 11/18] Update getting_started.md --- docs/getting_started.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index de4267eb..3f5dae9e 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -254,7 +254,9 @@ Make sure your webapp has the credentials to pull the image from the Azure Conta # Customizing the Azure DevOps job container -The pipeline uses a Docker container on the Azure Pipelines agents to accomplish the pipeline steps. The image of the container ***mcr.microsoft.com/mlops/python:latest*** is built with this [Dockerfile](../environment_setup/Dockerfile) and it has all necessary dependencies installed for the purposes of this repository. This image serves as an example of using a custom Docker image that provides a pre-baked environment. This environment is guaranteed to be the same on any building agent, VM or local machine. In your project you will want to build your own Docker image that only contains the dependencies and tools required for your use case. This image will be more likely smaller and therefore faster, and it will be totally maintained by your team. See [instructions for Customizing the Azure DevOps job container](./custom_container.md). +The pipeline uses a Docker container on the Azure Pipelines agents to accomplish the pipeline steps. The image of the container ***mcr.microsoft.com/mlops/python:latest*** is built with this [Dockerfile](../environment_setup/Dockerfile) and it has all necessary dependencies installed for the purposes of this repository. This image serves as an example of using a custom Docker image that provides a pre-baked environment. This environment is guaranteed to be the same on any building agent, VM or local machine. 
+ +In your project you will want to build your own Docker image that only contains the dependencies and tools required for your use case. This image will be more likely smaller and therefore faster, and it will be totally maintained by your team. See [instructions for customizing the Azure DevOps job container](./custom_container.md). # Next steps @@ -263,6 +265,7 @@ The pipeline uses a Docker container on the Azure Pipelines agents to accomplish * The provided pipeline definition YAML file is a sample starting point, which you should tailor to your processes and environment. * You should edit the pipeline definition to remove unused stages. For example, if you are deploying to ACI and AKS, you should delete the unused `Deploy_Webapp` stage. * You may wish to enable [manual approvals](https://docs.microsoft.com/en-us/azure/devops/pipelines/process/approvals) before the deployment stages. +* You may want to use [Azure DevOps self-hosted agents](https://docs.microsoft.com/en-us/azure/devops/pipelines/agents/agents?view=azure-devops&tabs=browser#install) to speed up your ML pipeline execution. The Docker container image for the ML pipeline is sizable, and having it cached on the agent between runs can trim several minutes from your runs. * You can install additional Conda or pip packages by modifying the YAML environment configurations under the `diabetes_regression` directory. Make sure to use fixed version numbers for all packages to ensure reproducibility, and use the same versions across environments. 
* You can explore aspects of model observability in the solution, such as: * **Logging**: navigate to the Application Insights instance linked to the Azure ML Portal, From 4ffc60feb24df2e9b1c77b36c2ac893322139038 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Mon, 24 Feb 2020 21:35:00 +0100 Subject: [PATCH 12/18] Update getting_started.md --- docs/getting_started.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index 3f5dae9e..60f4e6b1 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -158,7 +158,7 @@ performs linting, unit testing and publishes a training pipeline. ### Set up the Pipeline In your [Azure DevOps](https://dev.azure.com) project create and run a new build -pipeline referring to the [diabetes_regression-ci-build-train.yml](../.pipelines/azdo-ci-build-train.yml) +pipeline referring to the [diabetes_regression-ci-build-train.yml](../.pipelines/diabetes_regression-ci-build-train.yml) pipeline definition in your forked repository: ![configure ci build pipeline](./images/ci-build-pipeline-configure.png) From f6eae395e1f55cd28e569ee4d50de11f14b22d9e Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Mon, 24 Feb 2020 21:36:28 +0100 Subject: [PATCH 13/18] Update custom_container.md --- docs/custom_container.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/custom_container.md b/docs/custom_container.md index 071a72a5..8e59b275 100644 --- a/docs/custom_container.md +++ b/docs/custom_container.md @@ -23,8 +23,8 @@ provision a separate instance. Create a service connection to your Azure Container Registry: - As *Connection type*, select *Docker Registry* - As *Registry type*, select *Azure Container Registry* -- As *Azure container registry*, select your Container registry instance. -- As *Service connection name*, enter `acrconnection`. 
+- As *Azure container registry*, select your Container registry instance +- As *Service connection name*, enter `acrconnection` ## Update the environment definition @@ -40,7 +40,7 @@ typically come with prebuilt binaries that make installation more reliable. In your [Azure DevOps](https://dev.azure.com) project create a new build pipeline referring to the -[./environment_setup/docker-image-pipeline.yml](environment_setup/docker-image-pipeline.yml) +[../environment_setup/docker-image-pipeline.yml](environment_setup/docker-image-pipeline.yml) pipeline definition in your forked repository. Create a pipeline variable named `agentImageName` and give it an appropriate @@ -77,7 +77,7 @@ Especially when working in a team, it's possible for environment changes across For example, if the master branch is using scikit-learn and you create a branch to use Tensorflow instead, and you remove scikit-learn from the [ci_dependencies.yml](../diabetes_regression/ci_dependencies.yml) Conda environment definition -and runs the [docker-image-pipeline.yml](environment_setup/docker-image-pipeline.yml) Docker image, the master branch will stop building. +and runs the [docker-image-pipeline.yml](../environment_setup/docker-image-pipeline.yml) Docker image, the master branch will stop building. You could leave scikit-learn in addition to Tensorflow in the environment, but that is not ideal, as you would have to take an extra step to remove scikit-learn after merging your branch to master. 
From 3f5d430e0f039aa69845dbb74065bee7b75576e6 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Mon, 24 Feb 2020 21:36:51 +0100 Subject: [PATCH 14/18] Update custom_container.md --- docs/custom_container.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/custom_container.md b/docs/custom_container.md index 8e59b275..61ab62ae 100644 --- a/docs/custom_container.md +++ b/docs/custom_container.md @@ -40,7 +40,7 @@ typically come with prebuilt binaries that make installation more reliable. In your [Azure DevOps](https://dev.azure.com) project create a new build pipeline referring to the -[../environment_setup/docker-image-pipeline.yml](environment_setup/docker-image-pipeline.yml) +[./environment_setup/docker-image-pipeline.yml](../environment_setup/docker-image-pipeline.yml) pipeline definition in your forked repository. Create a pipeline variable named `agentImageName` and give it an appropriate From 69bd88727f87c7a6199dd9ff12d5748955e6bacb Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Mon, 24 Feb 2020 21:40:22 +0100 Subject: [PATCH 15/18] Update custom_container.md --- docs/custom_container.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/custom_container.md b/docs/custom_container.md index 61ab62ae..09aa78cb 100644 --- a/docs/custom_container.md +++ b/docs/custom_container.md @@ -75,9 +75,11 @@ Run the pipeline and ensure your container has been used. Especially when working in a team, it's possible for environment changes across branches to interfere with one another. 
-For example, if the master branch is using scikit-learn and you create a branch to use Tensorflow instead, and you remove scikit-learn from the +For example, if the master branch is using scikit-learn and you create a branch to use Tensorflow instead, and you +decide to remove scikit-learn from the [ci_dependencies.yml](../diabetes_regression/ci_dependencies.yml) Conda environment definition -and runs the [docker-image-pipeline.yml](../environment_setup/docker-image-pipeline.yml) Docker image, the master branch will stop building. +and run the [docker-image-pipeline.yml](../environment_setup/docker-image-pipeline.yml) Docker image, +then the master branch will stop building. You could leave scikit-learn in addition to Tensorflow in the environment, but that is not ideal, as you would have to take an extra step to remove scikit-learn after merging your branch to master. From dd61a7accfea1afe4622fa1f6489b0bca89fbd53 Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Wed, 26 Feb 2020 16:42:25 +0100 Subject: [PATCH 16/18] PR comments --- docs/custom_container.md | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/docs/custom_container.md b/docs/custom_container.md index 09aa78cb..fcc0e449 100644 --- a/docs/custom_container.md +++ b/docs/custom_container.md @@ -20,7 +20,7 @@ provision a separate instance. 
## Create a Registry Service Connection -Create a service connection to your Azure Container Registry: +[Create a service connection](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/service-endpoints?view=azure-devops&tabs=yaml#sep-docreg) to your Azure Container Registry: - As *Connection type*, select *Docker Registry* - As *Registry type*, select *Azure Container Registry* - As *Azure container registry*, select your Container registry instance @@ -29,8 +29,12 @@ Create a service connection to your Azure Container Registry: ## Update the environment definition Modify the [Dockerfile](../environment_setup/Dockerfile) and/or the -[ci_dependencies.yml](../diabetes_regression/ci_dependencies.yml) Conda +[ci_dependencies.yml](../diabetes_regression/ci_dependencies.yml) CI Conda environment definition to tailor your environment. +Conda provides a [reusable environment for training and deployment with Azure Machine Learning](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-use-environments). +The Conda environment used for CI should use the same package versions as the Conda environment +used for the Azure ML training and scoring environments (defined in [conda_dependencies.yml](../diabetes_regression/conda_dependencies.yml)). +This enables you to run unit and integration tests using the exact same dependencies as used in the ML pipeline. If a package is available in a Conda package repository, then we recommend that you use the Conda installation rather than the pip installation. Conda packages @@ -40,13 +44,16 @@ typically come with prebuilt binaries that make installation more reliable. In your [Azure DevOps](https://dev.azure.com) project create a new build pipeline referring to the -[./environment_setup/docker-image-pipeline.yml](../environment_setup/docker-image-pipeline.yml) +[environment_setup/docker-image-pipeline.yml](../environment_setup/docker-image-pipeline.yml) pipeline definition in your forked repository. 
-Create a pipeline variable named `agentImageName` and give it an appropriate -value to name your image with, e.g. `mlops/diabetes_regression`. +Edit the [environment_setup/docker-image-pipeline.yml](../environment_setup/docker-image-pipeline.yml) file +and replace the string `'public/mlops/python'` with a name suitable to describe your environment, +e.g. `'mlops/diabetes_regression'`. -Save and run the pipeline. +Save and run the pipeline. This will build and push a container image to your Azure Container Registry with +the name you have just edited. The next step is to modify the build pipeline to run the CI job on a container +run from that image. ## Modify the model pipeline @@ -71,7 +78,7 @@ resources: Run the pipeline and ensure your container has been used. -## Dealing with branch concurrency +## Addressing conflicting dependencies Especially when working in a team, it's possible for environment changes across branches to interfere with one another. @@ -84,3 +91,9 @@ then the master branch will stop building. You could leave scikit-learn in addition to Tensorflow in the environment, but that is not ideal, as you would have to take an extra step to remove scikit-learn after merging your branch to master. A better approach would be to use a distinct name for your modified environment, such as `mlops/diabetes_regression/tensorflow`. +By changing the name of the image in your branch in both the container build pipeline +[environment_setup/docker-image-pipeline.yml](../environment_setup/docker-image-pipeline.yml) +and the model pipeline file +[diabetes_regression-ci-build-train.yml](../.pipelines/diabetes_regression-ci-build-train.yml), +and running both pipelines in sequence on your branch, +you avoid any branch conflicts, and the name does not have to be changed after merging to master. 
From d641b879b2258062df3b38874d5ead8b078c52cd Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Wed, 26 Feb 2020 17:00:37 +0100 Subject: [PATCH 17/18] PR fixes --- bootstrap/README.md | 10 ++++++++++ docs/getting_started.md | 8 ++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/bootstrap/README.md b/bootstrap/README.md index 438c4d02..a05ecc94 100644 --- a/bootstrap/README.md +++ b/bootstrap/README.md @@ -2,6 +2,8 @@ To use this existing project structure and scripts for your new ML project, you can quickly get started from the existing repository, bootstrap and create a template that works for your ML project. Bootstrapping will prepare a similar directory structure for your project which includes renaming files and folders, deleting and cleaning up some directories and fixing imports and absolute path based on your project name. This will enable reusing various resources like pre-built pipelines and scripts for your new project. +## Generating a project structure + To bootstrap from the existing MLOpsPython repository clone this repository, ensure Python is installed locally, and run bootstrap.py script as below `python bootstrap.py --d [dirpath] --n [projectname]` @@ -11,3 +13,11 @@ Where `[dirpath]` is the absolute path to the root of your directory where MLOps The script renames folders, files and files' content from the base project name `diabetes` to your project name. However, you might need to manually rename variables defined in a variable group and their values. [This article](https://docs.microsoft.com/azure/machine-learning/tutorial-convert-ml-experiment-to-production#use-your-own-model-with-mlopspython-code-template) will also assist to use this code template for your own ML project. + +## Customizing the CI and AML environments + +In your project you will want to customize your own Docker image and Conda environment to use only the dependencies and tools required for your use case. 
This requires you to edit the following environment definition files: +- The Azure ML training and scoring Conda environment defined in [conda_dependencies.yml](../diabetes_regression/conda_dependencies.yml). +- The CI Docker image and Conda environment used by the Azure DevOps build agent. See [instructions for customizing the Azure DevOps job container](../docs/custom_container.md). + +You will want to synchronize dependency versions as appropriate between both environment definitions (for example, ML libraries used both in training and in unit tests). \ No newline at end of file diff --git a/docs/getting_started.md b/docs/getting_started.md index 60f4e6b1..f14fc0ca 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -195,6 +195,8 @@ specified). Example ML pipelines using R have a single step to train a model. Th * The third stage of the pipeline, **Deploy to ACI**, deploys the model to the QA environment in [Azure Container Instances](https://azure.microsoft.com/en-us/services/container-instances/). It then runs a *smoke test* to validate the deployment, i.e. sends a sample query to the scoring web service and verifies that it returns a response in the expected format. +The pipeline uses a Docker container on the Azure Pipelines agents to accomplish the pipeline steps. The image of the container ***mcr.microsoft.com/mlops/python:latest*** is built with this [Dockerfile](../environment_setup/Dockerfile) and it has all necessary dependencies installed for the purposes of this repository. This image serves as an example of using a custom Docker image that provides a pre-baked environment. This environment is guaranteed to be the same on any building agent, VM or local machine. In your project you will want to build your own Docker image that only contains the dependencies and tools required for your use case. This image will most likely be smaller and therefore faster, and it will be totally maintained by your team. 
+ Wait until the pipeline finishes and verify that there is a new model in the **ML Workspace**: ![trained model](./images/trained-model.png) @@ -252,12 +254,6 @@ Make sure your webapp has the credentials to pull the image from the Azure Conta ![build](./images/multi-stage-webapp.png) -# Customizing the Azure DevOps job container - -The pipeline uses a Docker container on the Azure Pipelines agents to accomplish the pipeline steps. The image of the container ***mcr.microsoft.com/mlops/python:latest*** is built with this [Dockerfile](../environment_setup/Dockerfile) and it has all necessary dependencies installed for the purposes of this repository. This image serves as an example of using a custom Docker image that provides a pre-baked environment. This environment is guaranteed to be the same on any building agent, VM or local machine. - -In your project you will want to build your own Docker image that only contains the dependencies and tools required for your use case. This image will be more likely smaller and therefore faster, and it will be totally maintained by your team. See [instructions for customizing the Azure DevOps job container](./custom_container.md). - # Next steps * You may wish to follow the [bootstrap instructions](../bootstrap/README.md) to create a starting point for your project use case. From 7c9154d4bae467d8453c1f91669c8df67662df6a Mon Sep 17 00:00:00 2001 From: Alexandre Gattiker Date: Wed, 26 Feb 2020 17:02:51 +0100 Subject: [PATCH 18/18] Update README.md --- bootstrap/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bootstrap/README.md b/bootstrap/README.md index a05ecc94..bf7842db 100644 --- a/bootstrap/README.md +++ b/bootstrap/README.md @@ -2,7 +2,7 @@ To use this existing project structure and scripts for your new ML project, you can quickly get started from the existing repository, bootstrap and create a template that works for your ML project. 
Bootstrapping will prepare a similar directory structure for your project which includes renaming files and folders, deleting and cleaning up some directories and fixing imports and absolute path based on your project name. This will enable reusing various resources like pre-built pipelines and scripts for your new project. -## Generating a project structure +## Generating the project structure To bootstrap from the existing MLOpsPython repository clone this repository, ensure Python is installed locally, and run bootstrap.py script as below