From 0eeaa650d803d9e35a46f3d0c455cc8b95cbd07e Mon Sep 17 00:00:00 2001 From: Jacob Klegar Date: Tue, 29 Dec 2020 18:15:29 -0500 Subject: [PATCH 1/6] example terraform for azure Signed-off-by: Jacob Klegar --- infra/terraform/azure/README.md | 31 ++++++++++ infra/terraform/azure/aks.tf | 15 +++++ infra/terraform/azure/helm.tf | 95 ++++++++++++++++++++++++++++++ infra/terraform/azure/provider.tf | 26 ++++++++ infra/terraform/azure/redis.tf | 8 +++ infra/terraform/azure/sparkop.tf | 27 +++++++++ infra/terraform/azure/storage.tf | 15 +++++ infra/terraform/azure/variables.tf | 31 ++++++++++ infra/terraform/azure/versions.tf | 8 +++ infra/terraform/azure/vnet.tf | 17 ++++++ 10 files changed, 273 insertions(+) create mode 100644 infra/terraform/azure/README.md create mode 100644 infra/terraform/azure/aks.tf create mode 100644 infra/terraform/azure/helm.tf create mode 100644 infra/terraform/azure/provider.tf create mode 100644 infra/terraform/azure/redis.tf create mode 100644 infra/terraform/azure/sparkop.tf create mode 100644 infra/terraform/azure/storage.tf create mode 100644 infra/terraform/azure/variables.tf create mode 100644 infra/terraform/azure/versions.tf create mode 100644 infra/terraform/azure/vnet.tf diff --git a/infra/terraform/azure/README.md b/infra/terraform/azure/README.md new file mode 100644 index 00000000000..b5b5002f9d7 --- /dev/null +++ b/infra/terraform/azure/README.md @@ -0,0 +1,31 @@ +# Terraform config for Feast on Azure + +This serves as a guide on how to deploy Feast on Azure. At the end of this guide, we will have provisioned: +1. AKS cluster +2. Feast services running on AKS +3. Azure Cache (Redis) as online store +4. Spark operator on AKS +5. Kafka running on AKS. + +# Steps + +1. Create a tfvars file, e.g. `my.tfvars`. A sample configuration is as below: + +``` +name_prefix = "feast-0-9" +resource_group = "Feast" # pre-exisiting resource group +aks_namespace = "default" +``` + +3. 
Configure tf state backend, e.g.: +``` +terraform { + backend "azurerm" { + storage_account_name = "" + container_name = "" + key = "" + } +} +``` + +3. Use `terraform apply -var-file="my.tfvars"` to deploy. diff --git a/infra/terraform/azure/aks.tf b/infra/terraform/azure/aks.tf new file mode 100644 index 00000000000..a121618b6dd --- /dev/null +++ b/infra/terraform/azure/aks.tf @@ -0,0 +1,15 @@ +resource "azurerm_kubernetes_cluster" "main" { + name = "${var.name_prefix}-aks" + location = data.azurerm_resource_group.main.location + resource_group_name = data.azurerm_resource_group.main.name + dns_prefix = var.name_prefix + default_node_pool { + name = "${var.name_prefix}-nodepool" + vm_size = var.aks_machine_type + node_count = var.aks_node_count + vnet_subnet_id = azurerm_subnet.main.id + } + identity { + type = "SystemAssigned" + } +} diff --git a/infra/terraform/azure/helm.tf b/infra/terraform/azure/helm.tf new file mode 100644 index 00000000000..fba4c6d6c92 --- /dev/null +++ b/infra/terraform/azure/helm.tf @@ -0,0 +1,95 @@ +locals { + feast_postgres_secret_name = "${var.name_prefix}-postgres-secret" + feast_helm_values = { + redis = { + enabled = false + } + + grafana = { + enabled = false + } + + postgresql = { + existingSecret = local.feast_postgres_secret_name + } + + feast-core = { + postgresql = { + existingSecret = local.feast_postgres_secret_name + } + } + + feast-online-serving = { + enabled = true + "application-override.yaml" = { + feast = { + core-host = "${var.name_prefix}-feast-core" + core-grpc-port = 6565 + active_store = "online_store" + stores = [ + { + name = "online_store" + type = "REDIS" + config = { + host = azurerm_redis_cache.main.hostname + port = azurerm_redis_cache.main.ssl_port + ssl = true + subscriptions = [ + { + name = "*" + project = "*" + version = "*" + } + ] + } + } + ] + job_store = { + redis_host = azurerm_redis_cache.main.hostname + redis_port = azurerm_redis_cache.main.ssl_port + } + } + } + } + + feast-jupyter = { + 
enabled = true + envOverrides = { + feast_redis_host = azurerm_redis_cache.main.hostname, + feast_redis_port = azurerm_redis_cache.main.ssl_port, + feast_spark_launcher = "standalone" + feast_spark_staging_location = "https://${azurerm_storage_account.main.name}.blob.core.windows.net/${azurerm_storage_container.staging.name}/artifacts/" + feast_historical_feature_output_location : "https://${azurerm_storage_account.main.name}.blob.core.windows.net/${azurerm_storage_container.staging.name}/out/" + feast_historical_feature_output_format : "parquet" + demo_kafka_brokers : "${azurerm_kubernetes_cluster.main.network_profile[0].dns_service_ip}:9094" + demo_data_location : "https://${azurerm_storage_account.main.name}.blob.core.windows.net/${azurerm_storage_container.staging.name}/test-data/" + } + } + } +} + +resource "random_password" "feast-postgres-password" { + length = 16 + special = false +} + +resource "kubernetes_secret" "feast-postgres-secret" { + metadata { + name = local.feast_postgres_secret_name + } + data = { + postgresql-password = random_password.feast-postgres-password.result + } +} + +resource "helm_release" "feast" { + depends_on = [kubernetes_secret.feast-postgres-secret] + + name = var.name_prefix + namespace = var.aks_namespace + chart = "../../charts/feast" + + values = [ + yamlencode(local.feast_helm_values) + ] +} diff --git a/infra/terraform/azure/provider.tf b/infra/terraform/azure/provider.tf new file mode 100644 index 00000000000..7e24dcc11fc --- /dev/null +++ b/infra/terraform/azure/provider.tf @@ -0,0 +1,26 @@ +provider "azurerm" { + version = "=2.40.0" + features {} +} + +provider "helm" { + version = "~> 1.3.2" + kubernetes { + host = azurerm_kubernetes_cluster.main.kube_config.0.host + username = azurerm_kubernetes_cluster.main.kube_config.0.username + password = azurerm_kubernetes_cluster.main.kube_config.0.password + client_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_certificate) + client_key = 
base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_key) + cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.cluster_ca_certificate) + } +} + +provider "kubernetes" { + version = "~> 1.13.3" + host = azurerm_kubernetes_cluster.main.kube_config.0.host + username = azurerm_kubernetes_cluster.main.kube_config.0.username + password = azurerm_kubernetes_cluster.main.kube_config.0.password + client_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_certificate) + client_key = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_key) + cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.cluster_ca_certificate) +} diff --git a/infra/terraform/azure/redis.tf b/infra/terraform/azure/redis.tf new file mode 100644 index 00000000000..6e41ce2eee5 --- /dev/null +++ b/infra/terraform/azure/redis.tf @@ -0,0 +1,8 @@ +resource "azurerm_redis_cache" "main" { + name = "${var.name_prefix}-redis" + location = data.azurerm_resource_group.main.location + resource_group_name = data.azurerm_resource_group.main.name + capacity = var.redis_capacity + family = "C" + sku_name = "Standard" +} diff --git a/infra/terraform/azure/sparkop.tf b/infra/terraform/azure/sparkop.tf new file mode 100644 index 00000000000..e4aa8d7acab --- /dev/null +++ b/infra/terraform/azure/sparkop.tf @@ -0,0 +1,27 @@ +resource "kubernetes_role" "sparkop-user" { + metadata { + name = "use-spark-operator" + namespace = var.aks_namespace + } + rule { + api_groups = ["sparkoperator.k8s.io"] + resources = ["sparkapplications"] + verbs = ["create", "delete", "deletecollection", "get", "list", "update", "watch", "patch"] + } +} + +resource "kubernetes_role_binding" "sparkop-user" { + metadata { + name = "use-spark-operator" + namespace = var.aks_namespace + } + role_ref { + api_group = "rbac.authorization.k8s.io" + kind = "Role" + name = kubernetes_role.sparkop-user.metadata[0].name + } + subject { + kind = 
"ServiceAccount" + name = "default" + } +} diff --git a/infra/terraform/azure/storage.tf b/infra/terraform/azure/storage.tf new file mode 100644 index 00000000000..7724fc56f2c --- /dev/null +++ b/infra/terraform/azure/storage.tf @@ -0,0 +1,15 @@ +resource "azurerm_storage_account" "main" { + name = "${var.name_prefix}storage" + resource_group_name = data.azurerm_resource_group.main.name + location = data.azurerm_resource_group.main.location + account_kind = "StorageV2" + account_tier = "Standard" + account_replication_type = var.storage_account_replication_type + allow_blob_public_access = true +} + +resource "azurerm_storage_container" "staging" { + name = "staging" + storage_account_name = azurerm_storage_account.main.name + container_access_type = "blob" +} diff --git a/infra/terraform/azure/variables.tf b/infra/terraform/azure/variables.tf new file mode 100644 index 00000000000..81866a0f9ea --- /dev/null +++ b/infra/terraform/azure/variables.tf @@ -0,0 +1,31 @@ +variable "resource_group" { + type = string +} + +variable "name_prefix" { + type = string +} + +variable "aks_machine_type" { + type = string + default = "Standard_DS2_v2" +} + +variable "aks_node_count" { + type = number + default = 2 +} + +variable "redis_capacity" { + type = number + default = 2 +} + +variable "storage_account_replication_type" { + type = string + default = "LRS" +} + +variable "aks_namespace" { + type = string +} diff --git a/infra/terraform/azure/versions.tf b/infra/terraform/azure/versions.tf new file mode 100644 index 00000000000..6562d4a0672 --- /dev/null +++ b/infra/terraform/azure/versions.tf @@ -0,0 +1,8 @@ +terraform { + required_providers { + azurerm = { + source = "hashicorp/azurerm" + } + } + required_version = ">= 0.13" +} diff --git a/infra/terraform/azure/vnet.tf b/infra/terraform/azure/vnet.tf new file mode 100644 index 00000000000..b8efa5aaa7c --- /dev/null +++ b/infra/terraform/azure/vnet.tf @@ -0,0 +1,17 @@ +data "azurerm_resource_group" "main" { + name = 
var.resource_group +} + +resource "azurerm_virtual_network" "main" { + name = "${var.name_prefix}-vnet" + location = data.azurerm_resource_group.main.location + resource_group_name = data.azurerm_resource_group.main.name + address_space = ["10.1.0.0/16"] +} + +resource "azurerm_subnet" "main" { + name = "${var.name_prefix}-subnet" + resource_group_name = data.azurerm_resource_group.main.name + virtual_network_name = azurerm_virtual_network.main.name + address_prefixes = ["10.1.0.0/16"] +} From b21302b7ea271105260fb4c08ccf66a224e214da Mon Sep 17 00:00:00 2001 From: Jacob Klegar Date: Thu, 14 Jan 2021 17:44:45 -0500 Subject: [PATCH 2/6] azure tf adjustments Signed-off-by: Jacob Klegar --- infra/terraform/azure/aks.tf | 2 +- infra/terraform/azure/helm.tf | 39 +++++++++++-------- infra/terraform/azure/kafka.tf | 60 ++++++++++++++++++++++++++++++ infra/terraform/azure/provider.tf | 31 +++++++++------ infra/terraform/azure/storage.tf | 6 +++ infra/terraform/azure/variables.tf | 25 +++++++++++++ 6 files changed, 134 insertions(+), 29 deletions(-) create mode 100644 infra/terraform/azure/kafka.tf diff --git a/infra/terraform/azure/aks.tf b/infra/terraform/azure/aks.tf index a121618b6dd..c0899d49c99 100644 --- a/infra/terraform/azure/aks.tf +++ b/infra/terraform/azure/aks.tf @@ -4,7 +4,7 @@ resource "azurerm_kubernetes_cluster" "main" { resource_group_name = data.azurerm_resource_group.main.name dns_prefix = var.name_prefix default_node_pool { - name = "${var.name_prefix}-nodepool" + name = var.name_prefix vm_size = var.aks_machine_type node_count = var.aks_node_count vnet_subnet_id = azurerm_subnet.main.id diff --git a/infra/terraform/azure/helm.tf b/infra/terraform/azure/helm.tf index fba4c6d6c92..ecbcc0ab94d 100644 --- a/infra/terraform/azure/helm.tf +++ b/infra/terraform/azure/helm.tf @@ -9,6 +9,10 @@ locals { enabled = false } + kafka = { + enabled = false + } + postgresql = { existingSecret = local.feast_postgres_secret_name } @@ -34,20 +38,9 @@ locals { host = 
azurerm_redis_cache.main.hostname port = azurerm_redis_cache.main.ssl_port ssl = true - subscriptions = [ - { - name = "*" - project = "*" - version = "*" - } - ] } } ] - job_store = { - redis_host = azurerm_redis_cache.main.hostname - redis_port = azurerm_redis_cache.main.ssl_port - } } } } @@ -57,12 +50,15 @@ locals { envOverrides = { feast_redis_host = azurerm_redis_cache.main.hostname, feast_redis_port = azurerm_redis_cache.main.ssl_port, - feast_spark_launcher = "standalone" - feast_spark_staging_location = "https://${azurerm_storage_account.main.name}.blob.core.windows.net/${azurerm_storage_container.staging.name}/artifacts/" - feast_historical_feature_output_location : "https://${azurerm_storage_account.main.name}.blob.core.windows.net/${azurerm_storage_container.staging.name}/out/" + feast_spark_launcher = "k8s" + feast_spark_staging_location = "wasbs://${azurerm_storage_container.staging.name}@${azurerm_storage_account.main.name}.blob.core.windows.net/artifacts/" + feast_historical_feature_output_location : "wasbs://${azurerm_storage_container.staging.name}@${azurerm_storage_account.main.name}.blob.core.windows.net/out/" feast_historical_feature_output_format : "parquet" - demo_kafka_brokers : "${azurerm_kubernetes_cluster.main.network_profile[0].dns_service_ip}:9094" - demo_data_location : "https://${azurerm_storage_account.main.name}.blob.core.windows.net/${azurerm_storage_container.staging.name}/test-data/" + demo_kafka_brokers : azurerm_hdinsight_kafka_cluster.main.https_endpoint + demo_data_location : "wasbs://${azurerm_storage_container.staging.name}@${azurerm_storage_account.main.name}.blob.core.windows.net/test-data/" + feast_azure_blob_account_name = azurerm_storage_account.main.name + feast_azure_blob_account_access_key = azurerm_storage_account.main.primary_access_key + feast_spark_ingestion_jar = "./feast/spark/ingestion/target/feast-ingestion-spark-develop.jar" } } } @@ -93,3 +89,14 @@ resource "helm_release" "feast" { 
yamlencode(local.feast_helm_values) ] } + +resource "helm_release" "sparkop" { + name = "sparkop" + namespace = "default" + repository = "https://googlecloudplatform.github.io/spark-on-k8s-operator" + chart = "spark-operator" + set { + name = "serviceAccounts.spark.name" + value = "spark" + } +} diff --git a/infra/terraform/azure/kafka.tf b/infra/terraform/azure/kafka.tf new file mode 100644 index 00000000000..5f698c02e34 --- /dev/null +++ b/infra/terraform/azure/kafka.tf @@ -0,0 +1,60 @@ +resource "azurerm_hdinsight_kafka_cluster" "main" { + name = "${var.name_prefix}-kafka" + location = data.azurerm_resource_group.main.location + resource_group_name = data.azurerm_resource_group.main.name + cluster_version = "4.0" + tier = "Standard" + + component_version { + kafka = "2.1" + } + + gateway { + enabled = true + username = "${var.name_prefix}-kafka-gateway" + password = random_password.feast-kafka-gateway-password.result + } + + storage_account { + is_default = true + storage_account_key = azurerm_storage_account.main.primary_access_key + storage_container_id = azurerm_storage_container.kafka.id + } + + roles { + head_node { + vm_size = var.kafka_head_vm_size + username = "${var.name_prefix}-kafka-user" + password = random_password.feast-kafka-role-password.result + } + worker_node { + vm_size = var.kafka_worker_vm_size + username = "${var.name_prefix}-kafka-user" + password = random_password.feast-kafka-role-password.result + number_of_disks_per_node = var.kafka_worker_disks_per_node + target_instance_count = var.kafka_worker_target_instance_count + } + zookeeper_node { + vm_size = var.kafka_zookeeper_vm_size + username = "${var.name_prefix}-kafka-user" + password = random_password.feast-kafka-role-password.result + } + } +} + +resource "random_password" "feast-kafka-role-password" { + length = 16 + special = false + min_upper = 1 + min_lower = 1 + min_numeric = 1 +} + +resource "random_password" "feast-kafka-gateway-password" { + length = 16 + special = true + 
min_upper = 1 + min_lower = 1 + min_special = 1 + min_numeric = 1 +} diff --git a/infra/terraform/azure/provider.tf b/infra/terraform/azure/provider.tf index 7e24dcc11fc..ed5f2afeae8 100644 --- a/infra/terraform/azure/provider.tf +++ b/infra/terraform/azure/provider.tf @@ -3,24 +3,31 @@ provider "azurerm" { features {} } +data "azurerm_kubernetes_cluster" "main" { + name = "${var.name_prefix}-aks" + resource_group_name = data.azurerm_resource_group.main.name +} + provider "helm" { version = "~> 1.3.2" kubernetes { - host = azurerm_kubernetes_cluster.main.kube_config.0.host - username = azurerm_kubernetes_cluster.main.kube_config.0.username - password = azurerm_kubernetes_cluster.main.kube_config.0.password - client_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_certificate) - client_key = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_key) - cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.cluster_ca_certificate) + host = "${data.azurerm_kubernetes_cluster.main.kube_config.0.host}" + username = "${data.azurerm_kubernetes_cluster.main.kube_config.0.username}" + password = "${data.azurerm_kubernetes_cluster.main.kube_config.0.password}" + client_certificate = "${base64decode(data.azurerm_kubernetes_cluster.main.kube_config.0.client_certificate)}" + client_key = "${base64decode(data.azurerm_kubernetes_cluster.main.kube_config.0.client_key)}" + cluster_ca_certificate = "${base64decode(data.azurerm_kubernetes_cluster.main.kube_config.0.cluster_ca_certificate)}" + load_config_file = false } } provider "kubernetes" { version = "~> 1.13.3" - host = azurerm_kubernetes_cluster.main.kube_config.0.host - username = azurerm_kubernetes_cluster.main.kube_config.0.username - password = azurerm_kubernetes_cluster.main.kube_config.0.password - client_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_certificate) - client_key = 
base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_key) - cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.cluster_ca_certificate) + host = "${data.azurerm_kubernetes_cluster.main.kube_config.0.host}" + username = "${data.azurerm_kubernetes_cluster.main.kube_config.0.username}" + password = "${data.azurerm_kubernetes_cluster.main.kube_config.0.password}" + client_certificate = "${base64decode(data.azurerm_kubernetes_cluster.main.kube_config.0.client_certificate)}" + client_key = "${base64decode(data.azurerm_kubernetes_cluster.main.kube_config.0.client_key)}" + cluster_ca_certificate = "${base64decode(data.azurerm_kubernetes_cluster.main.kube_config.0.cluster_ca_certificate)}" + load_config_file = false } diff --git a/infra/terraform/azure/storage.tf b/infra/terraform/azure/storage.tf index 7724fc56f2c..08db2386a49 100644 --- a/infra/terraform/azure/storage.tf +++ b/infra/terraform/azure/storage.tf @@ -13,3 +13,9 @@ resource "azurerm_storage_container" "staging" { storage_account_name = azurerm_storage_account.main.name container_access_type = "blob" } + +resource "azurerm_storage_container" "kafka" { + name = "kafkastorage" + storage_account_name = azurerm_storage_account.main.name + container_access_type = "blob" +} diff --git a/infra/terraform/azure/variables.tf b/infra/terraform/azure/variables.tf index 81866a0f9ea..a8720de8c92 100644 --- a/infra/terraform/azure/variables.tf +++ b/infra/terraform/azure/variables.tf @@ -29,3 +29,28 @@ variable "storage_account_replication_type" { variable "aks_namespace" { type = string } + +variable "kafka_head_vm_size" { + type = string + default = "Standard_DS3_v2" +} + +variable "kafka_worker_vm_size" { + type = string + default = "A5" +} + +variable "kafka_zookeeper_vm_size" { + type = string + default = "Standard_DS3_v2" +} + +variable "kafka_worker_disks_per_node" { + type = number + default = 3 +} + +variable "kafka_worker_target_instance_count" { + type = number + default 
= 3 +} From 37177114606a38e5841193726d580a349704e4d9 Mon Sep 17 00:00:00 2001 From: Jacob Klegar Date: Tue, 19 Jan 2021 17:23:00 -0500 Subject: [PATCH 3/6] more adjustments Signed-off-by: Jacob Klegar --- infra/terraform/azure/README.md | 4 ++-- infra/terraform/azure/helm.tf | 3 +-- infra/terraform/azure/kafka.tf | 15 +++++++++++++++ infra/terraform/azure/provider.tf | 29 ++++++++++++----------------- infra/terraform/azure/redis.tf | 8 ++++++-- infra/terraform/azure/versions.tf | 8 -------- infra/terraform/azure/vnet.tf | 18 ++++++++++++++++-- 7 files changed, 52 insertions(+), 33 deletions(-) delete mode 100644 infra/terraform/azure/versions.tf diff --git a/infra/terraform/azure/README.md b/infra/terraform/azure/README.md index b5b5002f9d7..cdc8b8b4f90 100644 --- a/infra/terraform/azure/README.md +++ b/infra/terraform/azure/README.md @@ -5,14 +5,14 @@ This serves as a guide on how to deploy Feast on Azure. At the end of this guide 2. Feast services running on AKS 3. Azure Cache (Redis) as online store 4. Spark operator on AKS -5. Kafka running on AKS. +5. Kafka running on HDInsight. # Steps 1. Create a tfvars file, e.g. `my.tfvars`. 
A sample configuration is as below: ``` -name_prefix = "feast-0-9" +name_prefix = "feast09" resource_group = "Feast" # pre-exisiting resource group aks_namespace = "default" ``` diff --git a/infra/terraform/azure/helm.tf b/infra/terraform/azure/helm.tf index ecbcc0ab94d..8c28762a438 100644 --- a/infra/terraform/azure/helm.tf +++ b/infra/terraform/azure/helm.tf @@ -50,15 +50,14 @@ locals { envOverrides = { feast_redis_host = azurerm_redis_cache.main.hostname, feast_redis_port = azurerm_redis_cache.main.ssl_port, + feast_redis_ssl = true feast_spark_launcher = "k8s" feast_spark_staging_location = "wasbs://${azurerm_storage_container.staging.name}@${azurerm_storage_account.main.name}.blob.core.windows.net/artifacts/" feast_historical_feature_output_location : "wasbs://${azurerm_storage_container.staging.name}@${azurerm_storage_account.main.name}.blob.core.windows.net/out/" feast_historical_feature_output_format : "parquet" - demo_kafka_brokers : azurerm_hdinsight_kafka_cluster.main.https_endpoint demo_data_location : "wasbs://${azurerm_storage_container.staging.name}@${azurerm_storage_account.main.name}.blob.core.windows.net/test-data/" feast_azure_blob_account_name = azurerm_storage_account.main.name feast_azure_blob_account_access_key = azurerm_storage_account.main.primary_access_key - feast_spark_ingestion_jar = "./feast/spark/ingestion/target/feast-ingestion-spark-develop.jar" } } } diff --git a/infra/terraform/azure/kafka.tf b/infra/terraform/azure/kafka.tf index 5f698c02e34..a7403ff709b 100644 --- a/infra/terraform/azure/kafka.tf +++ b/infra/terraform/azure/kafka.tf @@ -26,6 +26,8 @@ resource "azurerm_hdinsight_kafka_cluster" "main" { vm_size = var.kafka_head_vm_size username = "${var.name_prefix}-kafka-user" password = random_password.feast-kafka-role-password.result + subnet_id = azurerm_subnet.kafka.id + virtual_network_id = azurerm_virtual_network.main.id } worker_node { vm_size = var.kafka_worker_vm_size @@ -33,11 +35,15 @@ resource 
"azurerm_hdinsight_kafka_cluster" "main" { password = random_password.feast-kafka-role-password.result number_of_disks_per_node = var.kafka_worker_disks_per_node target_instance_count = var.kafka_worker_target_instance_count + subnet_id = azurerm_subnet.kafka.id + virtual_network_id = azurerm_virtual_network.main.id } zookeeper_node { vm_size = var.kafka_zookeeper_vm_size username = "${var.name_prefix}-kafka-user" password = random_password.feast-kafka-role-password.result + subnet_id = azurerm_subnet.kafka.id + virtual_network_id = azurerm_virtual_network.main.id } } } @@ -58,3 +64,12 @@ resource "random_password" "feast-kafka-gateway-password" { min_special = 1 min_numeric = 1 } + +resource "kubernetes_secret" "feast-kafka-gateway-secret" { + metadata { + name = "feast-kafka-gateway" + } + data = { + kafka-gateway-password = random_password.feast-kafka-gateway-password.result + } +} diff --git a/infra/terraform/azure/provider.tf b/infra/terraform/azure/provider.tf index ed5f2afeae8..916c10143fc 100644 --- a/infra/terraform/azure/provider.tf +++ b/infra/terraform/azure/provider.tf @@ -3,31 +3,26 @@ provider "azurerm" { features {} } -data "azurerm_kubernetes_cluster" "main" { - name = "${var.name_prefix}-aks" - resource_group_name = data.azurerm_resource_group.main.name -} - provider "helm" { version = "~> 1.3.2" kubernetes { - host = "${data.azurerm_kubernetes_cluster.main.kube_config.0.host}" - username = "${data.azurerm_kubernetes_cluster.main.kube_config.0.username}" - password = "${data.azurerm_kubernetes_cluster.main.kube_config.0.password}" - client_certificate = "${base64decode(data.azurerm_kubernetes_cluster.main.kube_config.0.client_certificate)}" - client_key = "${base64decode(data.azurerm_kubernetes_cluster.main.kube_config.0.client_key)}" - cluster_ca_certificate = "${base64decode(data.azurerm_kubernetes_cluster.main.kube_config.0.cluster_ca_certificate)}" + host = azurerm_kubernetes_cluster.main.kube_config.0.host + username = 
azurerm_kubernetes_cluster.main.kube_config.0.username + password = azurerm_kubernetes_cluster.main.kube_config.0.password + client_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_certificate) + client_key = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_key) + cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.cluster_ca_certificate) load_config_file = false } } provider "kubernetes" { version = "~> 1.13.3" - host = "${data.azurerm_kubernetes_cluster.main.kube_config.0.host}" - username = "${data.azurerm_kubernetes_cluster.main.kube_config.0.username}" - password = "${data.azurerm_kubernetes_cluster.main.kube_config.0.password}" - client_certificate = "${base64decode(data.azurerm_kubernetes_cluster.main.kube_config.0.client_certificate)}" - client_key = "${base64decode(data.azurerm_kubernetes_cluster.main.kube_config.0.client_key)}" - cluster_ca_certificate = "${base64decode(data.azurerm_kubernetes_cluster.main.kube_config.0.cluster_ca_certificate)}" + host = azurerm_kubernetes_cluster.main.kube_config.0.host + username = azurerm_kubernetes_cluster.main.kube_config.0.username + password = azurerm_kubernetes_cluster.main.kube_config.0.password + client_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_certificate) + client_key = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_key) + cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.cluster_ca_certificate) load_config_file = false } diff --git a/infra/terraform/azure/redis.tf b/infra/terraform/azure/redis.tf index 6e41ce2eee5..c6e85a4a0b8 100644 --- a/infra/terraform/azure/redis.tf +++ b/infra/terraform/azure/redis.tf @@ -3,6 +3,10 @@ resource "azurerm_redis_cache" "main" { location = data.azurerm_resource_group.main.location resource_group_name = data.azurerm_resource_group.main.name capacity = var.redis_capacity - family = "C" - sku_name = "Standard" 
+ family = "P" + sku_name = "Premium" + redis_configuration { + enable_authentication = false + } + subnet_id = azurerm_subnet.redis.id } diff --git a/infra/terraform/azure/versions.tf b/infra/terraform/azure/versions.tf deleted file mode 100644 index 6562d4a0672..00000000000 --- a/infra/terraform/azure/versions.tf +++ /dev/null @@ -1,8 +0,0 @@ -terraform { - required_providers { - azurerm = { - source = "hashicorp/azurerm" - } - } - required_version = ">= 0.13" -} diff --git a/infra/terraform/azure/vnet.tf b/infra/terraform/azure/vnet.tf index b8efa5aaa7c..db790991e01 100644 --- a/infra/terraform/azure/vnet.tf +++ b/infra/terraform/azure/vnet.tf @@ -10,8 +10,22 @@ resource "azurerm_virtual_network" "main" { } resource "azurerm_subnet" "main" { - name = "${var.name_prefix}-subnet" + name = "${var.name_prefix}-aks-subnet" resource_group_name = data.azurerm_resource_group.main.name virtual_network_name = azurerm_virtual_network.main.name - address_prefixes = ["10.1.0.0/16"] + address_prefixes = ["10.1.0.0/24"] +} + +resource "azurerm_subnet" "redis" { + name = "${var.name_prefix}-redis-subnet" + resource_group_name = data.azurerm_resource_group.main.name + virtual_network_name = azurerm_virtual_network.main.name + address_prefixes = ["10.1.128.0/24"] +} + +resource "azurerm_subnet" "kafka" { + name = "${var.name_prefix}-kafka-subnet" + resource_group_name = data.azurerm_resource_group.main.name + virtual_network_name = azurerm_virtual_network.main.name + address_prefixes = ["10.1.64.0/24"] } From f7a2ce995a757fed03ed27dfbe47c7dc74e02583 Mon Sep 17 00:00:00 2001 From: Jacob Klegar Date: Tue, 19 Jan 2021 17:49:00 -0500 Subject: [PATCH 4/6] changes to example notebook Signed-off-by: Jacob Klegar --- examples/minimal/minimal_ride_hailing.ipynb | 16 ++++++++++++++++ infra/terraform/azure/README.md | 6 ++++++ 2 files changed, 22 insertions(+) diff --git a/examples/minimal/minimal_ride_hailing.ipynb b/examples/minimal/minimal_ride_hailing.ipynb index 
0d4d9c54b00..31d3efc3a20 100644 --- a/examples/minimal/minimal_ride_hailing.ipynb +++ b/examples/minimal/minimal_ride_hailing.ipynb @@ -571,6 +571,16 @@ " files = [\"s3://\" + path for path in fs.glob(uri + '/part-*')]\n", " ds = ParquetDataset(files, filesystem=fs)\n", " return ds.read().to_pandas()\n", + " elif parsed_uri.scheme == 'wasbs':\n", + " import adlfs\n", + " fs = adlfs.AzureBlobFileSystem(\n", + " account_name=os.getenv('FEAST_AZURE_BLOB_ACCOUNT_NAME'), account_key=os.getenv('FEAST_AZURE_BLOB_ACCOUNT_ACCESS_KEY')\n", + " )\n", + " uripath = parsed_uri.username + parsed_uri.path\n", + " files = fs.glob(uripath + '/part-*')\n", + " print(files)\n", + " ds = ParquetDataset(files, filesystem=fs)\n", + " return ds.read().to_pandas()\n", " else:\n", " raise ValueError(f\"Unsupported URL scheme {uri}\")" ] @@ -1275,6 +1285,12 @@ "metadata": {}, "outputs": [], "source": [ + "# Note: depending on the Kafka configuration you may need to create the Kafka topic first, like below:\n", + "#from confluent_kafka.admin import AdminClient, NewTopic\n", + "#admin = AdminClient({'bootstrap.servers': KAFKA_BROKER})\n", + "#new_topic = NewTopic('driver_trips', num_partitions=1, replication_factor=3)\n", + "#admin.create_topics(new_topic)\n", + "\n", "for record in trips_df.drop(columns=['created']).to_dict('record'):\n", " record[\"datetime\"] = (\n", " record[\"datetime\"].to_pydatetime().replace(tzinfo=pytz.utc)\n", diff --git a/infra/terraform/azure/README.md b/infra/terraform/azure/README.md index cdc8b8b4f90..994ee130c69 100644 --- a/infra/terraform/azure/README.md +++ b/infra/terraform/azure/README.md @@ -29,3 +29,9 @@ terraform { ``` 3. Use `terraform apply -var-file="my.tfvars"` to deploy. 
+
+Note: to get the list of Kafka brokers needed for streaming ingestion, use
+
+`curl -sS -u <gateway-username>:<gateway-password> -G https://<cluster-name>.azurehdinsight.net/api/v1/clusters/<cluster-name>/services/KAFKA/components/KAFKA_BROKER | jq -r '["\(.host_components[].HostRoles.host_name):9092"] | join(",")'`
+
+where the Kafka gateway username is `<name_prefix>-kafka-gateway`, the Kafka cluster name is `<name_prefix>-kafka`, and the Kafka gateway password is a kubectl secret under the name feast-kafka-gateway.

From f438086e3fdea686aed1b028d97f988144378640 Mon Sep 17 00:00:00 2001
From: Jacob Klegar 
Date: Tue, 19 Jan 2021 20:01:51 -0500
Subject: [PATCH 5/6] add default aks namespace

Signed-off-by: Jacob Klegar 

---
 infra/terraform/azure/README.md    | 1 -
 infra/terraform/azure/variables.tf | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/infra/terraform/azure/README.md b/infra/terraform/azure/README.md
index 994ee130c69..b22c870d12a 100644
--- a/infra/terraform/azure/README.md
+++ b/infra/terraform/azure/README.md
@@ -14,7 +14,6 @@ This serves as a guide on how to deploy Feast on Azure. At the end of this guide
 ```
 name_prefix = "feast09"
 resource_group = "Feast" # pre-exisiting resource group
-aks_namespace = "default"
 ```
 
 3. 
Configure tf state backend, e.g.: diff --git a/infra/terraform/azure/variables.tf b/infra/terraform/azure/variables.tf index a8720de8c92..be4e7f2c19d 100644 --- a/infra/terraform/azure/variables.tf +++ b/infra/terraform/azure/variables.tf @@ -28,6 +28,7 @@ variable "storage_account_replication_type" { variable "aks_namespace" { type = string + default = "default" } variable "kafka_head_vm_size" { From 5e93fbbbd474a6b62533f4a011719ee7902152f7 Mon Sep 17 00:00:00 2001 From: Jacob Klegar Date: Wed, 27 Jan 2021 17:34:59 -0500 Subject: [PATCH 6/6] remove extra print statement Signed-off-by: Jacob Klegar --- examples/minimal/minimal_ride_hailing.ipynb | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/minimal/minimal_ride_hailing.ipynb b/examples/minimal/minimal_ride_hailing.ipynb index 31d3efc3a20..bc170fa1f8d 100644 --- a/examples/minimal/minimal_ride_hailing.ipynb +++ b/examples/minimal/minimal_ride_hailing.ipynb @@ -578,7 +578,6 @@ " )\n", " uripath = parsed_uri.username + parsed_uri.path\n", " files = fs.glob(uripath + '/part-*')\n", - " print(files)\n", " ds = ParquetDataset(files, filesystem=fs)\n", " return ds.read().to_pandas()\n", " else:\n",