Skip to content

Commit 057424b

Browse files
authored
Azure example terraform (#1274)
* example terraform for azure Signed-off-by: Jacob Klegar <jacob@tecton.ai> * azure tf adjustments Signed-off-by: Jacob Klegar <jacob@tecton.ai> * more adjustments Signed-off-by: Jacob Klegar <jacob@tecton.ai> * changes to example notebook Signed-off-by: Jacob Klegar <jacob@tecton.ai> * add default aks namespace Signed-off-by: Jacob Klegar <jacob@tecton.ai> * remove extra print statement Signed-off-by: Jacob Klegar <jacob@tecton.ai>
1 parent c50c21c commit 057424b

File tree

11 files changed

+418
-0
lines changed

11 files changed

+418
-0
lines changed

examples/minimal/minimal_ride_hailing.ipynb

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,15 @@
571571
" files = [\"s3://\" + path for path in fs.glob(uri + '/part-*')]\n",
572572
" ds = ParquetDataset(files, filesystem=fs)\n",
573573
" return ds.read().to_pandas()\n",
574+
" elif parsed_uri.scheme == 'wasbs':\n",
575+
" import adlfs\n",
576+
" fs = adlfs.AzureBlobFileSystem(\n",
577+
" account_name=os.getenv('FEAST_AZURE_BLOB_ACCOUNT_NAME'), account_key=os.getenv('FEAST_AZURE_BLOB_ACCOUNT_ACCESS_KEY')\n",
578+
" )\n",
579+
" uripath = parsed_uri.username + parsed_uri.path\n",
580+
" files = fs.glob(uripath + '/part-*')\n",
581+
" ds = ParquetDataset(files, filesystem=fs)\n",
582+
" return ds.read().to_pandas()\n",
574583
" else:\n",
575584
" raise ValueError(f\"Unsupported URL scheme {uri}\")"
576585
]
@@ -1275,6 +1284,12 @@
12751284
"metadata": {},
12761285
"outputs": [],
12771286
"source": [
1287+
"# Note: depending on the Kafka configuration you may need to create the Kafka topic first, like below:\n",
1288+
"#from confluent_kafka.admin import AdminClient, NewTopic\n",
1289+
"#admin = AdminClient({'bootstrap.servers': KAFKA_BROKER})\n",
1290+
"#new_topic = NewTopic('driver_trips', num_partitions=1, replication_factor=3)\n",
1291+
"#admin.create_topics(new_topic)\n",
1292+
"\n",
12781293
"for record in trips_df.drop(columns=['created']).to_dict('record'):\n",
12791294
" record[\"datetime\"] = (\n",
12801295
" record[\"datetime\"].to_pydatetime().replace(tzinfo=pytz.utc)\n",

infra/terraform/azure/README.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Terraform config for Feast on Azure
2+
3+
This serves as a guide on how to deploy Feast on Azure. At the end of this guide, we will have provisioned:
4+
1. AKS cluster
5+
2. Feast services running on AKS
6+
3. Azure Cache (Redis) as online store
7+
4. Spark operator on AKS
8+
5. Kafka running on HDInsight.
9+
10+
# Steps
11+
12+
1. Create a tfvars file, e.g. `my.tfvars`. A sample configuration is as below:
13+
14+
```
15+
name_prefix = "feast09"
16+
resource_group = "Feast" # pre-existing resource group
17+
```
18+
19+
2. Configure the Terraform state backend, e.g.:
20+
```
21+
terraform {
22+
backend "azurerm" {
23+
storage_account_name = "<your storage account name>"
24+
container_name = "<your container name>"
25+
key = "<your blob name>"
26+
}
27+
}
28+
```
29+
30+
3. Use `terraform apply -var-file="my.tfvars"` to deploy.
31+
32+
Note: to get the list of Kafka brokers needed for streaming ingestion, use
33+
34+
`curl -sS -u <Kafka gateway username>:<Kafka gateway password> -G https://<Kafka cluster name>.azurehdinsight.net/api/v1/clusters/<Kafka cluster name>/services/KAFKA/components/KAFKA_BROKER | jq -r '["\(.host_components[].HostRoles.host_name):9092"] | join(",")'`
35+
36+
where the Kafka gateway username is `<name_prefix>-kafka-gateway`, the Kafka cluster name is `<name_prefix>-kafka`, and the Kafka gateway password is stored in a Kubernetes secret named `feast-kafka-gateway` (readable via `kubectl get secret`).

infra/terraform/azure/aks.tf

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# AKS cluster that hosts the Feast services and the Spark operator.
resource "azurerm_kubernetes_cluster" "main" {
  name                = "${var.name_prefix}-aks"
  location            = data.azurerm_resource_group.main.location
  resource_group_name = data.azurerm_resource_group.main.name
  dns_prefix          = var.name_prefix

  default_node_pool {
    name           = var.name_prefix
    vm_size        = var.aks_machine_type
    node_count     = var.aks_node_count
    vnet_subnet_id = azurerm_subnet.main.id
  }

  # System-assigned managed identity: no service-principal credentials to manage.
  identity {
    type = "SystemAssigned"
  }
}

infra/terraform/azure/helm.tf

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
locals {
  feast_postgres_secret_name = "${var.name_prefix}-postgres-secret"

  # Values passed to the Feast umbrella chart. The bundled Redis, Grafana
  # and Kafka subcharts are disabled because this configuration provisions
  # Azure-managed equivalents (Azure Cache, HDInsight Kafka) instead.
  feast_helm_values = {
    redis = {
      enabled = false
    }

    grafana = {
      enabled = false
    }

    kafka = {
      enabled = false
    }

    postgresql = {
      existingSecret = local.feast_postgres_secret_name
    }

    feast-core = {
      postgresql = {
        existingSecret = local.feast_postgres_secret_name
      }
    }

    # Point the online serving store at the Azure Cache (Redis) instance,
    # over its TLS port.
    feast-online-serving = {
      enabled = true
      "application-override.yaml" = {
        feast = {
          core-host      = "${var.name_prefix}-feast-core"
          core-grpc-port = 6565
          active_store   = "online_store"
          stores = [
            {
              name = "online_store"
              type = "REDIS"
              config = {
                host = azurerm_redis_cache.main.hostname
                port = azurerm_redis_cache.main.ssl_port
                ssl  = true
              }
            }
          ]
        }
      }
    }

    # Environment for the bundled Jupyter image: Redis connection, the k8s
    # Spark launcher, and wasbs:// staging/output locations on the storage
    # account created in storage.tf.
    feast-jupyter = {
      enabled = true
      envOverrides = {
        feast_redis_host                         = azurerm_redis_cache.main.hostname
        feast_redis_port                         = azurerm_redis_cache.main.ssl_port
        feast_redis_ssl                          = true
        feast_spark_launcher                     = "k8s"
        feast_spark_staging_location             = "wasbs://${azurerm_storage_container.staging.name}@${azurerm_storage_account.main.name}.blob.core.windows.net/artifacts/"
        feast_historical_feature_output_location = "wasbs://${azurerm_storage_container.staging.name}@${azurerm_storage_account.main.name}.blob.core.windows.net/out/"
        feast_historical_feature_output_format   = "parquet"
        demo_data_location                       = "wasbs://${azurerm_storage_container.staging.name}@${azurerm_storage_account.main.name}.blob.core.windows.net/test-data/"
        feast_azure_blob_account_name            = azurerm_storage_account.main.name
        feast_azure_blob_account_access_key      = azurerm_storage_account.main.primary_access_key
      }
    }
  }
}
65+
66+
# Random password for the chart-managed PostgreSQL instance. Constrained to
# contain at least one upper-case, lower-case and numeric character, matching
# the convention used for the Kafka role password in kafka.tf.
resource "random_password" "feast-postgres-password" {
  length      = 16
  special     = false
  min_upper   = 1
  min_lower   = 1
  min_numeric = 1
}

# Secret consumed by the Feast chart via postgresql.existingSecret. It must
# live in the same namespace as the Feast Helm release; without an explicit
# namespace it is created in the provider default ("default") and the chart
# fails to resolve it whenever var.aks_namespace is set to anything else.
resource "kubernetes_secret" "feast-postgres-secret" {
  metadata {
    name      = local.feast_postgres_secret_name
    namespace = var.aks_namespace
  }
  data = {
    postgresql-password = random_password.feast-postgres-password.result
  }
}
79+
80+
# Install the Feast umbrella chart from the local charts directory, rendered
# with the values assembled in local.feast_helm_values.
resource "helm_release" "feast" {
  depends_on = [kubernetes_secret.feast-postgres-secret]

  name      = var.name_prefix
  namespace = var.aks_namespace
  chart     = "../../charts/feast"

  values = [yamlencode(local.feast_helm_values)]
}

# Spark operator used by the "k8s" Feast spark launcher to run Spark jobs
# on AKS under the "spark" service account.
resource "helm_release" "sparkop" {
  name       = "sparkop"
  namespace  = "default"
  repository = "https://googlecloudplatform.github.io/spark-on-k8s-operator"
  chart      = "spark-operator"

  set {
    name  = "serviceAccounts.spark.name"
    value = "spark"
  }
}

infra/terraform/azure/kafka.tf

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# HDInsight-managed Kafka cluster used by Feast for streaming ingestion.
resource "azurerm_hdinsight_kafka_cluster" "main" {
  name                = "${var.name_prefix}-kafka"
  location            = data.azurerm_resource_group.main.location
  resource_group_name = data.azurerm_resource_group.main.name
  cluster_version     = "4.0"
  tier                = "Standard"

  component_version {
    kafka = "2.1"
  }

  # HTTPS (Ambari) gateway — used e.g. to query the broker host list, see
  # the README in this directory.
  gateway {
    enabled  = true
    username = "${var.name_prefix}-kafka-gateway"
    password = random_password.feast-kafka-gateway-password.result
  }

  # Default cluster storage lives in the "kafkastorage" container of the
  # shared storage account.
  storage_account {
    is_default           = true
    storage_account_key  = azurerm_storage_account.main.primary_access_key
    storage_container_id = azurerm_storage_container.kafka.id
  }

  # All node roles share one OS user and sit in the dedicated Kafka subnet.
  roles {
    head_node {
      vm_size            = var.kafka_head_vm_size
      username           = "${var.name_prefix}-kafka-user"
      password           = random_password.feast-kafka-role-password.result
      subnet_id          = azurerm_subnet.kafka.id
      virtual_network_id = azurerm_virtual_network.main.id
    }

    worker_node {
      vm_size                  = var.kafka_worker_vm_size
      username                 = "${var.name_prefix}-kafka-user"
      password                 = random_password.feast-kafka-role-password.result
      number_of_disks_per_node = var.kafka_worker_disks_per_node
      target_instance_count    = var.kafka_worker_target_instance_count
      subnet_id                = azurerm_subnet.kafka.id
      virtual_network_id       = azurerm_virtual_network.main.id
    }

    zookeeper_node {
      vm_size            = var.kafka_zookeeper_vm_size
      username           = "${var.name_prefix}-kafka-user"
      password           = random_password.feast-kafka-role-password.result
      subnet_id          = azurerm_subnet.kafka.id
      virtual_network_id = azurerm_virtual_network.main.id
    }
  }
}
50+
51+
# Node login password for the HDInsight head/worker/zookeeper roles. Azure
# HDInsight requires cluster passwords to contain at least one digit, one
# upper-case letter, one lower-case letter and one non-alphanumeric character
# (excluding ' " ` \); with special = false the original password could be
# rejected at cluster creation, so specials are enabled with a restricted set.
resource "random_password" "feast-kafka-role-password" {
  length           = 16
  special          = true
  override_special = "!#$%&*()-_=+[]{}<>:?"
  min_upper        = 1
  min_lower        = 1
  min_numeric      = 1
  min_special      = 1
}

# Password for the HDInsight gateway (Ambari) endpoint.
resource "random_password" "feast-kafka-gateway-password" {
  length      = 16
  special     = true
  min_upper   = 1
  min_lower   = 1
  min_special = 1
  min_numeric = 1
}

# Expose the gateway password to cluster users (the README tells them to read
# the "feast-kafka-gateway" secret via kubectl).
# NOTE(review): created in the provider's default namespace — confirm that is
# where users expect to find it when var.aks_namespace is overridden.
resource "kubernetes_secret" "feast-kafka-gateway-secret" {
  metadata {
    name = "feast-kafka-gateway"
  }
  data = {
    kafka-gateway-password = random_password.feast-kafka-gateway-password.result
  }
}

infra/terraform/azure/provider.tf

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Provider versions are pinned to what this configuration was written against.
provider "azurerm" {
  version = "=2.40.0"
  features {}
}

# Both the helm and kubernetes providers authenticate directly with the AKS
# cluster created in aks.tf, using its kubeconfig credentials rather than a
# local kubeconfig file (load_config_file = false).
provider "helm" {
  version = "~> 1.3.2"
  kubernetes {
    host                   = azurerm_kubernetes_cluster.main.kube_config.0.host
    username               = azurerm_kubernetes_cluster.main.kube_config.0.username
    password               = azurerm_kubernetes_cluster.main.kube_config.0.password
    client_certificate     = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_certificate)
    client_key             = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_key)
    cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.cluster_ca_certificate)
    load_config_file       = false
  }
}

provider "kubernetes" {
  version                = "~> 1.13.3"
  host                   = azurerm_kubernetes_cluster.main.kube_config.0.host
  username               = azurerm_kubernetes_cluster.main.kube_config.0.username
  password               = azurerm_kubernetes_cluster.main.kube_config.0.password
  client_certificate     = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_certificate)
  client_key             = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.client_key)
  cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.main.kube_config.0.cluster_ca_certificate)
  load_config_file       = false
}

infra/terraform/azure/redis.tf

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Azure Cache for Redis used as the Feast online store. The Premium SKU is
# what allows deployment into a VNet subnet (subnet_id below).
resource "azurerm_redis_cache" "main" {
  name                = "${var.name_prefix}-redis"
  location            = data.azurerm_resource_group.main.location
  resource_group_name = data.azurerm_resource_group.main.name
  capacity            = var.redis_capacity
  family              = "P"
  sku_name            = "Premium"

  # NOTE(review): authentication is disabled, so access control relies solely
  # on the subnet placement below — confirm this is acceptable for the target
  # environment.
  redis_configuration {
    enable_authentication = false
  }

  subnet_id = azurerm_subnet.redis.id
}

infra/terraform/azure/sparkop.tf

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Role granting full management of SparkApplication custom resources, so the
# Feast "k8s" spark launcher can submit jobs through the spark operator.
resource "kubernetes_role" "sparkop-user" {
  metadata {
    name      = "use-spark-operator"
    namespace = var.aks_namespace
  }
  rule {
    api_groups = ["sparkoperator.k8s.io"]
    resources  = ["sparkapplications"]
    verbs      = ["create", "delete", "deletecollection", "get", "list", "update", "watch", "patch"]
  }
}

# Bind the role above to the "default" service account.
# NOTE(review): the subject declares no namespace, so the provider default
# applies — confirm it matches the namespace the Feast jobs actually run in
# when var.aks_namespace is overridden.
resource "kubernetes_role_binding" "sparkop-user" {
  metadata {
    name      = "use-spark-operator"
    namespace = var.aks_namespace
  }
  role_ref {
    api_group = "rbac.authorization.k8s.io"
    kind      = "Role"
    name      = kubernetes_role.sparkop-user.metadata[0].name
  }
  subject {
    kind = "ServiceAccount"
    name = "default"
  }
}

infra/terraform/azure/storage.tf

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Storage account backing both the Feast staging area and HDInsight Kafka.
# NOTE(review): public blob access is enabled and both containers below use
# container_access_type = "blob", so anyone with the URL can read staged
# artifacts — confirm this is intentional before production use.
resource "azurerm_storage_account" "main" {
  name                     = "${var.name_prefix}storage"
  resource_group_name      = data.azurerm_resource_group.main.name
  location                 = data.azurerm_resource_group.main.location
  account_kind             = "StorageV2"
  account_tier             = "Standard"
  account_replication_type = var.storage_account_replication_type
  allow_blob_public_access = true
}

# Staging container for Spark artifacts, historical outputs and demo data
# (referenced as wasbs:// URLs in helm.tf).
resource "azurerm_storage_container" "staging" {
  name                  = "staging"
  storage_account_name  = azurerm_storage_account.main.name
  container_access_type = "blob"
}

# Default storage container for the HDInsight Kafka cluster.
resource "azurerm_storage_container" "kafka" {
  name                  = "kafkastorage"
  storage_account_name  = azurerm_storage_account.main.name
  container_access_type = "blob"
}

0 commit comments

Comments
 (0)