From de363b7d00f478432ce93c868026a88b98c4be24 Mon Sep 17 00:00:00 2001 From: Blair Chen Date: Mon, 5 Sep 2022 14:39:09 +0800 Subject: [PATCH 01/25] Updates on github PR/Issue templates (#642) This PR updates github PR/Issue templates to make the templates simpler and more accurate. --- .github/ISSUE_TEMPLATE/bug_report_template.yaml | 10 +++++----- .github/ISSUE_TEMPLATE/doc_improvements.yaml | 2 +- .github/ISSUE_TEMPLATE/feature_request_template.yaml | 12 ++++++------ .../non_technical_request_template.yaml | 4 ++-- .github/pull_request_template.md | 8 +++----- 5 files changed, 17 insertions(+), 19 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report_template.yaml b/.github/ISSUE_TEMPLATE/bug_report_template.yaml index ec5d35051..f6f317370 100644 --- a/.github/ISSUE_TEMPLATE/bug_report_template.yaml +++ b/.github/ISSUE_TEMPLATE/bug_report_template.yaml @@ -36,7 +36,7 @@ body: value: | - **OS Platform and Distribution (e.g., Linux Ubuntu 20.0)**: - **Python version**: - - **yarn version, if running the dev UI**: + - **Spark version, if reporting runtime issue**: validations: required: true - type: textarea @@ -50,7 +50,7 @@ body: attributes: label: Tracking information description: | - Please provide the logs and erros for reproducibility. + Please provide the logs and errors for reproducibility. validations: required: false - type: textarea @@ -70,11 +70,11 @@ body: label: What component(s) does this bug affect? description: Please choose one or more components below. options: - - label: "`Python Feathr Client`: This is the client users use to interact with most of our API. Mostly written in Python." + - label: "`Python Client`: This is the client users use to interact with most of our API. Mostly written in Python." required: false - label: "`Computation Engine`: The computation engine that execute the actual feature join and generation work. Mostly in Scala and Spark." 
required: false - - label: "`Feature Registry API Layer`: The storage layer supports SQL, Purview(Atlas). The API layer is in Python(FAST API)" + - label: "`Feature Registry API`: The frontend API layer supports SQL, Purview(Atlas) as storage. The API layer is in Python(FAST API)" required: false - - label: "`Feature Registry Web UI layer`: The Web UI for feature registry. Written in React with a few UI frameworks." + - label: "`Feature Registry Web UI`: The Web UI for feature registry. Written in React" required: false diff --git a/.github/ISSUE_TEMPLATE/doc_improvements.yaml b/.github/ISSUE_TEMPLATE/doc_improvements.yaml index 2d68ab80f..bd8703da4 100644 --- a/.github/ISSUE_TEMPLATE/doc_improvements.yaml +++ b/.github/ISSUE_TEMPLATE/doc_improvements.yaml @@ -31,6 +31,6 @@ body: attributes: label: Description of proposal (what needs changing) description: | - Provide a clear description. Why is the proposed documentation better? + Please provide a clear description. Why is the proposed documentation better? validations: required: true diff --git a/.github/ISSUE_TEMPLATE/feature_request_template.yaml b/.github/ISSUE_TEMPLATE/feature_request_template.yaml index 739378525..ddc3c0405 100644 --- a/.github/ISSUE_TEMPLATE/feature_request_template.yaml +++ b/.github/ISSUE_TEMPLATE/feature_request_template.yaml @@ -1,6 +1,6 @@ name: Feature Request description: Use this template for feature requests. -labels: 'enhancement' +labels: 'feature' title: '[FR]' body: @@ -43,21 +43,21 @@ body: attributes: label: Details description: | - Give a clear description here. + Please provide a clear description here. validations: required: false - type: checkboxes id: component attributes: - label: What component(s) does this bug affect? + label: What component(s) does this feature request affect? description: Please choose one or more components below. options: - - label: "`Python Feathr Client`: This is the client users use to interact with most of our API. Mostly written in Python." 
+ - label: "`Python Client`: This is the client users use to interact with most of our API. Mostly written in Python." required: false - label: "`Computation Engine`: The computation engine that execute the actual feature join and generation work. Mostly in Scala and Spark." required: false - - label: "`Feature Registry API Layer`: The storage layer supports SQL, Purview(Atlas). The API layer is in Python(FAST API)" + - label: "`Feature Registry API`: The frontend API layer supports SQL, Purview(Atlas) as storage. The API layer is in Python(FAST API)" required: false - - label: "`Feature Registry Web UI layer`: The Web UI for feature registry. Written in React with a few UI frameworks." + - label: "`Feature Registry Web UI`: The Web UI for feature registry. Written in React" required: false diff --git a/.github/ISSUE_TEMPLATE/non_technical_request_template.yaml b/.github/ISSUE_TEMPLATE/non_technical_request_template.yaml index 4fd97143d..c09310514 100644 --- a/.github/ISSUE_TEMPLATE/non_technical_request_template.yaml +++ b/.github/ISSUE_TEMPLATE/non_technical_request_template.yaml @@ -1,6 +1,6 @@ name: Non-technical Request description: Use this template for non-technical requests -labels: ['enhancement', 'non-technical'] +labels: ['non-technical'] title: '[NT]' body: @@ -25,6 +25,6 @@ body: attributes: label: Non-technical Request Proposal description: | - Provide a clear high-level description of the request, like documentation improvements etc + Please provide a clear description of the request validations: required: true diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 0a046111b..735b76e5a 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -2,9 +2,7 @@ @@ -13,12 +11,12 @@ Resolves #XXX ## How was this PR tested? ## Does this PR introduce any user-facing changes? - [ ] No. You can skip the rest of this section. 
From 369024b9fda95bdf257b632467d3480ca9089e23 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Tue, 6 Sep 2022 15:26:19 -0700 Subject: [PATCH 02/25] Adding documentation for maven publishing automation (#646) * Adding workflow to automate maven publish * Updating maven workflow * Updating maven workflow * Updating maven workflow file * Adding gpg passphrase * Fixing signing failure * Fixing signing failure * Reverting back to not passing passphrase value for Java setup action * Trying a new plugin * Adding sbt ci-release * Updating version for sbt ci-release * Bumping version for testing * Adding sbt compile explicitly * Adding sbt compile explicitly * Adding trigger for tags * fixing ioctl issue * Adding decode key script to resolve signing issue * Adding decode key script to resolve signing issue * Adding GPG_OPTIONS to decode script * Adding tty options * Bringing TTY back * Upgrading pgp version * Debugging GPG Signing part in maven publish workflow * Manual trigger workflow * Manual trigger workflow * Manual trigger workflow * Adding gpg import * Falling back on ci-release pipeline * Falling back on ci-release pipeline * Changing the host for sonatype credential * Cleaning up the workflow file to do final test * Cleaning up and adding some more comments * Adding more comments * Adding documentation for maven workflow * Fixing wrong env credentials * Adding doc for Maven Automation steps and gotchas --- .github/workflows/publish-to-maven.yml | 6 +-- docs/dev_guide/publish_to_maven.md | 66 +++++++++++++++++++++++--- 2 files changed, 62 insertions(+), 10 deletions(-) diff --git a/.github/workflows/publish-to-maven.yml b/.github/workflows/publish-to-maven.yml index e304ad000..ae4d98e68 100644 --- a/.github/workflows/publish-to-maven.yml +++ b/.github/workflows/publish-to-maven.yml @@ -20,10 +20,10 @@ jobs: java-version: "8" distribution: "temurin" server-id: ossrh - server-username: OSSRH_USERNAME - server-password: OSSRH_PASSWORD + server-username: SONATYPE_USERNAME 
+ server-password: SONATYPE_PASSWORD gpg-private-key: ${{ secrets.MAVEN_GPG_PRIVATE_KEY }} - gpg-passphrase: MAVEN_GPG_PASSPHRASE + gpg-passphrase: PGP_PASSPHRASE # CI release command defaults to publishSigned # Sonatype release command defaults to sonaTypeBundleRelease diff --git a/docs/dev_guide/publish_to_maven.md b/docs/dev_guide/publish_to_maven.md index 772270395..7e4ca163b 100644 --- a/docs/dev_guide/publish_to_maven.md +++ b/docs/dev_guide/publish_to_maven.md @@ -99,15 +99,67 @@ parent: Developer Guides - If the published jar fails to run in Spark with error `java.lang.UnsupportedClassVersionError: com/linkedin/feathr/common/exception/FeathrInputDataException has been compiled by a more recent version of the Java Runtime (class file version 62.0), this version of the Java Runtime only recognizes class file versions up to 52.0`, make sure you complied with the right Java version with -java-home parameter in sbt console. ## CI Automatic Publishing - -(TBD) - +There is a Github Action that automates the above process, you can find it [here](../../.github/workflows/publish-to-maven.yml). This action is triggered anytime a new tag is created, which is usually for release purposes. To manually trigger the pipeline for testing purposes tag can be created using following commands + +```bash + +git tag -a -m "Test tag" +git push --tags + +``` + +Following are some of the things to keep in mind while attempting to do something similar, since signing issues are hard to debug. + +1. There are four secrets that needs to be set for the Github workflow action to work + ```bash + PGP_PASSPHRASE: This is the passphrase that you provided during GPG key pair creation. + PGP_SECRET: The Private Key from GPG key pair created above. + SONATYPE_PASSWORD: Password for oss sonatype repository. + SONATYPE_USERNAME: Username for oss sonatype repository. + ``` + +1. As noted in previous steps, you need to use gpg to create a public-private key pair on your dev machine. 
The public key is uploaded to a Key server for verification purpose. The private gpg key is used to sign the package being uploaded to maven. We export this private key to be used for signing on Github agent using the following command + + ```bash + + gpg --export-secret-keys --armor YOUR_PRIVATE_KEY_ID > privatekey.asc + ``` + Copy everything from the privatekey.asc file and put it as Github secret with name PGP_SECRET + + To get the private key id you can run the following command and use id under section sec (stands for secret) + + ```bash + $ gpg --list-secret-keys + /Users/myuser/.gnupg/pubring.kbx + ------------------------------- + + sec abc123 2022-08-24 [SC] [expires: 2024-08-23] + 3203203SD....... + uid [ultimate] YOUR NAME + ssb abc123 2022-08-24 [E] [expires: 2024-08-23] + ``` +1. Make sure you are using the right credential host in [sonatype.sbt](../../sonatype.sbt) + - For accounts created before Feb 2021 use __oss.sonatype.org__ + - For accounts created after Feb 2021 use __s01.oss.sonatype.org__ + + +1. Make sure you are using latest release of sbt-pgp package, or atleast the one close to the dev box on which gpg keypair is generated. You can change the version in [build.sbt](../../build.sbt) + ```bash + addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.1.2") + ``` + +1. We are using sbt-ci-release plugin, that makes the publishing process easier. Read more about it [here](https://github.com/sbt/sbt-ci-release). 
You can add this in [build.sbt](../../build.sbt) + ```bash + addSbtPlugin("com.github.sbt" % "sbt-ci-release" % "1.5.10") + ``` ### References -https://github.com/xerial/sbt-sonatype +- https://github.com/xerial/sbt-sonatype + +- https://www.linuxbabe.com/security/a-practical-guide-to-gpg-part-1-generate-your-keypair -https://www.linuxbabe.com/security/a-practical-guide-to-gpg-part-1-generate-your-keypair +- https://central.sonatype.org/publish/publish-guide/#deployment -https://central.sonatype.org/publish/publish-guide/#deployment +- https://www.scala-sbt.org/1.x/docs/Using-Sonatype.html -https://www.scala-sbt.org/1.x/docs/Using-Sonatype.html +- https://github.com/sbt/sbt-ci-release From 6a656570a1a4ad7f101d4a72afb9cfcff7a46f8a Mon Sep 17 00:00:00 2001 From: Xiaoyong Zhu Date: Thu, 8 Sep 2022 02:55:05 -0700 Subject: [PATCH 03/25] Update README.md (#649) --- docs/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/README.md b/docs/README.md index e229b5c00..948aaff35 100644 --- a/docs/README.md +++ b/docs/README.md @@ -4,6 +4,7 @@ [![GitHub Release](https://img.shields.io/github/v/release/linkedin/feathr.svg?style=flat&sort=semver&color=blue)](https://github.com/linkedin/feathr/releases) [![Docs Latest](https://img.shields.io/badge/docs-latest-blue.svg)](https://linkedin.github.io/feathr/) [![Python API](https://img.shields.io/readthedocs/feathr?label=Python%20API)](https://feathr.readthedocs.io/en/latest/) +[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/6457/badge)](https://bestpractices.coreinfrastructure.org/projects/6457) ## What is Feathr? 
From cbf65529b4f2985cfed2bc4fddbaf5c1a47cbf50 Mon Sep 17 00:00:00 2001 From: Hangfei Lin Date: Fri, 9 Sep 2022 13:24:24 -0700 Subject: [PATCH 04/25] Update README.md --- docs/README.md | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/docs/README.md b/docs/README.md index 948aaff35..c0fb8a9ff 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,4 +1,18 @@ -# An Enterprise-Grade, High Performance Feature Store - Feathr + +

+ Feathr +

+

+ An enterprise-grade, high performance feature store +

+

+ Support: + Slack & + Discussions +

+ + +# [![License](https://img.shields.io/badge/License-Apache%202.0-blue)](https://github.com/linkedin/feathr/blob/main/LICENSE) [![GitHub Release](https://img.shields.io/github/v/release/linkedin/feathr.svg?style=flat&sort=semver&color=blue)](https://github.com/linkedin/feathr/releases) @@ -186,4 +200,4 @@ Build for the community and build by the community. Check out [Community Guideli ## 📢 Slack Channel -Join our [Slack channel](https://feathrai.slack.com) for questions and discussions (or click the [invitation link](https://join.slack.com/t/feathrai/shared_invite/zt-1f8gc99jm-RsRff2tyQNuYdSFyqZc1kg)). +Join our [Slack channel](https://feathrai.slack.com) for questions and discussions (or click the [invitation link](https://join.slack.com/t/feathrai/shared_invite/zt-1ffva5u6v-voq0Us7bbKAw873cEzHOSg)). From a1da6596928e1d8acbb74f88c4f603f2508a258a Mon Sep 17 00:00:00 2001 From: Hangfei Lin Date: Fri, 9 Sep 2022 13:33:58 -0700 Subject: [PATCH 05/25] Update README.md --- docs/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/README.md b/docs/README.md index c0fb8a9ff..6053c5d5c 100644 --- a/docs/README.md +++ b/docs/README.md @@ -3,10 +3,10 @@ Feathr

- An enterprise-grade, high performance feature store + An enterprise-grade, high-performance feature store

- Support: + Support channels: Slack & Discussions

From e66438f882f453eb483a7c10a70fec754749bbe1 Mon Sep 17 00:00:00 2001 From: Xiaoyong Zhu Date: Mon, 12 Sep 2022 11:37:59 -0700 Subject: [PATCH 06/25] Fix broken doc links (#658) --- docs/README.md | 14 +++++++------- docs/concepts/registry-access-control.md | 4 ++-- docs/dev_guide/feathr_overall_release_guide.md | 4 ++-- docs/how-to-guides/azure-deployment-arm.md | 2 +- docs/quickstart_synapse.md | 8 ++++---- docs/samples/customer360/Customer360.ipynb | 2 +- docs/samples/fraud_detection_demo.ipynb | 2 +- docs/samples/product_recommendation_demo.ipynb | 2 +- .../product_recommendation_demo_advanced.ipynb | 2 +- .../feathr/definition/materialization_settings.py | 2 +- .../feathr_user_workspace/nyc_driver_demo.ipynb | 2 +- ui/src/pages/home/home.tsx | 4 ++-- 12 files changed, 24 insertions(+), 24 deletions(-) diff --git a/docs/README.md b/docs/README.md index 6053c5d5c..958da9b49 100644 --- a/docs/README.md +++ b/docs/README.md @@ -16,7 +16,7 @@ [![License](https://img.shields.io/badge/License-Apache%202.0-blue)](https://github.com/linkedin/feathr/blob/main/LICENSE) [![GitHub Release](https://img.shields.io/github/v/release/linkedin/feathr.svg?style=flat&sort=semver&color=blue)](https://github.com/linkedin/feathr/releases) -[![Docs Latest](https://img.shields.io/badge/docs-latest-blue.svg)](https://linkedin.github.io/feathr/) +[![Docs Latest](https://img.shields.io/badge/docs-latest-blue.svg)](https://feathr-ai.github.io/feathr/) [![Python API](https://img.shields.io/readthedocs/feathr?label=Python%20API)](https://feathr.readthedocs.io/en/latest/) [![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/6457/badge)](https://bestpractices.coreinfrastructure.org/projects/6457) @@ -48,16 +48,16 @@ Feathr automatically computes your feature values and joins them to your trainin Feathr has native integrations with Databricks and Azure Synapse: -Follow the [Feathr ARM deployment 
guide](https://linkedin.github.io/feathr/how-to-guides/azure-deployment-arm.html) to run Feathr on Azure. This allows you to quickly get started with automated deployment using Azure Resource Manager template. +Follow the [Feathr ARM deployment guide](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-arm.html) to run Feathr on Azure. This allows you to quickly get started with automated deployment using Azure Resource Manager template. -If you want to set up everything manually, you can checkout the [Feathr CLI deployment guide](https://linkedin.github.io/feathr/how-to-guides/azure-deployment-cli.html) to run Feathr on Azure. This allows you to understand what is going on and set up one resource at a time. +If you want to set up everything manually, you can checkout the [Feathr CLI deployment guide](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-cli.html) to run Feathr on Azure. This allows you to understand what is going on and set up one resource at a time. - Please read the [Quick Start Guide for Feathr on Databricks](./quickstart_databricks.md) to run Feathr with Databricks. - Please read the [Quick Start Guide for Feathr on Azure Synapse](./quickstart_synapse.md) to run Feathr with Azure Synapse. ## 📓 Documentation -- For more details on Feathr, read our [documentation](https://linkedin.github.io/feathr/). +- For more details on Feathr, read our [documentation](https://feathr-ai.github.io/feathr/). - For Python API references, read the [Python API Reference](https://feathr.readthedocs.io/). - For technical talks on Feathr, see the [slides here](./talks/Feathr%20Feature%20Store%20Talk.pdf). The recording is [here](https://www.youtube.com/watch?v=gZg01UKQMTY). @@ -149,15 +149,15 @@ user_item_similarity = DerivedFeature(name="user_item_similarity", ### Define Streaming Features -Read the [Streaming Source Ingestion Guide](https://linkedin.github.io/feathr/how-to-guides/streaming-source-ingestion.html) for more details. 
+Read the [Streaming Source Ingestion Guide](https://feathr-ai.github.io/feathr/how-to-guides/streaming-source-ingestion.html) for more details. ### Point in Time Joins -Read [Point-in-time Correctness and Point-in-time Join in Feathr](https://linkedin.github.io/feathr/concepts/point-in-time-join.html) for more details. +Read [Point-in-time Correctness and Point-in-time Join in Feathr](https://feathr-ai.github.io/feathr/concepts/point-in-time-join.html) for more details. ### Running Feathr Examples -Follow the [quick start Jupyter Notebook](./samples/product_recommendation_demo.ipynb) to try it out. There is also a companion [quick start guide](https://linkedin.github.io/feathr/quickstart_synapse.html) containing a bit more explanation on the notebook. +Follow the [quick start Jupyter Notebook](./samples/product_recommendation_demo.ipynb) to try it out. There is also a companion [quick start guide](https://feathr-ai.github.io/feathr/quickstart_synapse.html) containing a bit more explanation on the notebook. ## 🗣️ Tech Talks on Feathr diff --git a/docs/concepts/registry-access-control.md b/docs/concepts/registry-access-control.md index 22d4f85ca..3812db38a 100644 --- a/docs/concepts/registry-access-control.md +++ b/docs/concepts/registry-access-control.md @@ -71,12 +71,12 @@ _AAD Group_ is **NOT** supported yet. A _Role Assignment_ is the process of add a `user-role` mapping record into backend storage table. -[Feature Registry](https://linkedin.github.io/feathr/concepts/feature-registry.html#access-control-management-page) section briefly introduced the access control management page, where project admins can manage role assignments. +[Feature Registry](https://feathr-ai.github.io/feathr/concepts/feature-registry.html#access-control-management-page) section briefly introduced the access control management page, where project admins can manage role assignments. Management APIs are not exposed in Feathr Client by design. 
As we don't want to put control plane together with data plane. ## How to enable Registry Access Control? -[Azure Resource Provisioning](https://linkedin.github.io/feathr/how-to-guides/azure-deployment-arm.html) section has detailed instructions on resource provisioning. For RBAC specific, you will need to manually: +[Azure Resource Provisioning](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-arm.html) section has detailed instructions on resource provisioning. For RBAC specific, you will need to manually: 1. Choose `Yes` for `Enable RBAC` in ARM Template, and provision the resources. 2. Create a `userrole` table in provisioned SQL database with [RBAC Schema](../../registry/access_control/scripts/schema.sql). diff --git a/docs/dev_guide/feathr_overall_release_guide.md b/docs/dev_guide/feathr_overall_release_guide.md index 015e48f6f..8846d7796 100644 --- a/docs/dev_guide/feathr_overall_release_guide.md +++ b/docs/dev_guide/feathr_overall_release_guide.md @@ -59,10 +59,10 @@ See [Developer Guide for publishing to maven](publish_to_maven.md) Run the command to generate the Java jar. After the jar is generated, please upload to [Azure storage](https://ms.portal.azure.com/#view/Microsoft_Azure_Storage/ContainerMenuBlade/~/overview/storageAccountId/%2Fsubscriptions%2Fa6c2a7cc-d67e-4a1a-b765-983f08c0423a%2FresourceGroups%2Fazurefeathrintegration%2Fproviders%2FMicrosoft.Storage%2FstorageAccounts%2Fazurefeathrstorage/path/public/etag/%220x8D9E6F64D62D599%22/defaultEncryptionScope/%24account-encryption-key/denyEncryptionScopeOverride//defaultId//publicAccessVal/Container) for faster access. ## Release PyPi -The automated workflow should take care of this, you can check under [actions](https://github.com/linkedin/feathr/actions/workflows/publish-to-pypi.yml) to see the triggered run and results. 
For manual steps, see [Python Package Release Note](https://linkedin.github.io/feathr/dev_guide/python_package_release.html) +The automated workflow should take care of this, you can check under [actions](https://github.com/linkedin/feathr/actions/workflows/publish-to-pypi.yml) to see the triggered run and results. For manual steps, see [Python Package Release Note](https://feathr-ai.github.io/feathr/dev_guide/python_package_release.html) ## Updating docker image for API and Registry -The automated workflow should take care of this as well, you can check under [actions](https://github.com/linkedin/feathr/actions/workflows/docker-publish.yml) to see the triggered run and results. For manual steps, see [Feathr Registry docker image](https://linkedin.github.io/feathr/dev_guide/build-and-push-feathr-registry-docker-image.html) +The automated workflow should take care of this as well, you can check under [actions](https://github.com/linkedin/feathr/actions/workflows/docker-publish.yml) to see the triggered run and results. For manual steps, see [Feathr Registry docker image](https://feathr-ai.github.io/feathr/dev_guide/build-and-push-feathr-registry-docker-image.html) ## Testing Run one of the sample [notebook](https://github.com/linkedin/feathr/blob/main/docs/samples/product_recommendation_demo.ipynb) as it uses the latest package from Maven and PyPi. diff --git a/docs/how-to-guides/azure-deployment-arm.md b/docs/how-to-guides/azure-deployment-arm.md index a06033bbe..bfb748d67 100644 --- a/docs/how-to-guides/azure-deployment-arm.md +++ b/docs/how-to-guides/azure-deployment-arm.md @@ -128,7 +128,7 @@ For more details on RBAC, refer to [Feathr Registry Access Control](../how-to-gu ## Next Steps -Follow the quick start guide [here](https://linkedin.github.io/feathr/quickstart_synapse.html) to try out a notebook example. +Follow the quick start guide [here](https://feathr-ai.github.io/feathr/quickstart_synapse.html) to try out a notebook example. 
## Known Issues/Workaround diff --git a/docs/quickstart_synapse.md b/docs/quickstart_synapse.md index 894153797..f68c2cffb 100644 --- a/docs/quickstart_synapse.md +++ b/docs/quickstart_synapse.md @@ -22,7 +22,7 @@ First step is to provision required cloud resources if you want to use Feathr. F Feathr has native cloud integration. Here are the steps to use Feathr on Azure: -1. Follow the [Feathr ARM deployment guide](https://linkedin.github.io/feathr/how-to-guides/azure-deployment-arm.html) to run Feathr on Azure. This allows you to quickly get started with automated deployment using Azure Resource Manager template. Alternatively, if you want to set up everything manually, you can checkout the [Feathr CLI deployment guide](https://linkedin.github.io/feathr/how-to-guides/azure-deployment-cli.html) to run Feathr on Azure. This allows you to understand what is going on and set up one resource at a time. +1. Follow the [Feathr ARM deployment guide](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-arm.html) to run Feathr on Azure. This allows you to quickly get started with automated deployment using Azure Resource Manager template. Alternatively, if you want to set up everything manually, you can checkout the [Feathr CLI deployment guide](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-cli.html) to run Feathr on Azure. This allows you to understand what is going on and set up one resource at a time. 2. Once the deployment is complete,run the Feathr Jupyter Notebook by clicking this button: [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/linkedin/feathr/main?labpath=feathr_project%2Ffeathrcli%2Fdata%2Ffeathr_user_workspace%2Fnyc_driver_demo.ipynb). 3. You only need to change the specified `Resource Prefix`. 
@@ -188,7 +188,7 @@ client.multi_get_online_features("nycTaxiDemoFeature", ["239", "265"], ['f_locat ## Next steps - Run the [demo notebook](./samples/product_recommendation_demo.ipynb) to understand the workflow of Feathr. -- Read the [Feathr Documentation Page](https://linkedin.github.io/feathr/) page to understand the Feathr abstractions. -- Read guide to understand [how to setup Feathr on Azure using Azure Resource Manager template](https://linkedin.github.io/feathr/how-to-guides/azure-deployment-arm.html). -- Read guide to understand [how to setup Feathr step by step on Azure using Azure CLI](https://linkedin.github.io/feathr/how-to-guides/azure-deployment-cli.html). +- Read the [Feathr Documentation Page](https://feathr-ai.github.io/feathr/) page to understand the Feathr abstractions. +- Read guide to understand [how to setup Feathr on Azure using Azure Resource Manager template](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-arm.html). +- Read guide to understand [how to setup Feathr step by step on Azure using Azure CLI](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-cli.html). - Read [Python API Documentation](https://feathr.readthedocs.io/en/latest/) diff --git a/docs/samples/customer360/Customer360.ipynb b/docs/samples/customer360/Customer360.ipynb index 5bbadc110..664ae5b3e 100644 --- a/docs/samples/customer360/Customer360.ipynb +++ b/docs/samples/customer360/Customer360.ipynb @@ -61,7 +61,7 @@ "\n", "First step is to provision required cloud resources if you want to use Feathr. Feathr provides a python based client to interact with cloud resources.\n", "\n", - "Please follow the steps [here](https://linkedin.github.io/feathr/how-to-guides/azure-deployment-arm.html) to provision required cloud resources. Due to the complexity of the possible cloud environment, it is almost impossible to create a script that works for all the use cases. 
Because of this, [azure_resource_provision.sh](https://github.com/linkedin/feathr/blob/main/docs/how-to-guides/azure_resource_provision.sh) is a full end to end command line to create all the required resources, and you can tailor the script as needed, while [the companion documentation](https://linkedin.github.io/feathr/how-to-guides/azure-deployment-cli.html) can be used as a complete guide for using that shell script.\n", + "Please follow the steps [here](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-arm.html) to provision required cloud resources. Due to the complexity of the possible cloud environment, it is almost impossible to create a script that works for all the use cases. Because of this, [azure_resource_provision.sh](https://github.com/linkedin/feathr/blob/main/docs/how-to-guides/azure_resource_provision.sh) is a full end to end command line to create all the required resources, and you can tailor the script as needed, while [the companion documentation](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-cli.html) can be used as a complete guide for using that shell script.\n", "\n", "\n", "And the architecture is as below:\n", diff --git a/docs/samples/fraud_detection_demo.ipynb b/docs/samples/fraud_detection_demo.ipynb index b72c8ffd3..45d6d7982 100644 --- a/docs/samples/fraud_detection_demo.ipynb +++ b/docs/samples/fraud_detection_demo.ipynb @@ -44,7 +44,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "***Prior to running the notebook, if you have not deployed all the required resources, please refer to the guide here and follow the steps to do so: https://linkedin.github.io/feathr/how-to-guides/azure-deployment-arm.html***" + "***Prior to running the notebook, if you have not deployed all the required resources, please refer to the guide here and follow the steps to do so: https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-arm.html***" ] }, { diff --git 
a/docs/samples/product_recommendation_demo.ipynb b/docs/samples/product_recommendation_demo.ipynb index 0634ce110..4ead35504 100644 --- a/docs/samples/product_recommendation_demo.ipynb +++ b/docs/samples/product_recommendation_demo.ipynb @@ -21,7 +21,7 @@ "\n", "First step is to provision required cloud resources if you want to use Feathr. Feathr provides a python based client to interact with cloud resources.\n", "\n", - "Please follow the steps [here](https://linkedin.github.io/feathr/how-to-guides/azure-deployment-arm.html) to provision required cloud resources. Due to the complexity of the possible cloud environment, it is almost impossible to create a script that works for all the use cases. Because of this, [azure_resource_provision.sh](https://github.com/linkedin/feathr/blob/main/docs/how-to-guides/azure_resource_provision.sh) is a full end to end command line to create all the required resources, and you can tailor the script as needed, while [the companion documentation](https://linkedin.github.io/feathr/how-to-guides/azure-deployment-cli.html) can be used as a complete guide for using that shell script. \n", + "Please follow the steps [here](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-arm.html) to provision required cloud resources. Due to the complexity of the possible cloud environment, it is almost impossible to create a script that works for all the use cases. Because of this, [azure_resource_provision.sh](https://github.com/linkedin/feathr/blob/main/docs/how-to-guides/azure_resource_provision.sh) is a full end to end command line to create all the required resources, and you can tailor the script as needed, while [the companion documentation](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-cli.html) can be used as a complete guide for using that shell script. 
\n", "\n", "\n", "![Architecture](https://github.com/linkedin/feathr/blob/main/docs/images/architecture.png?raw=true)" diff --git a/docs/samples/product_recommendation_demo_advanced.ipynb b/docs/samples/product_recommendation_demo_advanced.ipynb index b55035501..89c9c63e5 100644 --- a/docs/samples/product_recommendation_demo_advanced.ipynb +++ b/docs/samples/product_recommendation_demo_advanced.ipynb @@ -53,7 +53,7 @@ "\n", "First step is to provision required cloud resources if you want to use Feathr. Feathr provides a python based client to interact with cloud resources.\n", "\n", - "Please follow the steps [here](https://linkedin.github.io/feathr/how-to-guides/azure-deployment-arm.html) to provision required cloud resources. Due to the complexity of the possible cloud environment, it is almost impossible to create a script that works for all the use cases. Because of this, [azure_resource_provision.sh](https://github.com/linkedin/feathr/blob/main/docs/how-to-guides/azure_resource_provision.sh) is a full end to end command line to create all the required resources, and you can tailor the script as needed, while [the companion documentation](https://linkedin.github.io/feathr/how-to-guides/azure-deployment-cli.html) can be used as a complete guide for using that shell script. \n", + "Please follow the steps [here](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-arm.html) to provision required cloud resources. Due to the complexity of the possible cloud environment, it is almost impossible to create a script that works for all the use cases. 
Because of this, [azure_resource_provision.sh](https://github.com/linkedin/feathr/blob/main/docs/how-to-guides/azure_resource_provision.sh) is a full end to end command line to create all the required resources, and you can tailor the script as needed, while [the companion documentation](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-cli.html) can be used as a complete guide for using that shell script. \n", "\n", "\n", "![Architecture](https://github.com/linkedin/feathr/blob/main/docs/images/architecture.png?raw=true)" diff --git a/feathr_project/feathr/definition/materialization_settings.py b/feathr_project/feathr/definition/materialization_settings.py index fdc62dc5f..4aa0c5870 100644 --- a/feathr_project/feathr/definition/materialization_settings.py +++ b/feathr_project/feathr/definition/materialization_settings.py @@ -5,7 +5,7 @@ class BackfillTime: - """Time range to materialize/backfill feature data. Please refer to https://linkedin.github.io/feathr/concepts/materializing-features.html#feature-backfill for a more detailed explanation. + """Time range to materialize/backfill feature data. Please refer to https://feathr-ai.github.io/feathr/concepts/materializing-features.html#feature-backfill for a more detailed explanation. Attributes: start: start time of the backfill, inclusive. diff --git a/feathr_project/feathrcli/data/feathr_user_workspace/nyc_driver_demo.ipynb b/feathr_project/feathrcli/data/feathr_user_workspace/nyc_driver_demo.ipynb index 43c835c7f..38cec2ca9 100644 --- a/feathr_project/feathrcli/data/feathr_user_workspace/nyc_driver_demo.ipynb +++ b/feathr_project/feathrcli/data/feathr_user_workspace/nyc_driver_demo.ipynb @@ -31,7 +31,7 @@ "## Prerequisite: Use Quick Start Template to Provision Azure Resources\n", "First step is to provision required cloud resources if you want to use Feathr. 
Feathr provides a python based client to interact with cloud resources.\n", "\n", - "Please follow the steps [here](https://linkedin.github.io/feathr/how-to-guides/azure-deployment-arm.html) to provision required cloud resources. Due to the complexity of the possible cloud environment, it is almost impossible to create a script that works for all the use cases. Because of this, [azure_resource_provision.sh](https://github.com/linkedin/feathr/blob/main/docs/how-to-guides/azure_resource_provision.sh) is a full end to end command line to create all the required resources, and you can tailor the script as needed, while [the companion documentation](https://linkedin.github.io/feathr/how-to-guides/azure-deployment-cli.html) can be used as a complete guide for using that shell script.\n", + "Please follow the steps [here](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-arm.html) to provision required cloud resources. Due to the complexity of the possible cloud environment, it is almost impossible to create a script that works for all the use cases. Because of this, [azure_resource_provision.sh](https://github.com/linkedin/feathr/blob/main/docs/how-to-guides/azure_resource_provision.sh) is a full end to end command line to create all the required resources, and you can tailor the script as needed, while [the companion documentation](https://feathr-ai.github.io/feathr/how-to-guides/azure-deployment-cli.html) can be used as a complete guide for using that shell script.\n", "\n", "\n", "![Architecture](https://github.com/linkedin/feathr/blob/main/docs/images/architecture.png?raw=true)" diff --git a/ui/src/pages/home/home.tsx b/ui/src/pages/home/home.tsx index f23b790cf..240ab8901 100644 --- a/ui/src/pages/home/home.tsx +++ b/ui/src/pages/home/home.tsx @@ -25,7 +25,7 @@ const Home = () => { feature lineages and manage access controls. 
{" "} @@ -228,7 +228,7 @@ const Home = () => { {" "} Feathr Github Homepage From bbdcc50341d8a4a1a39e504ec55b333f8f62ed6e Mon Sep 17 00:00:00 2001 From: Chang Yong Lik <51813538+ahlag@users.noreply.github.com> Date: Wed, 14 Sep 2022 00:40:00 +0900 Subject: [PATCH 07/25] Added _scproxy necessary for MacOS (#651) * Added _scproxy necessary for MacOS Signed-off-by: changyonglik * Changed to conditional import Signed-off-by: changyonglik * Added comments Signed-off-by: changyonglik Signed-off-by: changyonglik --- registry/sql-registry/registry/database.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/registry/sql-registry/registry/database.py b/registry/sql-registry/registry/database.py index 39bab8ec4..21b8a2aca 100644 --- a/registry/sql-registry/registry/database.py +++ b/registry/sql-registry/registry/database.py @@ -3,6 +3,13 @@ import logging import threading import os + +# Checks if the platform is Max (Darwin). +# If so, imports _scproxy that is necessary for pymssql to work on MacOS +import platform +if platform.system().lower().startswith('dar'): + import _scproxy + import pymssql @@ -53,7 +60,7 @@ def __init__(self, params): self.params = params self.make_connection() self.mutex = threading.Lock() - + def make_connection(self): self.conn = pymssql.connect(**self.params) @@ -85,10 +92,10 @@ def transaction(self): """ Start a transaction so we can run multiple SQL in one batch. User should use `with` with the returned value, look into db_registry.py for more real usage. - + NOTE: `self.query` and `self.execute` will use a different MSSQL connection so any change made in this transaction will *not* be visible in these calls. - + The minimal implementation could look like this if the underlying engine doesn't support transaction. 
``` @contextmanager @@ -125,4 +132,4 @@ def connect(*args, **kargs): ret = p.connect(*args, **kargs) if ret is not None: return ret - raise RuntimeError("Cannot connect to database") \ No newline at end of file + raise RuntimeError("Cannot connect to database") From 6de1d606f72218fe76d0238136eebd73d97e1147 Mon Sep 17 00:00:00 2001 From: Xiaoyong Zhu Date: Tue, 13 Sep 2022 21:16:04 -0700 Subject: [PATCH 08/25] Add docs for consuming features in online environment (#609) * Create consume-features.md * Update consume-features.md * rename docs * Update model-inference-with-feathr.md * Update README.md * update docs per feedback * Update streaming-source-ingestion.md * update docs * update docs * Update azure-deployment-arm.md * Update model-inference-with-feathr.md * add sign off message Signed-off-by: Xiaoyong Zhu xiaoyzhu@outlook.com * fix comments * Delete deploy-feathr-api-as-webapp.md * Update model-inference-with-feathr.md Signed-off-by: Xiaoyong Zhu xiaoyzhu@outlook.com --- docs/README.md | 30 ++--- ...d-and-push-feathr-registry-docker-image.md | 34 ++++- docs/dev_guide/cloud_resource_provision.md | 4 +- docs/dev_guide/deploy-feathr-api-as-webapp.md | 122 ------------------ docs/dev_guide/feathr-core-code-structure.md | 2 +- docs/how-to-guides/azure-deployment-arm.md | 5 +- .../azure_resource_provision.json | 4 +- .../model-inference-with-feathr.md | 56 ++++++++ .../streaming-source-ingestion.md | 18 +-- 9 files changed, 115 insertions(+), 160 deletions(-) delete mode 100644 docs/dev_guide/deploy-feathr-api-as-webapp.md create mode 100644 docs/how-to-guides/model-inference-with-feathr.md diff --git a/docs/README.md b/docs/README.md index 958da9b49..2d98def0a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -170,26 +170,26 @@ Follow the [quick start Jupyter Notebook](./samples/product_recommendation_demo. 
![Architecture Diagram](./images/architecture.png) -| Feathr component | Cloud Integrations | -| ------------------------------- | --------------------------------------------------------------------------- | -| Offline store – Object Store | Azure Blob Storage, Azure ADLS Gen2, AWS S3 | -| Offline store – SQL | Azure SQL DB, Azure Synapse Dedicated SQL Pools, Azure SQL in VM, Snowflake | -| Streaming Source | Kafka, EventHub | -| Online store | Redis, Azure Cosmos DB (coming soon), Aerospike (coming soon) | -| Feature Registry and Governance | Azure Purview, ANSI SQL such as Azure SQL Server | -| Compute Engine | Azure Synapse Spark Pools, Databricks | -| Machine Learning Platform | Azure Machine Learning, Jupyter Notebook, Databricks Notebook | -| File Format | Parquet, ORC, Avro, JSON, Delta Lake, CSV | -| Credentials | Azure Key Vault | +| Feathr component | Cloud Integrations | +| ------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | +| Offline store – Object Store | Azure Blob Storage, Azure ADLS Gen2, AWS S3 | +| Offline store – SQL | Azure SQL DB, Azure Synapse Dedicated SQL Pools, Azure SQL in VM, Snowflake | +| Streaming Source | Kafka, EventHub | +| Online store | Redis, [Azure Cosmos DB](https://feathr-ai.github.io/feathr/how-to-guides/jdbc-cosmos-notes.html#using-cosmosdb-as-the-online-store), Aerospike (coming soon) | +| Feature Registry and Governance | Azure Purview, ANSI SQL such as Azure SQL Server | +| Compute Engine | Azure Synapse Spark Pools, Databricks | +| Machine Learning Platform | Azure Machine Learning, Jupyter Notebook, Databricks Notebook | +| File Format | Parquet, ORC, Avro, JSON, Delta Lake, CSV | +| Credentials | Azure Key Vault | ## 🚀 Roadmap -For a complete roadmap with estimated dates, please [visit this page](https://github.com/linkedin/feathr/milestones?direction=asc&sort=title&state=open). 
- -- [x] Support streaming -- [x] Support common data sources +- [x] Support streaming features with transformation +- [x] Support common data sources and sinks. Read more in the [Cloud Integrations and Architecture](#️-cloud-integrations-and-architecture) part. - [x] Support feature store UI, including Lineage and Search functionalities +- [ ] Support a sandbox Feathr environment for better getting started experience - [ ] Support online transformation +- [ ] More Feathr online client libraries such as Java - [ ] Support feature versioning - [ ] Support feature monitoring - [ ] Support feature data deletion and retention diff --git a/docs/dev_guide/build-and-push-feathr-registry-docker-image.md b/docs/dev_guide/build-and-push-feathr-registry-docker-image.md index 034b502df..873c6a141 100644 --- a/docs/dev_guide/build-and-push-feathr-registry-docker-image.md +++ b/docs/dev_guide/build-and-push-feathr-registry-docker-image.md @@ -6,7 +6,7 @@ parent: Developer Guides # How to build and push feathr registry docker image -This doc shows how to build feathr registry docker image locally and publish to registry. +This doc shows how to build feathr registry docker image locally and publish to DockerHub. ## Prerequisites @@ -28,32 +28,52 @@ Run **docker images** command, you will see newly created image listed in output docker images ``` -Run **docker run** command to test docker image locally: +Run **docker run** command to test docker image locally. + +### Test SQL-based registry + +You need to setup the connection string `CONNECTION_STR` for the docker container, so that it knows which SQL-based registry is connected to. 
The connection string will be something like this: + +```bash +"Server=tcp:testregistry.database.windows.net,1433;Initial Catalog=testsql;Persist Security Info=False;User ID=feathr@feathrtestsql;Password=StrongPassword;MultipleActiveResultSets=False;Encrypt=True;TrustServerCertificate=False;Connection Timeout=30;" +``` + +Then you can test the docker locally by running this command: -### Test SQL registry ```bash docker run --env CONNECTION_STR= --env API_BASE=api/v1 -it --rm -p 3000:80 feathrfeaturestore/sql-registry ``` ### Test Purview registry + +You need to setup a few environment variables, include: + +- `PURVIEW_NAME` indicates the Purview service name +- `AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_CLIENT_SECRET` indicates the service principal account to talk with Purview service. + ```bash docker run --env PURVIEW_NAME= --env AZURE_CLIENT_ID= --env AZURE_TENANT_ID= --env AZURE_CLIENT_SECRET= --env API_BASE=api/v1 -it --rm -p 3000:80 feathrfeaturestore/feathr-registry ``` ### Test SQL registry + RBAC + ```bash docker run --env REACT_APP_ENABLE_RBAC=true --env REACT_APP_AZURE_CLIENT_ID= --env REACT_APP_AZURE_TENANT_ID= --env CONNECTION_STR= --env API_BASE=api/v1 -it --rm -p 3000:80 feathrfeaturestore/feathr-registry ``` -After docker image launched, open web browser and navigate to ,verify both UI and backend api can work correctly. +After docker image launched, open web browser and navigate to ,verify both the Feathr UI and the registry backend (SQL/Purview) can work correctly. + +## Upload to DockerHub (For Feathr Release Manager) -## Upload to DockerHub Registry +The Feathr repository already have automatic CD pipelines to publish the docker image to DockerHub on release branches. Please checkout [docker publish workflow](https://github.com/feathr-ai/feathr/blob/main/.github/workflows/docker-publish.yml) for details -Login with feathrfeaturestore account and then run **docker push** command to publish docker image to DockerHub. 
Contact Feathr Team (@jainr, @blrchen) for credentials. +In case if the Feathr release manager wants to do it manually, login with feathrfeaturestore account and then run **docker push** command to publish docker image to DockerHub. Contact Feathr Team (@jainr, @blrchen) for credentials. ```bash docker login -docker push feathrfeaturestore/sql-registry +docker push feathrfeaturestore/feathr-registry ``` +## Published Feathr Registry Image +The published feathr feature registry is located in [DockerHub here](https://hub.docker.com/r/feathrfeaturestore/feathr-registry). \ No newline at end of file diff --git a/docs/dev_guide/cloud_resource_provision.md b/docs/dev_guide/cloud_resource_provision.md index 8ac07ac43..033030694 100644 --- a/docs/dev_guide/cloud_resource_provision.md +++ b/docs/dev_guide/cloud_resource_provision.md @@ -29,12 +29,12 @@ Invoke Deployment Script from GitHub Repo with parameter for Azure Region. Available regions can be checked with this command ```powershell - Get-AzLocation | select displayname,location +Get-AzLocation | select displayname,location ``` ```powershell - iwr https://raw.githubusercontent.com/linkedin/feathr/main/docs/how-to-guides/deployFeathr.ps1 -outfile ./deployFeathr.ps1; ./deployFeathr.ps1 -AzureRegion '{Assign Your Region}' +iwr https://raw.githubusercontent.com/linkedin/feathr/main/docs/how-to-guides/deployFeathr.ps1 -outfile ./deployFeathr.ps1; ./deployFeathr.ps1 -AzureRegion '{Assign Your Region}' ``` diff --git a/docs/dev_guide/deploy-feathr-api-as-webapp.md b/docs/dev_guide/deploy-feathr-api-as-webapp.md deleted file mode 100644 index a25abf817..000000000 --- a/docs/dev_guide/deploy-feathr-api-as-webapp.md +++ /dev/null @@ -1,122 +0,0 @@ ---- -layout: default -title: Feathr REST API Deployment -parent: Developer Guides ---- - -# Feathr REST API - -The REST API currently supports following functionalities: - -1. Get Feature by Qualified Name -2. Get Feature by GUID -3. Get List of Features -4. 
Get Lineage for a Feature - -## Build and run locally - -### Install - -**NOTE:** You can run the following command in your local python environment or in your Azure Virtual machine. -You can install dependencies through the requirements file - -```bash -pip install -r requirements.txt -``` - -### Run - -This command will start the uvicorn server locally and will dynamically load your changes. - -```bash -uvicorn api:app --port 8080 --reload -``` - -## Build and deploy on Azure - -Here are the steps to build the API as a docker container, push it to Azure Container registry and then deploy it as webapp. The instructions below are for Mac/Linux but should work on Windows too. You might have to use sudo command or run docker as administrator on windows if you don't have right privileges. - -1. Install Azure CLI by following instructions [here](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest) - -1. Create Azure Container Registry. First create the resource group. - - ```bash - az group create --name --location - ``` - - Then create the container registry - - ```bash - az acr create --resource-group --name --sku Basic - ``` - -1. Login to your Azure container registry (ACR) account. - - ```bash - $ az acr login --name - ``` - -1. Clone the repository and navigate to api folder - - ```bash - $ git clone git@github.com:linkedin/feathr.git - - $ cd feathr_project/feathr/api - - ``` - -1. Build the docker container locally, you need to have docker installed locally and have it running. To set up docker on your machine follow the instructions [here](https://docs.docker.com/get-started/) - **Note: Note: /image_name is not a mandatory format for specifying the name of the image.It’s just a useful convention to avoid tagging your image again when you need to push it to a registry. It can be anything you want in the format below** - - ```bash - $ docker build -t feathr/api . - ``` - -1. 
Run docker images command and you will see your newly created image - - ```bash - $ docker images - - REPOSITORY TAG IMAGE ID CREATED SIZE - feathr/api latest a647ea749b9b 5 minutes ago 529MB - ``` - -1. Before you can push an image to your registry, you must tag it with the fully qualified name of your ACR login server. The login server name is in the format .azurecr.io (all lowercase), for example, mycontainerregistry007.azurecr.io. Tag the image - ```bash - $ docker tag feathr/api:latest feathracr.azurecr.io/feathr/api:latest - ``` -1. Push the image to the registry - ```bash - $ docker push feathracr.azurecr.io/feathr/api:latest - ``` -1. List the images from your registry to see your recently pushed image - ``` - az acr repository list --name feathracr --output table - ``` - Output: - ``` - Result - ---------- - feathr/api - ``` - -## Deploy image to Azure WebApp for Containers - -1. Go to [Azure portal](https://portal.azure.com) and search for your container registry -1. Select repositories from the left pane and click latest tag. Click on the three dots on right side of the tag and select **Deploy to WebApp** option. If you see the **Deploy to WebApp** option greyed out, you would have to enable Admin User on the registry by Updating it. - - ![Container Image 1](../images/feathr_api_image_latest.png) - - ![Container Image 2](../images/feathr_api_image_latest_options.png) - -1. Provide a name for the deployed webapp, along with the subscription to deploy app into, the resource group and the appservice plan - - ![Container Image](../images/feathr_api_image_latest_deployment.png) - -1. You will get the notification that your app has been successfully deployed, click on **Go to Resource** button. - -1. On the App overview page go to the URL (https://.azurewebsites.net/docs) for deployed app (it's under URL on the app overview page) and you should see the API documentation. 
- - ![API docs](../images/api-docs.png) - -Congratulations you have successfully deployed the Feathr API. diff --git a/docs/dev_guide/feathr-core-code-structure.md b/docs/dev_guide/feathr-core-code-structure.md index ab812f32e..acf0c8c93 100644 --- a/docs/dev_guide/feathr-core-code-structure.md +++ b/docs/dev_guide/feathr-core-code-structure.md @@ -1,6 +1,6 @@ --- layout: default -title: Documentation Guideline +title: Feathr Core Code Structure parent: Developer Guides --- diff --git a/docs/how-to-guides/azure-deployment-arm.md b/docs/how-to-guides/azure-deployment-arm.md index bfb748d67..9245db91d 100644 --- a/docs/how-to-guides/azure-deployment-arm.md +++ b/docs/how-to-guides/azure-deployment-arm.md @@ -17,7 +17,9 @@ The provided Azure Resource Manager (ARM) template deploys the following resourc 7. Azure Event Hub 8. Azure Redis -Please note, you need to have **owner access** in the resource group you are deploying this in. Owner access is required to assign role to managed identity within ARM template so it can access key vault and store secrets. +Please note, you need to have **owner access** in the resource group you are deploying this in. Owner access is required to assign role to managed identity within ARM template so it can access key vault and store secrets. If you don't have such permission, you might want to contact your IT admin to see if they can do that. + +Although we recommend end users deploy the resources using the ARM template, we understand that in many situations where users want to reuse existing resources instead of creating new resources; or users have many other permission issues. See [Manually connecting existing resources](#manually-connecting-existing-resources) for more details. ## Architecture @@ -111,7 +113,6 @@ https://{resource_prefix}webapp.azurewebsites.net ![feathr ui landing page](../images/feathr-ui-landingpage.png) - ### 5. 
Initialize RBAC access table (Optional) If you want to use RBAC access for your deployment, you also need to manually initialize the user access table. Replace `[your-email-account]` with the email account that you are currently using, and this email will be the global admin for Feathr feature registry. diff --git a/docs/how-to-guides/azure_resource_provision.json b/docs/how-to-guides/azure_resource_provision.json index 827757b8c..58300fae4 100644 --- a/docs/how-to-guides/azure_resource_provision.json +++ b/docs/how-to-guides/azure_resource_provision.json @@ -35,13 +35,13 @@ "sqlAdminUsername": { "type": "String", "metadata": { - "description": "Specifies the username for admin" + "description": "Specifies the username for SQL Database admin" } }, "sqlAdminPassword": { "type": "SecureString", "metadata": { - "description": "Specifies the password for admin" + "description": "Specifies the password for SQL Database admin" } }, "registryBackend": { diff --git a/docs/how-to-guides/model-inference-with-feathr.md b/docs/how-to-guides/model-inference-with-feathr.md new file mode 100644 index 000000000..c2b5a8e7c --- /dev/null +++ b/docs/how-to-guides/model-inference-with-feathr.md @@ -0,0 +1,56 @@ +--- +layout: default +title: Online Model Inference with Features from Feathr +parent: How-to Guides +--- + +# Online Model Inference with Features from Feathr + +After you have materialized features in online store such as Redis or Azure Cosmos DB, usually end users want to consume those features in production environment for model inference. + +With Feathr's [online client](https://feathr.readthedocs.io/en/latest/#feathr.FeathrClient.get_online_features), it is quite straightforward to do that. The sample code is as below, where users only need to configure the online store endpoint (if using Redis), and call `client.get_online_features()` to get the features for a particular key. 
+ +```python + +## put the section below into the initialization handler +import os +from feathr import FeathrClient + +# Set Redis endpoint +os.environ['online_store__redis__host'] = ".redis.cache.windows.net" +os.environ['online_store__redis__port'] = "6380" +os.environ['online_store__redis__ssl_enabled'] = "True" +os.environ['REDIS_PASSWORD'] = "" + +client = FeathrClient() + + +# put this section in the model inference handler +feature = client.get_online_features(feature_table="nycTaxiCITable", + key='2020-04-15', + feature_names=['f_is_long_trip_distance', 'f_day_of_week']) +# `feature` will be an array representing the features of that particular key. + + +# `model` will be a ML model that is loaded previously. +result = model.predict(feature) +``` + +## Best Practices + +Usually for ML platforms such as Azure Machine Learning, Sagemaker, or DataRobot, there are options where you can "bring your own container" or use "container inference". Basically it requires end users to write an "entry script" and provide a few functions. In those cases, there are usually two handlers: + +- an initialization handler to allow users to load configurations. For example, in Azure Machine Learning, it is a function called `init()`, and in Sagemaker, it is `model_fn()`. +- a model inference handler to do the model inference. For example, in Azure Machine Learning, it is called `run()`, and in Sagemaker, it is called `predict_fn()`. + +In the initialization handler, initialize the environment variables and initialize `FeathrClient` as shown in the above script; in the inference handler, call this line: + +```python +# put this section in the model inference handler +feature = client.get_online_features(feature_table="nycTaxiCITable", + key='2020-04-15', + feature_names=['f_is_long_trip_distance', 'f_day_of_week']) +# `feature` will be an array representing the features of that particular key. +# `model` will be a ML model that is loaded previously.
+result = model.predict(feature) +``` diff --git a/docs/how-to-guides/streaming-source-ingestion.md b/docs/how-to-guides/streaming-source-ingestion.md index 4a59abc48..499efef5c 100644 --- a/docs/how-to-guides/streaming-source-ingestion.md +++ b/docs/how-to-guides/streaming-source-ingestion.md @@ -1,12 +1,12 @@ --- layout: default -title: Streaming Source Ingestion +title: Streaming Source Ingestion and Feature Definition parent: How-to Guides --- -# Streaming feature ingestion +# Streaming Source Ingestion and Feature Definition -Feathr supports defining features from a stream source (for example Kafka) and sink the features into an online store (such as Redis). This is very useful if you need up-to-date features for online store, for example when user clicks on the website, that web log event is usually sent to Kafka, and data scientists might need some features immediately, such as the browser used in this particular event. The steps are as below: +Feathr supports defining features from a stream source (for example Kafka) with transformations, and sink the features into an online store (such as Redis). This is very useful if you need up-to-date features for online store, for example when user clicks on the website, that web log event is usually sent to Kafka, and data scientists might need some features immediately, such as the browser used in this particular event. The steps are as below: ## Define Kafka streaming input source @@ -35,13 +35,13 @@ stream_source = KafKaSource(name="kafkaStreamingSource", ) ``` -You may need to produce data and send them into Kafka as this data source in advance. Please check [Kafka data source producer](../../feathr_project/test/prep_azure_kafka_test_data.py) as a reference. Also you should keep this producer running which means there are data stream keep coming into Kafka while calling the 'materialize_features' below. +You may need to produce data and send them into Kafka as this data source in advance. 
Please check [Kafka data source producer](https://github.com/linkedin/feathr/blob/main/feathr_project/test/prep_azure_kafka_test_data.py) as a reference. Also you should keep this producer running which means there are data stream keep coming into Kafka while calling the 'materialize_features' below. ## Define feature definition with the Kafka source You can then define features. They are mostly the same with the [regular feature definition](../concepts/feature-definition.md). -Note that for the `transform` part, only row level transformation is allowed in streaming anchor at the moment, i.e. the transformations listed in [Spark SQL Built-in Functions](https://spark.apache.org/docs/latest/api/sql/) are supported. Other transformations support are in the roadmap. +Note that for the `transform` part, only row level transformation is allowed in streaming anchor at the moment, i.e. the transformations listed in [Spark SQL Built-in Functions](https://spark.apache.org/docs/latest/api/sql/) are supported. Users can also define customized [Spark SQL functions](./feathr-spark-udf-advanced.md). For example, you can specify to do a row-level transformation like `trips_today + randn() * cos(trips_today)` for your input data. @@ -90,14 +90,14 @@ res = client.multi_get_online_features('kafkaSampleDemoFeature', ['1', '2'], ['f ``` -You can also refer to the [test case](../../feathr_project/test/test_azure_kafka_e2e.py) for more details. +You can also refer to the [test case](https://github.com/linkedin/feathr/blob/main/feathr_project/test/test_azure_kafka_e2e.py) for more details. ## Kafka configuration -Please refer to the [Feathr Configuration Doc](./feathr-configuration-and-env.md#kafkasasljaasconfig) for more details on the credentials. +Please refer to the [Feathr Configuration Doc](./feathr-configuration-and-env.md#KAFKA_SASL_JAAS_CONFIG) for more details on the credentials. 
-## Event Hub monitor +## Event Hub monitoring -Please check monitor panel on your 'Event Hub' overview page while running materialize to make sure there are both incoming and outgoing messages, like below graph. Otherwise, you may not get anything from 'get_online_features' since the source is empty. +If you feel something is wrong, you can check the monitor panel on your 'Event Hub' overview page while running the Feathr materialization job, to make sure there are both incoming and outgoing messages, like the graph below. Otherwise, you may not get anything from `get_online_features()` since the source is empty. ![Kafka Monitor Page](../images/kafka-messages-monitor.png) \ No newline at end of file From a54180b90ef6f7910b40c262b945d9849addcc02 Mon Sep 17 00:00:00 2001 From: Xiaoyong Zhu Date: Wed, 14 Sep 2022 01:05:13 -0700 Subject: [PATCH 09/25] Clean up after moving to LFAI (#665) * Clean up after moving to LFAI Clean up after moving to LFAI * Update README.md --- .github/pull_request_template.md | 2 +- CONTRIBUTING.md | 4 +- docs/README.md | 38 ++++++++++--------- docs/concepts/feature-registry.md | 2 +- docs/concepts/get-offline-features.md | 2 +- docs/dev_guide/cloud_integration_testing.md | 2 +- docs/dev_guide/cloud_resource_provision.md | 2 +- .../dev_guide/feathr_overall_release_guide.md | 26 ++++++------- docs/dev_guide/new_contributor_guide.md | 10 ++--- docs/dev_guide/publish_to_maven.md | 2 +- docs/dev_guide/scala_dev_guide.md | 2 +- docs/how-to-guides/azure-deployment-arm.md | 6 +-- .../feathr-configuration-and-env.md | 6 +-- .../streaming-source-ingestion.md | 4 +- docs/quickstart_synapse.md | 6 +-- feathr_project/feathr/client.py | 4 +- feathr_project/feathrcli/cli.py | 2 +- feathr_project/setup.py | 6 +-- 18 files changed, 64 insertions(+), 62 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 735b76e5a..9d2d9e746 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md 
@@ -1,6 +1,6 @@ ## Description