Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 12 additions & 44 deletions crates/pipeline-manager/src/cluster_monitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use crate::db::storage::Storage;
use crate::db::storage_postgres::StoragePostgres;
use crate::db::types::monitor::{MonitorStatus, NewClusterMonitorEvent};
use crate::error::source_error;
use crate::unstable_features;
use async_trait::async_trait;
use feldera_observability::ReqwestTracingExt;
use std::{sync::Arc, time::Duration};
Expand Down Expand Up @@ -38,12 +37,6 @@ const DEFAULT_REQUEST_TIMEOUT: Duration = Duration::from_secs(5);
const RESOURCES_INFO_NOT_AVAILABLE: &str =
"Resources information not available in Community edition.";

/// Message when the resources information gathering is not enabled.
const RESOURCES_INFO_NOT_ENABLED: &str = "Resources information is not enabled. \
Cluster monitoring resources is currently an unstable feature. It can be enabled by \
setting the control plane environment variable FELDERA_UNSTABLE_FEATURES and adding to it \
`cluster_monitor_resources` as one of the comma-separated entries.";

/// Target to poll resources of.
pub enum PollResourcesTarget {
Api,
Expand Down Expand Up @@ -121,43 +114,18 @@ pub async fn cluster_monitor<P: ResourcesPoller>(
let runner_self_info = truncate_info(runner_self_info);

// Perform polling of the resources backing the services
let (
api_resources_ok,
compiler_resources_ok,
runner_resources_ok,
api_resources_info,
compiler_resources_info,
runner_resources_info,
) = if unstable_features().is_some_and(|activated_unstable_features| {
activated_unstable_features.contains("cluster_monitor_resources")
}) {
let (api_resources_ok, api_resources_info) = resources_poller
.poll_resources(PollResourcesTarget::Api)
.await;
let (compiler_resources_ok, compiler_resources_info) = resources_poller
.poll_resources(PollResourcesTarget::Compiler)
.await;
let (runner_resources_ok, runner_resources_info) = resources_poller
.poll_resources(PollResourcesTarget::Runner)
.await;
(
api_resources_ok,
compiler_resources_ok,
runner_resources_ok,
truncate_info(api_resources_info),
truncate_info(compiler_resources_info),
truncate_info(runner_resources_info),
)
} else {
(
true,
true,
true,
RESOURCES_INFO_NOT_ENABLED.to_string(),
RESOURCES_INFO_NOT_ENABLED.to_string(),
RESOURCES_INFO_NOT_ENABLED.to_string(),
)
};
let (api_resources_ok, api_resources_info) = resources_poller
.poll_resources(PollResourcesTarget::Api)
.await;
let (compiler_resources_ok, compiler_resources_info) = resources_poller
.poll_resources(PollResourcesTarget::Compiler)
.await;
let (runner_resources_ok, runner_resources_info) = resources_poller
.poll_resources(PollResourcesTarget::Runner)
.await;
let api_resources_info = truncate_info(api_resources_info);
let compiler_resources_info = truncate_info(compiler_resources_info);
let runner_resources_info = truncate_info(runner_resources_info);

// Whether to insert the event into the database
let insert_into_database = match &latest_event {
Expand Down
2 changes: 0 additions & 2 deletions crates/pipeline-manager/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,6 @@ pub struct CommonConfig {
/// Currently supported features:
/// - `runtime_version`: Allows to override the runtime version of a pipeline on the platform.
/// - `testing`
/// - `cluster_monitor_resources`: Cluster monitoring also monitors the resources backing the
/// instance (i.e., the Kubernetes objects).
#[arg(verbatim_doc_comment, long, env = "FELDERA_UNSTABLE_FEATURES")]
pub unstable_features: Option<String>,

Expand Down
7 changes: 2 additions & 5 deletions crates/pipeline-manager/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,8 @@ static UNSTABLE_FEATURES: OnceLock<HashSet<&'static str>> = OnceLock::new();

/// Initialization function to set the platform's unstable feature gate.
pub fn platform_enable_unstable(requested_features: &str) {
let all_features: HashSet<&'static str> = HashSet::from_iter(vec![
"runtime_version",
"testing",
"cluster_monitor_resources",
]);
let all_features: HashSet<&'static str> =
HashSet::from_iter(vec!["runtime_version", "testing"]);
let mut enabled = HashSet::new();
for requested_feature in requested_features.split(',') {
if let Some(supported_feature) = all_features.get(requested_feature) {
Expand Down
4 changes: 4 additions & 0 deletions docs.feldera.com/docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ import TabItem from '@theme/TabItem';

## Unreleased

- Cluster monitor events now include information on the backing (Kubernetes) resources
by default. This is no longer gated behind the unstable feature
`cluster_monitor_resources` (deprecated).

- Casts of strings to Boolean and floating point values will
produce runtime errors instead of legal values for illegal string
values. The set of strings that can be legally converted to
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,6 @@ Configure HTTPS for all Feldera components. See the [HTTPS guide](./https) for c

| Key | Default | Description |
|-----|---------|-------------|
| `unstableFeatures` | `[]` | List of unstable feature flags to enable. Possible values: `"testing"`, `"runtime_version"`, `"cluster_monitor_resources"`. Do not also set `FELDERA_UNSTABLE_FEATURES` in `controlPlane.env`. |
| `unstableFeatures` | `[]` | List of unstable feature flags to enable. Possible values: `"testing"`, `"runtime_version"`. Do not also set `FELDERA_UNSTABLE_FEATURES` in `controlPlane.env`. |
| `felderaSentryEnabled` | `false` | Send crash reports and logs to Feldera's Sentry installation. |
| `cloudApiEndpoint` | `"https://cloud1.feldera.com"` | Feldera cloud API endpoint used for license verification and runner telemetry. |
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@ notably at most 1000 and with a time limit of 72 hours (whichever comes first).
With this, it is possible to access both the latest health check of the cluster
and its health in the recent past. The events are accessible through the API.

The resources monitoring feature is not yet stabilized, but can already be activated by adding
`cluster_monitor_resources` to the Helm chart `unstableFeatures` array value.

## API usage

The cluster monitor events can be retrieved via two endpoints:
Expand Down
Loading