From 29338d8d0657821cfb0349cf657fd6635d8728f6 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Mon, 12 Jan 2026 18:27:30 +0100 Subject: [PATCH 01/46] [#171]: updated cli error handling. Added CliError to every function that returns a Result type --- cli/src/essential.rs | 53 +++++++++---- cli/src/install.rs | 46 ++++++----- cli/src/logs.rs | 36 ++++----- cli/src/main.rs | 1 - cli/src/monitoring.rs | 174 ++++++++++++++++++++++-------------------- cli/src/service.rs | 128 ++++++++++++++++--------------- cli/src/status.rs | 18 ++--- cli/src/uninstall.rs | 58 ++++++++------ 8 files changed, 283 insertions(+), 231 deletions(-) diff --git a/cli/src/essential.rs b/cli/src/essential.rs index 3f43350..1512c80 100644 --- a/cli/src/essential.rs +++ b/cli/src/essential.rs @@ -1,12 +1,10 @@ use std::borrow::Cow; -use std::process::Output; use std::thread; use std::time::Duration; use std::{collections::BTreeMap, fmt, process::Command, result::Result::Ok}; use anyhow::Error; use colored::Colorize; -use k8s_openapi::apimachinery::pkg::version; use kube::core::ErrorResponse; use serde::Serialize; @@ -69,10 +67,20 @@ impl From for CliError { } } impl From<()> for CliError { - fn from(v: ()) -> Self { + fn from(e: ()) -> Self { return ().into(); } } +impl From for CliError { + fn from(e: prost::DecodeError) -> Self { + todo!() + } +} +impl From for CliError { + fn from(e: tonic::Status) -> Self { + todo!() + } +} // docs: // fmt::Display implementation for CliError type. Creates a user friendly message error message. @@ -84,7 +92,11 @@ impl fmt::Display for CliError { CliError::InstallerError { reason } => { write!( f, - "An error occured while installing cortexflow components. Reason: {}", + "{} {} {}", + "=====>".blue().bold(), + "An error occured while installing cortexflow components. Reason:" + .bold() + .red(), reason ) } @@ -103,7 +115,15 @@ impl fmt::Display for CliError { ) } CliError::ClientError(e) => write!(f, "Client Error: {}", e), - CliError::AgentError(e) => write!(f, "Agent Error: {}", e), + CliError::AgentError(e) => { + write!( + f, + "{} {} {}", + "=====>".bold().blue(), + "Agent Error:".bold().red(), + e + ) + } } } } @@ -217,16 +237,17 @@ pub fn update_cli() { // docs: // // This function returns the latest version of the CLI from the crates.io registry -pub fn get_latest_cfcli_version() -> Result { +// TODO: implement CliError here +pub fn get_latest_cfcli_version() -> Result { let output = Command::new("cargo") .args(["search", "cortexflow-cli", "--limit", "1"]) .output() .expect("Error"); if !output.status.success() { - return Err(Error::msg(format!( - "An error occured during the latest version extraction" - ))); + return Err(CliError::InstallerError { + reason: "Cannot extract the latest version".to_string(), + }); } else { let command_stdout = String::from_utf8_lossy(&output.stdout); @@ -323,10 +344,10 @@ pub async fn read_configs() -> Result, CliError> { Ok(Vec::new()) //in case the key fails } - Err(_) => Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + Err(e) => Err(CliError::ClientError(kube::Error::Api(ErrorResponse { status: "failed".to_string(), message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), + reason: e.to_string(), code: 404, }))), } @@ -351,7 +372,7 @@ pub async fn create_config_file(config_struct: MetadataConfigFile) -> Result<(), match connect_to_client().await { Ok(client) => { let namespace = "cortexflow"; - let configmap = "cortexbrain-client-config"; + //let configmap = "cortexbrain-client-config"; let api: Api = Api::namespaced(client, namespace); @@ -378,10 +399,10 @@ pub async fn create_config_file(config_struct: MetadataConfigFile) -> Result<(), } Ok(()) } - Err(_) => Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + Err(e) => Err(CliError::ClientError(kube::Error::Api(ErrorResponse { status: "failed".to_string(), message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), + reason: e.to_string(), code: 404, }))), } @@ -479,10 +500,10 @@ pub async fn update_configmap(config_struct: MetadataConfigFile) -> Result<(), C Ok(()) } - Err(_) => Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + Err(e) => Err(CliError::ClientError(kube::Error::Api(ErrorResponse { status: "failed".to_string(), message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), + reason: e.to_string(), code: 404, }))), } diff --git a/cli/src/install.rs b/cli/src/install.rs index a24fc22..4dd3e12 100644 --- a/cli/src/install.rs +++ b/cli/src/install.rs @@ -1,7 +1,7 @@ use crate::essential::{ BASE_COMMAND, CliError, connect_to_client, create_config_file, create_configs, }; -use clap::{Args, Subcommand, command}; +use clap::{Args, Subcommand}; use colored::Colorize; use kube::Error; use kube::core::ErrorResponse; @@ -144,12 +144,16 @@ async fn install_cluster_components() -> Result<(), CliError> { ); Ok(()) } - Err(e) => Err(CliError::ClientError(Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }))), + Err(e) => { + return { + Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))) + }; + } } } @@ -190,12 +194,16 @@ async fn install_simple_example_component() -> Result<(), CliError> { ); Ok(()) } - Err(e) => Err(CliError::ClientError(Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }))), + Err(e) => { + return { + Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))) + }; + } } } @@ -282,8 +290,8 @@ fn apply_component(file: &str) -> Result<(), CliError> { let output = Command::new(BASE_COMMAND) .args(["apply", "-f", file]) .output() - .map_err(|_| CliError::InstallerError { - reason: "Can't install component from file".to_string(), + .map_err(|e| CliError::InstallerError { + reason: e.to_string(), })?; if !output.status.success() { @@ -366,8 +374,8 @@ fn download_file(src: &str) -> Result<(), CliError> { Command::new("wget") .args([src]) .output() - .map_err(|_| CliError::InstallerError { - reason: "An error occured: component download failed".to_string(), + .map_err(|e| CliError::InstallerError { + reason: e.to_string(), })?; if !output.status.success() { @@ -396,8 +404,8 @@ fn rm_file(file_to_remove: &str) -> Result<(), CliError> { let output = Command::new("rm") .args(["-f", file_to_remove]) .output() - .map_err(|_| CliError::InstallerError { - reason: "cannot remove temporary installation file".to_string(), + .map_err(|e| CliError::InstallerError { + reason: e.to_string(), })?; if !output.status.success() { diff --git a/cli/src/logs.rs b/cli/src/logs.rs index bd819cc..1efd9bc 100644 --- a/cli/src/logs.rs +++ b/cli/src/logs.rs @@ -146,13 +146,13 @@ pub async fn logs_command( Ok(()) } - Err(_) => { - Err( + Err(e) => { + return Err( CliError::ClientError( Error::Api(ErrorResponse { status: "failed".to_string(), message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), + reason: e.to_string(), code: 404, }) ) @@ -181,13 +181,13 @@ pub async fn check_namespace_exists(namespace: &str) -> Result { Err(_) => Ok(false), } } - Err(_) => { - Err( + Err(e) => { + return Err( CliError::ClientError( Error::Api(ErrorResponse { status: "failed".to_string(), message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), + reason: e.to_string(), code: 404, }) ) @@ -232,13 +232,13 @@ pub async fn get_available_namespaces() -> Result, CliError> { _ => Ok(Vec::new()), } } - Err(_) => { - Err( + Err(e) => { + return Err( CliError::ClientError( Error::Api(ErrorResponse { status: "failed".to_string(), message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), + reason: e.to_string(), code: 404, }) ) @@ -290,13 +290,13 @@ async fn get_pods_for_service( _ => Ok(Vec::new()), } } - Err(_) => { - Err( + Err(e) => { + return Err( CliError::ClientError( Error::Api(ErrorResponse { status: "failed".to_string(), message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), + reason: e.to_string(), code: 404, }) ) @@ -349,13 +349,13 @@ async fn get_pods_for_component( _ => Ok(Vec::new()), } } - Err(_) => { - Err( + Err(e) => { + return Err( CliError::ClientError( Error::Api(ErrorResponse { status: "failed".to_string(), message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), + reason: e.to_string(), code: 404, }) ) @@ -402,13 +402,13 @@ async fn get_all_pods(namespace: &str) -> Result, CliError> { _ => Ok(Vec::new()), } } - Err(_) => { - Err( + Err(e) => { + return Err( CliError::ClientError( Error::Api(ErrorResponse { status: "failed".to_string(), message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), + reason: e.to_string(), code: 404, }) ) diff --git a/cli/src/main.rs b/cli/src/main.rs index 272123f..dea5d83 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,4 +1,3 @@ -#![allow(warnings)] mod essential; mod install; mod logs; diff --git a/cli/src/monitoring.rs b/cli/src/monitoring.rs index 506cc5f..1be9e31 100644 --- a/cli/src/monitoring.rs +++ b/cli/src/monitoring.rs @@ -1,9 +1,7 @@ -#![allow(warnings)] - //monitoring CLI function for identity service -use anyhow::Error; use colored::Colorize; use k8s_openapi::chrono::DateTime; +use kube::core::ErrorResponse; use prost::Message; use prost_types::FileDescriptorProto; use std::result::Result::Ok; @@ -12,8 +10,8 @@ use tonic_reflection::pb::v1::server_reflection_response::MessageResponse; use agent_api::client::{connect_to_client, connect_to_server_reflection}; use agent_api::requests::{get_all_features, send_active_connection_request}; -use clap::command; -use clap::{Args, Parser, Subcommand}; +use crate::essential::CliError; +use clap::{Args, Subcommand}; //monitoring subcommands #[derive(Subcommand, Debug, Clone)] @@ -23,15 +21,18 @@ pub enum MonitorCommands { #[command( name = "connections", about = "Monitor the recent connections detected by the identity service" - )] Connections, + )] + Connections, #[command( name = "latencymetrics", about = "Monitor the latency metrics detected by the metrics service" - )] Latencymetrics, + )] + Latencymetrics, #[command( name = "droppedpackets", about = "Monitor the dropped packets metrics detected by the metrics service" - )] Droppedpackets, + )] + Droppedpackets, } // cfcli monitor @@ -43,7 +44,7 @@ pub struct MonitorArgs { //pub flags: Option, } -pub async fn list_features() -> Result<(), Error> { +pub async fn list_features() -> Result<(), CliError> { match connect_to_server_reflection().await { Ok(client) => { println!( @@ -57,9 +58,8 @@ pub async fn list_features() -> Result<(), Error> { //decoding the proto file while let Some(resp) = streaming.message().await? { - if - let Some(MessageResponse::FileDescriptorResponse(fdr)) = - resp.message_response + if let Some(MessageResponse::FileDescriptorResponse(fdr)) = + resp.message_response { println!("Available services:"); for bytes in fdr.file_descriptor_proto { @@ -77,35 +77,38 @@ pub async fn list_features() -> Result<(), Error> { } } Err(e) => { - println!( - "{} {} {} {}", - "=====>".blue().bold(), - "An error occured".red(), - "Error:", - e - ); - return Err(e); + return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } } Err(e) => { - println!( - "{} {}", - "=====>".blue().bold(), - "Failed to connect to CortexFlow Server Reflection".red() - ); - return Err(e); + return Err(CliError::AgentError( + tonic_reflection::server::Error::InvalidFileDescriptorSet(e.to_string()), + )); } } Ok(()) } -pub async fn monitor_identity_events() -> Result<(), Error> { - println!("{} {}", "=====>".blue().bold(), "Connecting to cortexflow Client".white()); +pub async fn monitor_identity_events() -> Result<(), CliError> { + println!( + "{} {}", + "=====>".blue().bold(), + "Connecting to cortexflow Client".white() + ); match connect_to_client().await { Ok(client) => { - println!("{} {}", "=====>".blue().bold(), "Connected to CortexFlow Client".green()); + println!( + "{} {}", + "=====>".blue().bold(), + "Connected to CortexFlow Client".green() + ); match send_active_connection_request(client).await { Ok(response) => { let resp = response.into_inner(); @@ -130,37 +133,40 @@ pub async fn monitor_identity_events() -> Result<(), Error> { } } Err(e) => { - println!( - "{} {} {} {}", - "=====>".blue().bold(), - "An error occured".red(), - "Error:", - e - ); - return Err(e); + return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } } Err(e) => { - println!( - "{} {}", - "=====>".blue().bold(), - "Failed to connect to CortexFlow Client".red() - ); - return Err(e); + return Err(CliError::AgentError( + tonic_reflection::server::Error::InvalidFileDescriptorSet(e.to_string()), + )); } } Ok(()) } -pub async fn monitor_latency_metrics() -> Result<(), Error> { +pub async fn monitor_latency_metrics() -> Result<(), CliError> { //function to monitor latency metrics - println!("{} {}", "=====>".blue().bold(), "Connecting to cortexflow Client".white()); + println!( + "{} {}", + "=====>".blue().bold(), + "Connecting to cortexflow Client".white() + ); match connect_to_client().await { Ok(client) => { - println!("{} {}", "=====>".blue().bold(), "Connected to CortexFlow Client".green()); + println!( + "{} {}", + "=====>".blue().bold(), + "Connected to CortexFlow Client".green() + ); //send request to get latency metrics match agent_api::requests::send_latency_metrics_request(client).await { Ok(response) => { @@ -173,9 +179,10 @@ pub async fn monitor_latency_metrics() -> Result<(), Error> { "=====>".blue().bold(), resp.metrics.len() ); - + for (i, metric) in resp.metrics.iter().enumerate() { - let converted_timestamp= convert_timestamp_to_date(metric.timestamp_us); + let converted_timestamp = + convert_timestamp_to_date(metric.timestamp_us); println!( "{} Latency[{}] \n tgid: {} \n process_name: {} \n address_family: {} \n delta(us): {} \n src_address_v4: {} \n dst_address_v4: {} \n src_address_v6: {} \n dst_address_v6: {} \n local_port: {} \n remote_port: {} \n timestamp_us: {}\n", "=====>".blue().bold(), @@ -196,36 +203,42 @@ pub async fn monitor_latency_metrics() -> Result<(), Error> { } } Err(e) => { - println!( - "{} {} {} {}", - "=====>".blue().bold(), - "An error occured".red(), - "Error:", - e - ); - return Err(e); + return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } } Err(e) => { - println!( - "{} {}", - "=====>".blue().bold(), - "Failed to connect to CortexFlow Client".red() - ); - return Err(e); + return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } Ok(()) } -pub async fn monitor_dropped_packets() -> Result<(), Error> { +pub async fn monitor_dropped_packets() -> Result<(), CliError> { //function to monitor dropped packets metrics - println!("{} {}", "=====>".blue().bold(), "Connecting to cortexflow Client".white()); + println!( + "{} {}", + "=====>".blue().bold(), + "Connecting to cortexflow Client".white() + ); match connect_to_client().await { Ok(client) => { - println!("{} {}", "=====>".blue().bold(), "Connected to CortexFlow Client".green()); + println!( + "{} {}", + "=====>".blue().bold(), + "Connected to CortexFlow Client".green() + ); //send request to get dropped packets metrics match agent_api::requests::send_dropped_packets_request(client).await { Ok(response) => { @@ -242,7 +255,8 @@ pub async fn monitor_dropped_packets() -> Result<(), Error> { resp.metrics.len() ); for (i, metric) in resp.metrics.iter().enumerate() { - let converted_timestamp= convert_timestamp_to_date(metric.timestamp_us); + let converted_timestamp = + convert_timestamp_to_date(metric.timestamp_us); println!( "{} DroppedPackets[{}]\n TGID: {}\n Process: {}\n SK Drops: {}\n Socket Errors: {}\n Soft Errors: {}\n Backlog Length: {}\n Write Memory Queued: {}\n Receive Buffer Size: {}\n ACK Backlog: {}\n Timestamp: {} µs", "=====>".blue().bold(), @@ -262,30 +276,28 @@ pub async fn monitor_dropped_packets() -> Result<(), Error> { } } Err(e) => { - println!( - "{} {} {} {}", - "=====>".blue().bold(), - "An error occured".red(), - "Error:", - e - ); - return Err(e); + return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } } Err(e) => { - println!( - "{} {}", - "=====>".blue().bold(), - "Failed to connect to CortexFlow Client".red() - ); - return Err(e); + return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } Ok(()) } -fn convert_timestamp_to_date(timestamp:u64)->String{ +fn convert_timestamp_to_date(timestamp: u64) -> String { let datetime = DateTime::from_timestamp_micros(timestamp as i64).unwrap(); datetime.to_string() } diff --git a/cli/src/service.rs b/cli/src/service.rs index b66ed7e..91e6ee0 100644 --- a/cli/src/service.rs +++ b/cli/src/service.rs @@ -1,19 +1,21 @@ -use std::{ str, process::Command }; +use clap::{Args, Subcommand}; use colored::Colorize; -use clap::{ Args, Subcommand }; -use kube::{ core::ErrorResponse, Error }; +use kube::{Error, core::ErrorResponse}; +use std::{process::Command, str}; -use crate::essential::{ BASE_COMMAND, connect_to_client, CliError }; -use crate::logs::{ get_available_namespaces, check_namespace_exists }; +use crate::essential::{BASE_COMMAND, CliError, connect_to_client}; +use crate::logs::{check_namespace_exists, get_available_namespaces}; //service subcommands #[derive(Subcommand, Debug, Clone)] pub enum ServiceCommands { - #[command(name = "list", about = "Check services list")] List { + #[command(name = "list", about = "Check services list")] + List { #[arg(long)] namespace: Option, }, - #[command(name = "describe", about = "Describe service")] Describe { + #[command(name = "describe", about = "Describe service")] + Describe { service_name: String, #[arg(long)] namespace: Option, @@ -44,7 +46,12 @@ pub async fn list_services(namespace: Option) -> Result<(), CliError> { Ok(_) => { let ns = namespace.unwrap_or_else(|| "cortexflow".to_string()); - println!("{} {} {}", "=====>".blue().bold(), "Listing services in namespace:", ns); + println!( + "{} {} {}", + "=====>".blue().bold(), + "Listing services in namespace:", + ns + ); // Check if namespace exists first if !check_namespace_exists(&ns).await? { @@ -87,7 +94,10 @@ pub async fn list_services(namespace: Option) -> Result<(), CliError> { } // header for Table - println!("{:<40} {:<20} {:<10} {:<10}", "NAME", "STATUS", "RESTARTS", "AGE"); + println!( + "{:<40} {:<20} {:<10} {:<10}", + "NAME", "STATUS", "RESTARTS", "AGE" + ); println!("{}", "-".repeat(80)); // Display Each Pod. @@ -108,40 +118,33 @@ pub async fn list_services(namespace: Option) -> Result<(), CliError> { println!( "{:<40} {:<20} {:<10} {:<10}", - name, - full_status, - restarts, - age + name, full_status, restarts, age ); } } Ok(()) } Err(err) => { - Err( - CliError::ClientError( - Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to execute the kubectl command".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }) - ) - ) + return { + Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to execute the kubectl command".to_string(), + reason: err.to_string(), + code: 404, + }))) + }; } } } - Err(_) => { - Err( - CliError::ClientError( - Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }) - ) - ) + Err(e) => { + return { + Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))) + }; } } } @@ -161,7 +164,7 @@ pub async fn list_services(namespace: Option) -> Result<(), CliError> { pub async fn describe_service( service_name: String, - namespace: &Option + namespace: &Option, ) -> Result<(), CliError> { match connect_to_client().await { Ok(_) => { @@ -169,7 +172,9 @@ pub async fn describe_service( Ok(_) => { //let file_path = get_config_directory().unwrap().1; - let ns = namespace.clone().unwrap_or_else(|| "cortexflow".to_string()); + let ns = namespace + .clone() + .unwrap_or_else(|| "cortexflow".to_string()); println!( "{} {} {} {} {}", @@ -193,7 +198,10 @@ pub async fn describe_service( for available_ns in &available_namespaces { println!(" • {}", available_ns); } - println!("\nTry: cortex service describe {} --namespace ", service_name); + println!( + "\nTry: cortex service describe {} --namespace ", + service_name + ); } else { println!("No namespaces found in the cluster."); } @@ -207,14 +215,12 @@ pub async fn describe_service( match output { Ok(output) => { if !output.status.success() { - let error = str - ::from_utf8(&output.stderr) - .unwrap_or("Unknown error"); + let error = + str::from_utf8(&output.stderr).unwrap_or("Unknown error"); eprintln!("Error executing kubectl describe: {}", error); eprintln!( "Make sure the pod '{}' exists in namespace '{}'", - service_name, - ns + service_name, ns ); } @@ -229,33 +235,29 @@ pub async fn describe_service( Ok(()) } Err(err) => { - Err( - CliError::ClientError( - Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to execute the kubectl command ".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }) - ) - ) + return { + Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to execute the kubectl command ".to_string(), + reason: err.to_string(), + code: 404, + }))) + }; } } } Err(e) => todo!(), } } - Err(_) => { - Err( - CliError::ClientError( - Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }) - ) - ) + Err(e) => { + return { + Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))) + }; } } } diff --git a/cli/src/status.rs b/cli/src/status.rs index 2680781..16570cc 100644 --- a/cli/src/status.rs +++ b/cli/src/status.rs @@ -130,13 +130,13 @@ pub async fn status_command( } } } - Err(_) => { - Err( + Err(e) => { + return Err( CliError::ClientError( Error::Api(ErrorResponse { status: "failed".to_string(), message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), + reason: e.to_string(), code: 404, }) ) @@ -185,13 +185,13 @@ async fn get_pods_status(namespace: &str) -> Result Ok(Vec::new()), } } - Err(_) => { - Err( + Err(e) => { + return Err( CliError::ClientError( Error::Api(ErrorResponse { status: "failed".to_string(), message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), + reason: e.to_string(), code: 404, }) ) @@ -240,13 +240,13 @@ async fn get_services_status(namespace: &str) -> Result Ok(Vec::new()), } } - Err(_) => { - Err( + Err(e) => { + return Err( CliError::ClientError( Error::Api(ErrorResponse { status: "failed".to_string(), message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), + reason: e.to_string(), code: 404, }) ) diff --git a/cli/src/uninstall.rs b/cli/src/uninstall.rs index 0d71cfa..afcc935 100644 --- a/cli/src/uninstall.rs +++ b/cli/src/uninstall.rs @@ -38,12 +38,16 @@ pub async fn uninstall() -> Result<(), CliError> { } Ok(()) } - Err(_) => Err(CliError::ClientError(Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }))), + Err(e) => { + return { + Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))) + }; + } } } @@ -85,18 +89,21 @@ async fn uninstall_all() -> Result<(), CliError> { Ok(()) } else { let stderr = String::from_utf8_lossy(&output.stderr); - eprintln!("Error deleting cortexflow namespace. Error: {} ", stderr); - Err(CliError::InstallerError { + return Err(CliError::InstallerError { reason: format!("Failed to delete cortexflow namespace. Error: {}", stderr), - }) + }); } } - Err(_) => Err(CliError::ClientError(Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }))), + Err(e) => { + return { + Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))) + }; + } } } @@ -131,18 +138,21 @@ async fn uninstall_component(component_type: &str, component: &str) -> Result<() Ok(()) } else { let stderr = String::from_utf8_lossy(&output.stderr); - eprintln!("Error deleting {}:\n{}", component, stderr); - Err(CliError::InstallerError { + return Err(CliError::InstallerError { reason: format!("Failed to delete component '{}': {}", component, stderr), - }) + }); } } - Err(_) => Err(CliError::ClientError(Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: "Your cluster is probably disconnected".to_string(), - code: 404, - }))), + Err(e) => { + return { + Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))) + }; + } } } From 8b976d9e2dd2942621732fa59963cf6f0cf06773 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Mon, 12 Jan 2026 18:40:14 +0100 Subject: [PATCH 02/46] [#171]: moved custom errors implementation in the error.rs module --- cli/src/errors.rs | 115 +++++++++++++++++++++++++++++ cli/src/essential.rs | 118 +----------------------------- cli/src/install.rs | 3 +- cli/src/logs.rs | 134 +++++++++++++++------------------- cli/src/main.rs | 166 ++++++++++++++++++++++++------------------ cli/src/mod.rs | 3 +- cli/src/monitoring.rs | 2 +- cli/src/service.rs | 3 +- cli/src/status.rs | 3 +- cli/src/uninstall.rs | 3 +- 10 files changed, 283 insertions(+), 267 deletions(-) create mode 100644 cli/src/errors.rs diff --git a/cli/src/errors.rs b/cli/src/errors.rs new file mode 100644 index 0000000..bf83d21 --- /dev/null +++ b/cli/src/errors.rs @@ -0,0 +1,115 @@ +use colored::Colorize; +use std::fmt; + +// docs: +// +// CliError enum to group all the errors +// +// Custom error definition +// InstallerError: +// - used for general installation errors occured during the installation of cortexflow components. Can be used for: +// - Return downloading errors +// - Return unsuccessful file removal during installation +// +// ClientError: +// - used for Kubernetes client errors. Can be used for: +// - Return client connection errors +// +// UninstallError: +// - used for general installation errors occured during the uninstall for cortexflow components. Can be used for: +// - Return components removal errors +// +// AgentError: +// - used for cortexflow agent errors. Can be used for: +// - return errors from the reflection server +// - return unavailable agent errors (404) +// +// MonitoringError: +// - used for general monitoring errors. TODO: currently under implementation +// +// implements fmt::Display for user friendly error messages + +#[derive(Debug)] +pub enum CliError { + InstallerError { reason: String }, + ClientError(kube::Error), + UninstallError { reason: String }, + AgentError(tonic_reflection::server::Error), + MonitoringError { reason: String }, +} +// docs: +// error type conversions + +impl From for CliError { + fn from(e: kube::Error) -> Self { + CliError::ClientError(e) + } +} +impl From for CliError { + fn from(e: anyhow::Error) -> Self { + CliError::MonitoringError { + reason: format!("{}", e), + } + } +} +impl From<()> for CliError { + fn from(e: ()) -> Self { + return ().into(); + } +} +impl From for CliError { + fn from(e: prost::DecodeError) -> Self { + todo!() + } +} +impl From for CliError { + fn from(e: tonic::Status) -> Self { + todo!() + } +} + +// docs: +// fmt::Display implementation for CliError type. Creates a user friendly message error message. +// TODO: implement colored messages using the colorize crate for better output display + +impl fmt::Display for CliError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + CliError::InstallerError { reason } => { + write!( + f, + "{} {} {}", + "=====>".blue().bold(), + "An error occured while installing cortexflow components. Reason:" + .bold() + .red(), + reason + ) + } + CliError::UninstallError { reason } => { + write!( + f, + "An error occured while installing cortexflow components. Reason: {}", + reason + ) + } + CliError::MonitoringError { reason } => { + write!( + f, + "An error occured while installing cortexflow components. Reason: {}", + reason + ) + } + CliError::ClientError(e) => write!(f, "Client Error: {}", e), + CliError::AgentError(e) => { + write!( + f, + "{} {} {}", + "=====>".bold().blue(), + "Agent Error:".bold().red(), + e + ) + } + } + } +} diff --git a/cli/src/essential.rs b/cli/src/essential.rs index 1512c80..4067db0 100644 --- a/cli/src/essential.rs +++ b/cli/src/essential.rs @@ -1,9 +1,9 @@ +use crate::errors::CliError; use std::borrow::Cow; use std::thread; use std::time::Duration; -use std::{collections::BTreeMap, fmt, process::Command, result::Result::Ok}; +use std::{collections::BTreeMap, process::Command, result::Result::Ok}; -use anyhow::Error; use colored::Colorize; use kube::core::ErrorResponse; use serde::Serialize; @@ -15,119 +15,6 @@ use kube::client::Client; pub static BASE_COMMAND: &str = "kubectl"; // docs: Kubernetes base command -// docs: -// -// CliError enum to group all the errors -// -// Custom error definition -// InstallerError: -// - used for general installation errors occured during the installation of cortexflow components. Can be used for: -// - Return downloading errors -// - Return unsuccessful file removal during installation -// -// ClientError: -// - used for Kubernetes client errors. Can be used for: -// - Return client connection errors -// -// UninstallError: -// - used for general installation errors occured during the uninstall for cortexflow components. Can be used for: -// - Return components removal errors -// -// AgentError: -// - used for cortexflow agent errors. Can be used for: -// - return errors from the reflection server -// - return unavailable agent errors (404) -// -// MonitoringError: -// - used for general monitoring errors. TODO: currently under implementation -// -// implements fmt::Display for user friendly error messages - -#[derive(Debug)] -pub enum CliError { - InstallerError { reason: String }, - ClientError(kube::Error), - UninstallError { reason: String }, - AgentError(tonic_reflection::server::Error), - MonitoringError { reason: String }, -} -// docs: -// error type conversions - -impl From for CliError { - fn from(e: kube::Error) -> Self { - CliError::ClientError(e) - } -} -impl From for CliError { - fn from(e: anyhow::Error) -> Self { - CliError::MonitoringError { - reason: format!("{}", e), - } - } -} -impl From<()> for CliError { - fn from(e: ()) -> Self { - return ().into(); - } -} -impl From for CliError { - fn from(e: prost::DecodeError) -> Self { - todo!() - } -} -impl From for CliError { - fn from(e: tonic::Status) -> Self { - todo!() - } -} - -// docs: -// fmt::Display implementation for CliError type. Creates a user friendly message error message. -// TODO: implement colored messages using the colorize crate for better output display - -impl fmt::Display for CliError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - CliError::InstallerError { reason } => { - write!( - f, - "{} {} {}", - "=====>".blue().bold(), - "An error occured while installing cortexflow components. Reason:" - .bold() - .red(), - reason - ) - } - CliError::UninstallError { reason } => { - write!( - f, - "An error occured while installing cortexflow components. Reason: {}", - reason - ) - } - CliError::MonitoringError { reason } => { - write!( - f, - "An error occured while installing cortexflow components. Reason: {}", - reason - ) - } - CliError::ClientError(e) => write!(f, "Client Error: {}", e), - CliError::AgentError(e) => { - write!( - f, - "{} {} {}", - "=====>".bold().blue(), - "Agent Error:".bold().red(), - e - ) - } - } - } -} - #[derive(Serialize)] pub struct MetadataConfigFile { blocklist: Vec, @@ -237,7 +124,6 @@ pub fn update_cli() { // docs: // // This function returns the latest version of the CLI from the crates.io registry -// TODO: implement CliError here pub fn get_latest_cfcli_version() -> Result { let output = Command::new("cargo") .args(["search", "cortexflow-cli", "--limit", "1"]) diff --git a/cli/src/install.rs b/cli/src/install.rs index 4dd3e12..19aacc9 100644 --- a/cli/src/install.rs +++ b/cli/src/install.rs @@ -1,6 +1,7 @@ use crate::essential::{ - BASE_COMMAND, CliError, connect_to_client, create_config_file, create_configs, + BASE_COMMAND, connect_to_client, create_config_file, create_configs, }; +use crate::errors::CliError; use clap::{Args, Subcommand}; use colored::Colorize; use kube::Error; diff --git a/cli/src/logs.rs b/cli/src/logs.rs index 1efd9bc..4a6ce3f 100644 --- a/cli/src/logs.rs +++ b/cli/src/logs.rs @@ -1,8 +1,9 @@ -use std::{ str, process::Command, result::Result::Ok }; -use colored::Colorize; +use crate::errors::CliError; +use crate::essential::{BASE_COMMAND, connect_to_client}; use clap::Args; -use kube::{ Error, core::ErrorResponse }; -use crate::essential::{ connect_to_client, BASE_COMMAND, CliError }; +use colored::Colorize; +use kube::{Error, core::ErrorResponse}; +use std::{process::Command, result::Result::Ok, str}; #[derive(Args, Debug, Clone)] pub struct LogsArgs { @@ -53,7 +54,7 @@ impl Component { pub async fn logs_command( service: Option, component: Option, - namespace: Option + namespace: Option, ) -> Result<(), CliError> { match connect_to_client().await { Ok(_) => { @@ -92,12 +93,18 @@ pub async fn logs_command( .collect() } (Some(service_name), None) => { - println!("Getting logs for service '{}' in namespace '{}'", service_name, ns); + println!( + "Getting logs for service '{}' in namespace '{}'", + service_name, ns + ); get_pods_for_service(&ns, &service_name).await? } (None, Some(component_str)) => { let comp = Component::from(component_str); - println!("Getting logs for component '{:?}' in namespace '{}'", comp, ns); + println!( + "Getting logs for component '{:?}' in namespace '{}'", + comp, ns + ); get_pods_for_component(&ns, &comp).await? } (None, None) => { @@ -117,8 +124,9 @@ pub async fn logs_command( for pod in pods { println!("{} Logs for pod: {:?}", "=====>".blue().bold(), pod); - match - Command::new(BASE_COMMAND).args(["logs", &pod, "-n", &ns, "--tail=50"]).output() + match Command::new(BASE_COMMAND) + .args(["logs", &pod, "-n", &ns, "--tail=50"]) + .output() { Ok(output) => { if output.status.success() { @@ -136,9 +144,7 @@ pub async fn logs_command( Err(err) => { eprintln!( "Failed to execute {} logs for pod '{:?}': {}", - BASE_COMMAND, - pod, - err + BASE_COMMAND, pod, err ); } } @@ -147,16 +153,12 @@ pub async fn logs_command( Ok(()) } Err(e) => { - return Err( - CliError::ClientError( - Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: e.to_string(), - code: 404, - }) - ) - ) + return Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } } @@ -174,7 +176,9 @@ pub async fn logs_command( pub async fn check_namespace_exists(namespace: &str) -> Result { match connect_to_client().await { Ok(_) => { - let output = Command::new(BASE_COMMAND).args(["get", "namespace", namespace]).output(); + let output = Command::new(BASE_COMMAND) + .args(["get", "namespace", namespace]) + .output(); match output { Ok(output) => Ok(output.status.success()), @@ -182,16 +186,12 @@ pub async fn check_namespace_exists(namespace: &str) -> Result { } } Err(e) => { - return Err( - CliError::ClientError( - Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: e.to_string(), - code: 404, - }) - ) - ) + return Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } } @@ -233,16 +233,12 @@ pub async fn get_available_namespaces() -> Result, CliError> { } } Err(e) => { - return Err( - CliError::ClientError( - Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: e.to_string(), - code: 404, - }) - ) - ) + return Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } } @@ -259,7 +255,7 @@ pub async fn get_available_namespaces() -> Result, CliError> { async fn get_pods_for_service( namespace: &str, - service_name: &str + service_name: &str, ) -> Result, CliError> { match connect_to_client().await { Ok(_) => { @@ -291,16 +287,12 @@ async fn get_pods_for_service( } } Err(e) => { - return Err( - CliError::ClientError( - Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: e.to_string(), - code: 404, - }) - ) - ) + return Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } } @@ -318,7 +310,7 @@ async fn get_pods_for_service( async fn get_pods_for_component( namespace: &str, - component: &Component + component: &Component, ) -> Result, CliError> { match connect_to_client().await { Ok(_) => { @@ -350,16 +342,12 @@ async fn get_pods_for_component( } } Err(e) => { - return Err( - CliError::ClientError( - Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: e.to_string(), - code: 404, - }) - ) - ) + return Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } } @@ -403,16 +391,12 @@ async fn get_all_pods(namespace: &str) -> Result, CliError> { } } Err(e) => { - return Err( - CliError::ClientError( - Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: e.to_string(), - code: 404, - }) - ) - ) + return Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } } diff --git a/cli/src/main.rs b/cli/src/main.rs index dea5d83..ba6b623 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,3 +1,4 @@ +mod errors; mod essential; mod install; mod logs; @@ -7,24 +8,24 @@ mod service; mod status; mod uninstall; -use clap::{ Args, Parser, Subcommand }; +use clap::{Args, Parser, Subcommand}; use colored::Colorize; use std::result::Result::Ok; use tracing::debug; -use crate::essential::{ CliError, info, update_cli }; -use crate::install::{ InstallArgs, InstallCommands, install_cortexflow, install_simple_example }; -use crate::logs::{ LogsArgs, logs_command }; -use crate::monitoring::{ MonitorArgs, MonitorCommands, list_features, monitor_dropped_packets, monitor_identity_events, monitor_latency_metrics }; +use crate::errors::CliError; +use crate::essential::{info, update_cli}; +use crate::install::{InstallArgs, InstallCommands, install_cortexflow, install_simple_example}; +use crate::logs::{LogsArgs, logs_command}; +use crate::monitoring::{ + MonitorArgs, MonitorCommands, list_features, monitor_dropped_packets, monitor_identity_events, + monitor_latency_metrics, +}; use crate::policies::{ - PoliciesArgs, - PoliciesCommands, - check_blocklist, - create_blocklist, - remove_ip, + PoliciesArgs, PoliciesCommands, check_blocklist, create_blocklist, remove_ip, }; -use crate::service::{ ServiceArgs, ServiceCommands, describe_service, list_services }; -use crate::status::{ StatusArgs, status_command }; +use crate::service::{ServiceArgs, ServiceCommands, describe_service, list_services}; +use crate::status::{StatusArgs, status_command}; use crate::uninstall::uninstall; use crate::essential::update_config_metadata; @@ -45,18 +46,24 @@ struct Cli { #[derive(Subcommand, Debug, Clone)] enum Commands { /* list of available commands */ - #[command(name = "install", about = "Manage installation")] Install(InstallArgs), + #[command(name = "install", about = "Manage installation")] + Install(InstallArgs), #[command(name = "uninstall", about = "Manage uninstallation")] Uninstall, #[command(name = "update", about = "Check for updates")] Update, #[command(name = "info", about = "Check core info")] Info, - #[command(name = "service", about = "Manage services")] Service(ServiceArgs), - #[command(name = "status", about = "Check components status")] Status(StatusArgs), - #[command(name = "logs", about = "Check services logs")] Logs(LogsArgs), - #[command(name = "monitoring", about = "Monitoring commands")] Monitor(MonitorArgs), - #[command(name = "policy", about = "Network Policies")] Policies(PoliciesArgs), + #[command(name = "service", about = "Manage services")] + Service(ServiceArgs), + #[command(name = "status", about = "Check components status")] + Status(StatusArgs), + #[command(name = "logs", about = "Check services logs")] + Logs(LogsArgs), + #[command(name = "monitoring", about = "Monitoring commands")] + Monitor(MonitorArgs), + #[command(name = "policy", about = "Network Policies")] + Policies(PoliciesArgs), } #[derive(Args, Debug, Clone)] struct SetArgs { @@ -67,17 +74,18 @@ async fn args_parser() -> Result<(), CliError> { let args = Cli::parse(); debug!("Arguments {:?}", args.cmd); match args.cmd { - Some(Commands::Install(installation_args)) => - match installation_args.install_cmd { - InstallCommands::All => { - install_cortexflow().await.map_err(|e| eprintln!("{}",e) )?; - } - InstallCommands::TestPods => { - install_simple_example().await.map_err(|e| eprintln!("{}",e) )?; - } + Some(Commands::Install(installation_args)) => match installation_args.install_cmd { + InstallCommands::All => { + install_cortexflow().await.map_err(|e| eprintln!("{}", e))?; + } + InstallCommands::TestPods => { + install_simple_example() + .await + .map_err(|e| eprintln!("{}", e))?; } + }, Some(Commands::Uninstall) => { - uninstall().await.map_err(|e| eprintln!("{}",e) )?; + uninstall().await.map_err(|e| eprintln!("{}", e))?; } Some(Commands::Update) => { update_cli(); @@ -85,40 +93,55 @@ async fn args_parser() -> Result<(), CliError> { Some(Commands::Info) => { info(); } - Some(Commands::Service(service_args)) => - match service_args.service_cmd { - ServiceCommands::List { namespace } => { - list_services(namespace).await.map_err(|e| eprintln!("{}",e) )?; - } - ServiceCommands::Describe { service_name, namespace } => { - describe_service(service_name, &namespace).await.map_err(|e| eprintln!("{}",e) )?; - } + Some(Commands::Service(service_args)) => match service_args.service_cmd { + ServiceCommands::List { namespace } => { + list_services(namespace) + .await + .map_err(|e| eprintln!("{}", e))?; + } + ServiceCommands::Describe { + service_name, + namespace, + } => { + describe_service(service_name, &namespace) + .await + .map_err(|e| eprintln!("{}", e))?; } + }, Some(Commands::Status(status_args)) => { - status_command(status_args.output, status_args.namespace).await.map_err(|e| eprintln!("{}",e) )?; + status_command(status_args.output, status_args.namespace) + .await + .map_err(|e| eprintln!("{}", e))?; } Some(Commands::Logs(logs_args)) => { - logs_command(logs_args.service, logs_args.component, logs_args.namespace).await.map_err(|e| eprintln!("{}",e) )?; + logs_command(logs_args.service, logs_args.component, logs_args.namespace) + .await + .map_err(|e| eprintln!("{}", e))?; } - Some(Commands::Monitor(monitor_args)) => - match monitor_args.monitor_cmd { - MonitorCommands::List => { - let _ = list_features().await.map_err(|e| eprintln!("{}",e) )?; - } - MonitorCommands::Connections => { - let _ = monitor_identity_events().await.map_err(|e| eprintln!("{}",e) )?; - } - MonitorCommands::Latencymetrics => { - let _ = monitor_latency_metrics().await.map_err(|e| eprintln!("{}",e) )?; - } - MonitorCommands::Droppedpackets => { - let _ = monitor_dropped_packets().await.map_err(|e| eprintln!("{}",e) )?; - } + Some(Commands::Monitor(monitor_args)) => match monitor_args.monitor_cmd { + MonitorCommands::List => { + let _ = list_features().await.map_err(|e| eprintln!("{}", e))?; + } + MonitorCommands::Connections => { + let _ = monitor_identity_events() + .await + .map_err(|e| eprintln!("{}", e))?; + } + MonitorCommands::Latencymetrics => { + let _ = monitor_latency_metrics() + .await + .map_err(|e| eprintln!("{}", e))?; } + MonitorCommands::Droppedpackets => { + let _ = monitor_dropped_packets() + .await + .map_err(|e| eprintln!("{}", e))?; + } + }, Some(Commands::Policies(policies_args)) => { match policies_args.policy_cmd { PoliciesCommands::CheckBlocklist => { - let _ = check_blocklist().await.map_err(|e| eprintln!("{}",e) )?; + let _ = check_blocklist().await.map_err(|e| eprintln!("{}", e))?; } PoliciesCommands::CreateBlocklist => { // pass the ip as a monitoring flag @@ -132,7 +155,9 @@ async fn args_parser() -> Result<(), CliError> { match create_blocklist(&ip).await { Ok(_) => { //update the config metadata - let _ = update_config_metadata(&ip, "add").await.map_err(|e| eprintln!("{}",e) )?; + let _ = update_config_metadata(&ip, "add") + .await + .map_err(|e| eprintln!("{}", e))?; } Err(e) => { eprintln!("{}", e); @@ -141,26 +166,27 @@ async fn args_parser() -> Result<(), CliError> { } } } - PoliciesCommands::RemoveIpFromBlocklist => - match policies_args.flags { - None => { - eprintln!( - "{}", - "Insert at least one ip to remove from the blocklist".red() - ); - } - Some(ip) => { - println!("Inserted ip: {}", ip); - match remove_ip(&ip).await { - Ok(_) => { - let _ = update_config_metadata(&ip, "delete").await.map_err(|e| eprintln!("{}",e) )?; - } - Err(e) => { - eprintln!("{}", e); - } + PoliciesCommands::RemoveIpFromBlocklist => match policies_args.flags { + None => { + eprintln!( + "{}", + "Insert at least one ip to remove from the blocklist".red() + ); + } + Some(ip) => { + println!("Inserted ip: {}", ip); + match remove_ip(&ip).await { + Ok(_) => { + let _ = update_config_metadata(&ip, "delete") + .await + .map_err(|e| eprintln!("{}", e))?; + } + Err(e) => { + eprintln!("{}", e); } } } + }, } } None => { diff --git a/cli/src/mod.rs b/cli/src/mod.rs index 2c91fdc..fe7c816 100644 --- a/cli/src/mod.rs +++ b/cli/src/mod.rs @@ -5,4 +5,5 @@ pub mod service; pub mod status; pub mod logs; pub mod monitoring; -pub mod policies; \ No newline at end of file +pub mod policies; +pub mod errors; \ No newline at end of file diff --git a/cli/src/monitoring.rs b/cli/src/monitoring.rs index 1be9e31..d6b120c 100644 --- a/cli/src/monitoring.rs +++ b/cli/src/monitoring.rs @@ -10,7 +10,7 @@ use tonic_reflection::pb::v1::server_reflection_response::MessageResponse; use agent_api::client::{connect_to_client, connect_to_server_reflection}; use agent_api::requests::{get_all_features, send_active_connection_request}; -use crate::essential::CliError; +use crate::errors::CliError; use clap::{Args, Subcommand}; //monitoring subcommands diff --git a/cli/src/service.rs b/cli/src/service.rs index 91e6ee0..21e8e94 100644 --- a/cli/src/service.rs +++ b/cli/src/service.rs @@ -3,7 +3,8 @@ use colored::Colorize; use kube::{Error, core::ErrorResponse}; use std::{process::Command, str}; -use crate::essential::{BASE_COMMAND, CliError, connect_to_client}; +use crate::errors::CliError; +use crate::essential::{BASE_COMMAND, connect_to_client}; use crate::logs::{check_namespace_exists, get_available_namespaces}; //service subcommands diff --git a/cli/src/status.rs b/cli/src/status.rs index 16570cc..ca5d43a 100644 --- a/cli/src/status.rs +++ b/cli/src/status.rs @@ -4,7 +4,8 @@ use clap::Args; use kube::{ Error, core::ErrorResponse }; use crate::logs::{ get_available_namespaces, check_namespace_exists }; -use crate::essential::{ BASE_COMMAND, connect_to_client, CliError }; +use crate::essential::{ BASE_COMMAND, connect_to_client }; +use crate::errors::CliError; #[derive(Debug)] pub enum OutputFormat { diff --git a/cli/src/uninstall.rs b/cli/src/uninstall.rs index afcc935..e847ca2 100644 --- a/cli/src/uninstall.rs +++ b/cli/src/uninstall.rs @@ -1,7 +1,8 @@ use colored::Colorize; use std::{io::stdin, process::Command, thread, time::Duration}; -use crate::essential::{BASE_COMMAND, CliError, connect_to_client}; +use crate::errors::CliError; +use crate::essential::{BASE_COMMAND, connect_to_client}; use kube::{Error, core::ErrorResponse}; //docs: From fa2a4e784e018124f5c0e8b6141a9e8906733a31 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Mon, 12 Jan 2026 18:42:05 +0100 Subject: [PATCH 03/46] updated k8s manifests file: updated image repository from my personal repository to the cortexflow org ghcr.io repository --- core/src/testing/agent.yaml | 2 +- core/src/testing/identity.yaml | 2 +- core/src/testing/metrics.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/testing/agent.yaml b/core/src/testing/agent.yaml index e5c54f0..d189f43 100644 --- a/core/src/testing/agent.yaml +++ b/core/src/testing/agent.yaml @@ -19,7 +19,7 @@ spec: hostNetwork: true containers: - name: agent - image: lorenzotettamanti/cortexflow-agent:latest + image: ghcr.io/cortexflow/agent:latest command: ["/bin/bash", "-c"] args: - | diff --git a/core/src/testing/identity.yaml b/core/src/testing/identity.yaml index 44fc5b9..bb027d2 100644 --- a/core/src/testing/identity.yaml +++ b/core/src/testing/identity.yaml @@ -53,7 +53,7 @@ spec: - SYS_PTRACE containers: - name: identity - image: lorenzotettamanti/cortexflow-identity:latest + image: ghcr.io/cortexflow/identity:latest command: ["/bin/bash", "-c"] args: - | diff --git a/core/src/testing/metrics.yaml b/core/src/testing/metrics.yaml index 3f74c71..4c775ca 100644 --- a/core/src/testing/metrics.yaml +++ b/core/src/testing/metrics.yaml @@ -19,7 +19,7 @@ spec: hostNetwork: true containers: - name: metrics - image: lorenzotettamanti/cortexflow-metrics:latest + image: ghcr.io/cortexflow/metrics:latest command: ["/bin/bash", "-c"] args: - | From 7b962dea4d883a80100961132ecf37f34b4cd82d Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Mon, 12 Jan 2026 21:07:12 +0100 Subject: [PATCH 04/46] [dependencies]: update CLI dependencies --- cli/Cargo.lock | 36 ++++++++++++++++++------------------ cli/Cargo.toml | 10 +++++----- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/cli/Cargo.lock b/cli/Cargo.lock index a2d8968..6e951ca 100644 --- a/cli/Cargo.lock +++ b/cli/Cargo.lock @@ -278,9 +278,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.51" +version = "4.5.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c26d721170e0295f191a69bd9a1f93efcdb0aff38684b61ab5750468972e5f5" +checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" dependencies = [ "clap_builder", "clap_derive", @@ -288,9 +288,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.51" +version = "4.5.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75835f0c7bf681bfd05abe44e965760fea999a5286c6eb2d59883634fd02011a" +checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" dependencies = [ "anstream", "anstyle", @@ -1309,9 +1309,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" dependencies = [ "bytes", "prost-derive", @@ -1341,9 +1341,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", "itertools", @@ -1354,9 +1354,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9b4db3d6da204ed77bb26ba83b6122a73aeb2e87e25fbf7ad2e84c4ccbf8f72" +checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" dependencies = [ "prost", ] @@ -1794,9 +1794,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.48.0" +version = "1.49.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" dependencies = [ "bytes", "libc", @@ -1987,9 +1987,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "log", "pin-project-lite", @@ -1999,9 +1999,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", @@ -2010,9 +2010,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", "valuable", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 2a43cb9..cfbcae0 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -11,17 +11,17 @@ license = "Apache-2.0" readme = "../README.md" [dependencies] -clap = { version = "4.5.51", features = ["derive"] } +clap = { version = "4.5.54", features = ["derive"] } colored = "3.0.0" directories = "6.0.0" serde = { version = "1.0.219", features = ["derive"] } -tracing = "0.1.41" -tokio = {version = "1.47.0",features = ["macros",'rt-multi-thread']} +tracing = "0.1.44" +tokio = {version = "1.49.0",features = ["macros",'rt-multi-thread']} anyhow = "1.0.100" tonic = "0.14.2" tonic-reflection = "0.14.2" -prost-types = "0.14.1" -prost = "0.14.1" +prost-types = "0.14.3" +prost = "0.14.3" cortexflow_agent_api = {version = "0.1.1",features = ["client"]} kube = "2.0.1" k8s-openapi = {version = "0.26.0", features = ["v1_34"]} From ce01e061f2388eb95b739df44bc3a3c797f52ecd Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Mon, 12 Jan 2026 21:38:23 +0100 Subject: [PATCH 05/46] [#171]: removed deprecated rm_dir function --- cli/src/uninstall.rs | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/cli/src/uninstall.rs b/cli/src/uninstall.rs index e847ca2..5259d49 100644 --- a/cli/src/uninstall.rs +++ b/cli/src/uninstall.rs @@ -155,31 +155,4 @@ async fn uninstall_component(component_type: &str, component: &str) -> Result<() }; } } -} - -// -// -//docs: -// -// This function is deprecated and will be removed in the next version -// -// Do not include or refactor this function -#[deprecated(since = "0.1.4")] -fn rm_dir(directory_to_remove: &str) { - let output = Command::new("rm") - .args(["-rf", directory_to_remove]) - .output() - .expect("cannot remove directory"); - - if !output.status.success() { - eprintln!( - "Error removing directory: {}:\n{}", - directory_to_remove, - String::from_utf8_lossy(&output.stderr) - ); - } else { - println!("✅ Removed directory {}", directory_to_remove); - } - - thread::sleep(Duration::from_secs(2)); -} +} \ No newline at end of file From 363984d5d0b553a682f478c33d4bc8d47510aacf Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Mon, 12 Jan 2026 22:53:38 +0100 Subject: [PATCH 06/46] [#171]: cleaned unused dependencies. Fixed error propagation from Result type with ? operator for the apply_component() function in install.rs module. Fixed stack overflow error while returning a custom error. implemented coloured error outputs,added docs in error.rs module --- cli/src/errors.rs | 53 +++++++++++++++++++++++--------------------- cli/src/install.rs | 4 ++-- cli/src/main.rs | 50 +++++++++++++---------------------------- cli/src/service.rs | 1 + cli/src/uninstall.rs | 2 +- 5 files changed, 47 insertions(+), 63 deletions(-) diff --git a/cli/src/errors.rs b/cli/src/errors.rs index bf83d21..6e37504 100644 --- a/cli/src/errors.rs +++ b/cli/src/errors.rs @@ -15,10 +15,6 @@ use std::fmt; // - used for Kubernetes client errors. Can be used for: // - Return client connection errors // -// UninstallError: -// - used for general installation errors occured during the uninstall for cortexflow components. Can be used for: -// - Return components removal errors -// // AgentError: // - used for cortexflow agent errors. Can be used for: // - return errors from the reflection server @@ -33,12 +29,15 @@ use std::fmt; pub enum CliError { InstallerError { reason: String }, ClientError(kube::Error), - UninstallError { reason: String }, AgentError(tonic_reflection::server::Error), MonitoringError { reason: String }, } // docs: -// error type conversions +// +// The following functions implements the trait From conversions +// +// The From Trait is used to perform a value-to-value conversion while consuming input values. +// We use that to return a single error type 'CliError' that incapsulates multiple error types impl From for CliError { fn from(e: kube::Error) -> Self { @@ -48,29 +47,28 @@ impl From for CliError { impl From for CliError { fn from(e: anyhow::Error) -> Self { CliError::MonitoringError { - reason: format!("{}", e), + reason: e.to_string(), } } } -impl From<()> for CliError { - fn from(e: ()) -> Self { - return ().into(); - } -} impl From for CliError { fn from(e: prost::DecodeError) -> Self { - todo!() + return CliError::AgentError(tonic_reflection::server::Error::DecodeError(e)); } } impl From for CliError { fn from(e: tonic::Status) -> Self { - todo!() + return CliError::MonitoringError { + reason: e.to_string(), + }; } } // docs: -// fmt::Display implementation for CliError type. Creates a user friendly message error message. -// TODO: implement colored messages using the colorize crate for better output display +// +// The Trait fmt::Display is used to create a user friendly error message for the CliError type. +// This Trait automatically implements the ToString trait for the type allowing +// the usage of .to_string() method impl fmt::Display for CliError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -83,31 +81,36 @@ impl fmt::Display for CliError { "An error occured while installing cortexflow components. Reason:" .bold() .red(), - reason + reason.red().bold() ) } - CliError::UninstallError { reason } => { + CliError::MonitoringError { reason } => { write!( f, - "An error occured while installing cortexflow components. Reason: {}", - reason + "{} {} {}", + "=====>".blue().bold(), + "An error occured while installing cortexflow components. Reason:" + .bold() + .red(), + reason.red().bold() ) } - CliError::MonitoringError { reason } => { + CliError::ClientError(e) => { write!( f, - "An error occured while installing cortexflow components. Reason: {}", - reason + "{} {} {}", + "=====>".blue().bold(), + "Client Error:".bold().red(), + e.to_string().red().bold() ) } - CliError::ClientError(e) => write!(f, "Client Error: {}", e), CliError::AgentError(e) => { write!( f, "{} {} {}", "=====>".bold().blue(), "Agent Error:".bold().red(), - e + e.to_string().bold().red() ) } } diff --git a/cli/src/install.rs b/cli/src/install.rs index 19aacc9..9ec4ecb 100644 --- a/cli/src/install.rs +++ b/cli/src/install.rs @@ -247,7 +247,7 @@ fn install_components(components_type: &str) -> Result<(), CliError> { "Applying", component.to_string().green().bold() ); - apply_component(component); + apply_component(component)?; i = i + 1; } } else if components_type == "simple-example" { @@ -267,7 +267,7 @@ fn install_components(components_type: &str) -> Result<(), CliError> { "Applying", component.to_string().green().bold() ); - apply_component(component); + apply_component(component)?; i = i + 1; } } else { diff --git a/cli/src/main.rs b/cli/src/main.rs index ba6b623..edf820b 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -76,16 +76,14 @@ async fn args_parser() -> Result<(), CliError> { match args.cmd { Some(Commands::Install(installation_args)) => match installation_args.install_cmd { InstallCommands::All => { - install_cortexflow().await.map_err(|e| eprintln!("{}", e))?; + install_cortexflow().await?; } InstallCommands::TestPods => { - install_simple_example() - .await - .map_err(|e| eprintln!("{}", e))?; + install_simple_example().await?; } }, Some(Commands::Uninstall) => { - uninstall().await.map_err(|e| eprintln!("{}", e))?; + uninstall().await?; } Some(Commands::Update) => { update_cli(); @@ -95,53 +93,39 @@ async fn args_parser() -> Result<(), CliError> { } Some(Commands::Service(service_args)) => match service_args.service_cmd { ServiceCommands::List { namespace } => { - list_services(namespace) - .await - .map_err(|e| eprintln!("{}", e))?; + list_services(namespace).await?; } ServiceCommands::Describe { service_name, namespace, } => { - describe_service(service_name, &namespace) - .await - .map_err(|e| eprintln!("{}", e))?; + describe_service(service_name, &namespace).await?; } }, Some(Commands::Status(status_args)) => { - status_command(status_args.output, status_args.namespace) - .await - .map_err(|e| eprintln!("{}", e))?; + status_command(status_args.output, status_args.namespace).await?; } Some(Commands::Logs(logs_args)) => { - logs_command(logs_args.service, logs_args.component, logs_args.namespace) - .await - .map_err(|e| eprintln!("{}", e))?; + logs_command(logs_args.service, logs_args.component, logs_args.namespace).await?; } Some(Commands::Monitor(monitor_args)) => match monitor_args.monitor_cmd { MonitorCommands::List => { - let _ = list_features().await.map_err(|e| eprintln!("{}", e))?; + let _ = list_features().await?; } MonitorCommands::Connections => { - let _ = monitor_identity_events() - .await - .map_err(|e| eprintln!("{}", e))?; + let _ = monitor_identity_events().await?; } MonitorCommands::Latencymetrics => { - let _ = monitor_latency_metrics() - .await - .map_err(|e| eprintln!("{}", e))?; + let _ = monitor_latency_metrics().await?; } MonitorCommands::Droppedpackets => { - let _ = monitor_dropped_packets() - .await - .map_err(|e| eprintln!("{}", e))?; + let _ = monitor_dropped_packets().await?; } }, Some(Commands::Policies(policies_args)) => { match policies_args.policy_cmd { PoliciesCommands::CheckBlocklist => { - let _ = check_blocklist().await.map_err(|e| eprintln!("{}", e))?; + let _ = check_blocklist().await?; } PoliciesCommands::CreateBlocklist => { // pass the ip as a monitoring flag @@ -155,9 +139,7 @@ async fn args_parser() -> Result<(), CliError> { match create_blocklist(&ip).await { Ok(_) => { //update the config metadata - let _ = update_config_metadata(&ip, "add") - .await - .map_err(|e| eprintln!("{}", e))?; + let _ = update_config_metadata(&ip, "add").await?; } Err(e) => { eprintln!("{}", e); @@ -177,9 +159,7 @@ async fn args_parser() -> Result<(), CliError> { println!("Inserted ip: {}", ip); match remove_ip(&ip).await { Ok(_) => { - let _ = update_config_metadata(&ip, "delete") - .await - .map_err(|e| eprintln!("{}", e))?; + let _ = update_config_metadata(&ip, "delete").await?; } Err(e) => { eprintln!("{}", e); @@ -198,5 +178,5 @@ async fn args_parser() -> Result<(), CliError> { #[tokio::main] async fn main() { - let _ = args_parser().await; + let _ = args_parser().await.map_err(|e| eprintln!("{}", e)); } diff --git a/cli/src/service.rs b/cli/src/service.rs index 21e8e94..37beb58 100644 --- a/cli/src/service.rs +++ b/cli/src/service.rs @@ -1,5 +1,6 @@ use clap::{Args, Subcommand}; use colored::Colorize; +use kube::Client; use kube::{Error, core::ErrorResponse}; use std::{process::Command, str}; diff --git a/cli/src/uninstall.rs b/cli/src/uninstall.rs index 5259d49..b9558dd 100644 --- a/cli/src/uninstall.rs +++ b/cli/src/uninstall.rs @@ -1,5 +1,5 @@ use colored::Colorize; -use std::{io::stdin, process::Command, thread, time::Duration}; +use std::{io::stdin, process::Command}; use crate::errors::CliError; use crate::essential::{BASE_COMMAND, connect_to_client}; From d2dd42075f1a2996bed3ddde85a43689c7899d40 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Tue, 13 Jan 2026 20:31:22 +0100 Subject: [PATCH 07/46] [#171]: fixed error logic --- cli/src/monitoring.rs | 54 +++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/cli/src/monitoring.rs b/cli/src/monitoring.rs index d6b120c..39a56ce 100644 --- a/cli/src/monitoring.rs +++ b/cli/src/monitoring.rs @@ -77,19 +77,19 @@ pub async fn list_features() -> Result<(), CliError> { } } Err(e) => { - return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: e.to_string(), - code: 404, - }))); + return Err(CliError::AgentError( + tonic_reflection::server::Error::InvalidFileDescriptorSet(e.to_string()), + )); } } } Err(e) => { - return Err(CliError::AgentError( - tonic_reflection::server::Error::InvalidFileDescriptorSet(e.to_string()), - )); + return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } Ok(()) @@ -133,19 +133,19 @@ pub async fn monitor_identity_events() -> Result<(), CliError> { } } Err(e) => { - return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: e.to_string(), - code: 404, - }))); + return Err(CliError::AgentError( + tonic_reflection::server::Error::InvalidFileDescriptorSet(e.to_string()), + )); } } } Err(e) => { - return Err(CliError::AgentError( - tonic_reflection::server::Error::InvalidFileDescriptorSet(e.to_string()), - )); + return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); } } @@ -203,12 +203,9 @@ pub async fn monitor_latency_metrics() -> Result<(), CliError> { } } Err(e) => { - return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: e.to_string(), - code: 404, - }))); + return Err(CliError::AgentError( + tonic_reflection::server::Error::InvalidFileDescriptorSet(e.to_string()), + )); } } } @@ -276,12 +273,9 @@ pub async fn monitor_dropped_packets() -> Result<(), CliError> { } } Err(e) => { - return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { - status: "failed".to_string(), - message: "Failed to connect to kubernetes client".to_string(), - reason: e.to_string(), - code: 404, - }))); + return Err(CliError::AgentError( + tonic_reflection::server::Error::InvalidFileDescriptorSet(e.to_string()), + )); } } } From 68df67159898bfd17ba334cf92a1c3b8c4ebc6ce Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Tue, 13 Jan 2026 21:51:56 +0100 Subject: [PATCH 08/46] [#171]: removed unused imports, Added CliError::BaseError to return general errors. simplified ClientError responses and AgentError responses --- cli/src/errors.rs | 26 ++++++++++++++++---------- cli/src/essential.rs | 39 ++++++++++++++++++++++++++------------- cli/src/install.rs | 40 ++++++++++++++++++++++------------------ cli/src/logs.rs | 17 ++++++++++++----- cli/src/main.rs | 2 +- cli/src/monitoring.rs | 5 +++-- cli/src/service.rs | 28 ++++++++++++++++------------ 7 files changed, 96 insertions(+), 61 deletions(-) diff --git a/cli/src/errors.rs b/cli/src/errors.rs index 6e37504..b813e35 100644 --- a/cli/src/errors.rs +++ b/cli/src/errors.rs @@ -1,11 +1,15 @@ use colored::Colorize; -use std::fmt; +use std::{error::Error, fmt}; // docs: // // CliError enum to group all the errors // // Custom error definition +// +// BaseError: +// - used for general errors +// // InstallerError: // - used for general installation errors occured during the installation of cortexflow components. Can be used for: // - Return downloading errors @@ -20,8 +24,6 @@ use std::fmt; // - return errors from the reflection server // - return unavailable agent errors (404) // -// MonitoringError: -// - used for general monitoring errors. TODO: currently under implementation // // implements fmt::Display for user friendly error messages @@ -30,7 +32,7 @@ pub enum CliError { InstallerError { reason: String }, ClientError(kube::Error), AgentError(tonic_reflection::server::Error), - MonitoringError { reason: String }, + BaseError { reason: String }, } // docs: // @@ -46,7 +48,7 @@ impl From for CliError { } impl From for CliError { fn from(e: anyhow::Error) -> Self { - CliError::MonitoringError { + CliError::BaseError { reason: e.to_string(), } } @@ -58,7 +60,7 @@ impl From for CliError { } impl From for CliError { fn from(e: tonic::Status) -> Self { - return CliError::MonitoringError { + return CliError::BaseError { reason: e.to_string(), }; } @@ -84,33 +86,37 @@ impl fmt::Display for CliError { reason.red().bold() ) } - CliError::MonitoringError { reason } => { + CliError::BaseError { reason } => { write!( f, "{} {} {}", "=====>".blue().bold(), - "An error occured while installing cortexflow components. Reason:" + "An error occured. Reason:" .bold() .red(), reason.red().bold() ) } CliError::ClientError(e) => { + // raw error looks like this + // (ErrorResponse { status: "failed", message: "Failed to connect to kubernetes client", reason: "transport error", code: 404 } + let msg = Error::source(e).unwrap(); // msg = Failed to connect to kubernetes client: transport error write!( f, "{} {} {}", "=====>".blue().bold(), "Client Error:".bold().red(), - e.to_string().red().bold() + msg.to_string().red().bold() ) } CliError::AgentError(e) => { + let msg = Error::source(e).unwrap(); write!( f, "{} {} {}", "=====>".bold().blue(), "Agent Error:".bold().red(), - e.to_string().bold().red() + msg.to_string().bold().red() ) } } diff --git a/cli/src/essential.rs b/cli/src/essential.rs index 4067db0..5ca01b9 100644 --- a/cli/src/essential.rs +++ b/cli/src/essential.rs @@ -50,7 +50,7 @@ pub async fn connect_to_client() -> Result { // // Returns an error if the command fails -pub fn update_cli() { +pub fn update_cli() -> Result<(), CliError> { let latest_version = get_latest_cfcli_version().expect("Can't get the latest version"); println!("{} {}", "=====>".blue().bold(), "Updating CortexFlow CLI"); println!( @@ -65,10 +65,12 @@ pub fn update_cli() { .expect("error"); if !output.status.success() { - eprintln!( - "Error extracting the version : {}", - String::from_utf8_lossy(&output.stderr) - ); + return Err(CliError::InstallerError { + reason: format!( + "Error extracting the version : {}", + String::from_utf8_lossy(&output.stderr) + ), + }); } else { // extract the cli version: let version = String::from_utf8_lossy(&output.stdout) @@ -106,10 +108,12 @@ pub fn update_cli() { .output() .expect("error"); if !update_command.status.success() { - eprintln!( - "Error updating the CLI: {} ", - String::from_utf8_lossy(&update_command.stderr) - ); + return Err(CliError::InstallerError { + reason: format!( + "Error updating the CLI: {} ", + String::from_utf8_lossy(&update_command.stderr) + ), + }); } else { println!( "{} {}", @@ -119,6 +123,7 @@ pub fn update_cli() { } } } + Ok(()) } // docs: @@ -280,7 +285,12 @@ pub async fn create_config_file(config_struct: MetadataConfigFile) -> Result<(), println!("Configmap created successfully"); } Err(e) => { - eprintln!("An error occured: {}", e); + return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to create configmap".to_string(), + reason: e.to_string(), + code: 404, + }))); } } Ok(()) @@ -329,7 +339,9 @@ pub async fn update_config_metadata(input: &str, action: &str) -> Result<(), Cli if let Some(index) = ips.iter().position(|target| target == &input.to_string()) { ips.remove(index); } else { - eprintln!("Index of element not found"); + return Err(CliError::BaseError { + reason: "Index of element not found".to_string(), + }); } // override blocklist parameters @@ -379,8 +391,9 @@ pub async fn update_configmap(config_struct: MetadataConfigFile) -> Result<(), C println!("Map updated successfully"); } Err(e) => { - eprintln!("An error occured during the patching process: {}", e); - return Err(e.into()); + return Err(CliError::BaseError { + reason: format!("An error occured during the patching process: {}", e), + }); } } diff --git a/cli/src/install.rs b/cli/src/install.rs index 9ec4ecb..0045ef7 100644 --- a/cli/src/install.rs +++ b/cli/src/install.rs @@ -1,7 +1,5 @@ -use crate::essential::{ - BASE_COMMAND, connect_to_client, create_config_file, create_configs, -}; use crate::errors::CliError; +use crate::essential::{BASE_COMMAND, connect_to_client, create_config_file, create_configs}; use clap::{Args, Subcommand}; use colored::Colorize; use kube::Error; @@ -296,11 +294,13 @@ fn apply_component(file: &str) -> Result<(), CliError> { })?; if !output.status.success() { - eprintln!( - "Error installing file: {}:\n{}", - file, - String::from_utf8_lossy(&output.stderr) - ); + return Err(CliError::InstallerError { + reason: format!( + "Error installing file: {}:\n{}", + file, + String::from_utf8_lossy(&output.stderr) + ), + }); } else { println!("✅ Applied {}", file); } @@ -380,11 +380,13 @@ fn download_file(src: &str) -> Result<(), CliError> { })?; if !output.status.success() { - eprintln!( - "Error copying file: {}.\n{}", - src, - String::from_utf8_lossy(&output.stderr) - ); + return Err(CliError::InstallerError { + reason: format!( + "Error copying file: {}.\n{}", + src, + String::from_utf8_lossy(&output.stderr) + ), + }); } else { println!("✅ Copied file from {} ", src); } @@ -410,11 +412,13 @@ fn rm_file(file_to_remove: &str) -> Result<(), CliError> { })?; if !output.status.success() { - eprintln!( - "Error removing file: {}:\n{}", - file_to_remove, - String::from_utf8_lossy(&output.stderr) - ); + return Err(CliError::InstallerError { + reason: format!( + "Error removing file: {}:\n{}", + file_to_remove, + String::from_utf8_lossy(&output.stderr) + ), + }); } else { println!("✅ Removed file {}", file_to_remove); } diff --git a/cli/src/logs.rs b/cli/src/logs.rs index 4a6ce3f..102d97b 100644 --- a/cli/src/logs.rs +++ b/cli/src/logs.rs @@ -138,14 +138,21 @@ pub async fn logs_command( } } else { let stderr = str::from_utf8(&output.stderr).unwrap_or("Unknown error"); - eprintln!("Error getting logs for pod '{:?}': {}", pod, stderr); + return Err(CliError::BaseError { + reason: format!( + "Error getting logs for pod '{:?}': {}", + pod, stderr + ), + }); } } Err(err) => { - eprintln!( - "Failed to execute {} logs for pod '{:?}': {}", - BASE_COMMAND, pod, err - ); + return Err(CliError::BaseError { + reason: format!( + "Failed to execute {} logs for pod '{:?}': {}", + BASE_COMMAND, pod, err + ), + }); } } } diff --git a/cli/src/main.rs b/cli/src/main.rs index edf820b..da80680 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -86,7 +86,7 @@ async fn args_parser() -> Result<(), CliError> { uninstall().await?; } Some(Commands::Update) => { - update_cli(); + update_cli()?; } Some(Commands::Info) => { info(); diff --git a/cli/src/monitoring.rs b/cli/src/monitoring.rs index 39a56ce..b7cf3e2 100644 --- a/cli/src/monitoring.rs +++ b/cli/src/monitoring.rs @@ -292,6 +292,7 @@ pub async fn monitor_dropped_packets() -> Result<(), CliError> { } fn convert_timestamp_to_date(timestamp: u64) -> String { - let datetime = DateTime::from_timestamp_micros(timestamp as i64).unwrap(); - datetime.to_string() + DateTime::from_timestamp_micros(timestamp as i64) + .map(|dt| dt.to_string()) + .unwrap_or_else(|| "Cannot convert timestamp to date".to_string()) } diff --git a/cli/src/service.rs b/cli/src/service.rs index 37beb58..8cfebf1 100644 --- a/cli/src/service.rs +++ b/cli/src/service.rs @@ -1,6 +1,5 @@ use clap::{Args, Subcommand}; use colored::Colorize; -use kube::Client; use kube::{Error, core::ErrorResponse}; use std::{process::Command, str}; @@ -81,7 +80,9 @@ pub async fn list_services(namespace: Option) -> Result<(), CliError> { Ok(output) => { if !output.status.success() { let error = str::from_utf8(&output.stderr).unwrap_or("Unknown error"); - eprintln!("Error executing {}: {}", BASE_COMMAND, error); + return Err(CliError::BaseError { + reason: format!("Error executing {}: {}", BASE_COMMAND, error), + }); } let stdout = str::from_utf8(&output.stdout).unwrap_or(""); @@ -162,7 +163,7 @@ pub async fn list_services(namespace: Option) -> Result<(), CliError> { // - else return an empty Vector // // -// Returns a CliError if the connection fails +// Returns a CliError if the connection failsss pub async fn describe_service( service_name: String, @@ -172,8 +173,6 @@ pub async fn describe_service( Ok(_) => { match list_services(namespace.clone()).await { Ok(_) => { - //let file_path = get_config_directory().unwrap().1; - let ns = namespace .clone() .unwrap_or_else(|| "cortexflow".to_string()); @@ -201,7 +200,7 @@ pub async fn describe_service( println!(" • {}", available_ns); } println!( - "\nTry: cortex service describe {} --namespace ", + "\nTry: cortexflow service describe {} --namespace ", service_name ); } else { @@ -219,11 +218,12 @@ pub async fn describe_service( if !output.status.success() { let error = str::from_utf8(&output.stderr).unwrap_or("Unknown error"); - eprintln!("Error executing kubectl describe: {}", error); - eprintln!( - "Make sure the pod '{}' exists in namespace '{}'", - service_name, ns - ); + return Err(CliError::BaseError { + reason: format!( + "Error executing kubectl describe: {}.Make sure the pod '{}' exists in namespace '{}'", + error, service_name, ns + ), + }); } let stdout = str::from_utf8(&output.stdout).unwrap_or(""); @@ -248,7 +248,11 @@ pub async fn describe_service( } } } - Err(e) => todo!(), + Err(e) => { + return Err(CliError::BaseError { + reason: format!("Cannot list services: {}", e), + }); + } } } Err(e) => { From 0032b9ffd8b1c6e85ea32aeb6c60b5ad3ec99c19 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Tue, 13 Jan 2026 22:31:05 +0100 Subject: [PATCH 09/46] [#171]: improved main.rs error structure --- cli/src/main.rs | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index da80680..7ef8872 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -9,7 +9,6 @@ mod status; mod uninstall; use clap::{Args, Parser, Subcommand}; -use colored::Colorize; use std::result::Result::Ok; use tracing::debug; @@ -131,7 +130,9 @@ async fn args_parser() -> Result<(), CliError> { // pass the ip as a monitoring flag match policies_args.flags { None => { - eprintln!("{}", "Insert at least one ip to create a blocklist".red()); + return Err(CliError::BaseError { + reason: "Insert at least one ip to create a blocklist".to_string(), + }); } Some(ip) => { println!("inserted ip: {} ", ip); @@ -142,7 +143,9 @@ async fn args_parser() -> Result<(), CliError> { let _ = update_config_metadata(&ip, "add").await?; } Err(e) => { - eprintln!("{}", e); + return Err(CliError::BaseError { + reason: e.to_string(), + }); } } } @@ -150,10 +153,10 @@ async fn args_parser() -> Result<(), CliError> { } PoliciesCommands::RemoveIpFromBlocklist => match policies_args.flags { None => { - eprintln!( - "{}", - "Insert at least one ip to remove from the blocklist".red() - ); + return Err(CliError::BaseError { + reason: "Insert at least one ip to remove from the blocklist" + .to_string(), + }); } Some(ip) => { println!("Inserted ip: {}", ip); @@ -162,7 +165,9 @@ async fn args_parser() -> Result<(), CliError> { let _ = update_config_metadata(&ip, "delete").await?; } Err(e) => { - eprintln!("{}", e); + return Err(CliError::BaseError { + reason: e.to_string(), + }); } } } @@ -170,7 +175,9 @@ async fn args_parser() -> Result<(), CliError> { } } None => { - eprintln!("CLI unknown argument. Cli arguments passed: {:?}", args.cmd); + return Err(CliError::BaseError { + reason: format!("CLI unknown argument. Cli arguments passed: {:?}", args.cmd), + }); } } Ok(()) From 28d589377dad87a702470676ee00c58cb14498d5 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Tue, 13 Jan 2026 22:36:01 +0100 Subject: [PATCH 10/46] [#171]: removed useless derive macros --- cli/src/install.rs | 2 +- cli/src/main.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cli/src/install.rs b/cli/src/install.rs index 0045ef7..bdb1ea1 100644 --- a/cli/src/install.rs +++ b/cli/src/install.rs @@ -41,7 +41,7 @@ pub enum InstallCommands { } //install args -#[derive(Args, Debug, Clone)] +#[derive(Args, Debug)] pub struct InstallArgs { #[command(subcommand)] pub install_cmd: InstallCommands, diff --git a/cli/src/main.rs b/cli/src/main.rs index 7ef8872..0a5ac46 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -42,7 +42,7 @@ struct Cli { cmd: Option, } -#[derive(Subcommand, Debug, Clone)] +#[derive(Subcommand, Debug)] enum Commands { /* list of available commands */ #[command(name = "install", about = "Manage installation")] @@ -64,7 +64,7 @@ enum Commands { #[command(name = "policy", about = "Network Policies")] Policies(PoliciesArgs), } -#[derive(Args, Debug, Clone)] +#[derive(Args)] struct SetArgs { val: String, } From a4fffd392c95e2985a273d5cfdc70563f58dbe4d Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Fri, 16 Jan 2026 10:54:24 +0100 Subject: [PATCH 11/46] [#158]: moved map_handlers module from identity crate to common crate --- core/Cargo.lock | 6 +- core/common/Cargo.toml | 6 ++ core/common/src/lib.rs | 3 +- .../identity => common}/src/map_handlers.rs | 96 +++++++++++-------- core/src/components/identity/src/lib.rs | 3 +- 5 files changed, 68 insertions(+), 46 deletions(-) rename core/{src/components/identity => common}/src/map_handlers.rs (56%) diff --git a/core/Cargo.lock b/core/Cargo.lock index 506d5dc..e980659 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -406,6 +406,9 @@ name = "cortexbrain-common" version = "0.1.0" dependencies = [ "anyhow", + "aya", + "k8s-openapi", + "kube", "tracing", "tracing-subscriber", ] @@ -453,10 +456,9 @@ dependencies = [ "bytemuck", "bytemuck_derive", "bytes", - "cortexbrain-common 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "cortexbrain-common 0.1.0", "k8s-openapi", "kube", - "libc", "nix", "tokio", "tracing", diff --git a/core/common/Cargo.toml b/core/common/Cargo.toml index 7054578..ac87f68 100644 --- a/core/common/Cargo.toml +++ b/core/common/Cargo.toml @@ -13,3 +13,9 @@ repository = "https://github.com/CortexFlow/CortexBrain" tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] } anyhow = "1.0" +kube = { version = "2.0.1", features = ["client"] } +k8s-openapi = { version = "0.26.0", features = ["v1_34"] } +aya = "0.13.1" + +[features] +map-handlers = [] diff --git a/core/common/src/lib.rs b/core/common/src/lib.rs index f8fadc6..2f8e563 100644 --- a/core/common/src/lib.rs +++ b/core/common/src/lib.rs @@ -1,3 +1,4 @@ pub mod constants; +pub mod formatters; pub mod logger; -pub mod formatters; \ No newline at end of file +pub mod map_handlers; diff --git a/core/src/components/identity/src/map_handlers.rs b/core/common/src/map_handlers.rs similarity index 56% rename from core/src/components/identity/src/map_handlers.rs rename to core/common/src/map_handlers.rs index a225a47..43330fa 100644 --- a/core/src/components/identity/src/map_handlers.rs +++ b/core/common/src/map_handlers.rs @@ -13,39 +13,49 @@ use std::sync::Mutex; use tracing::warn; use tracing::{error, info}; -pub fn init_bpf_maps(bpf: Arc>) -> Result<(Map, Map, Map, Map), anyhow::Error> { - // this function init the bpfs maps used in the main program - /* - index 0: events_map - index 1: veth_map - index 2: blocklist map - */ - let mut bpf_new = bpf.lock().unwrap(); +// docs +// +// this function init the bpfs maps used in the main program +// +// index 0: events_map +// index 1: veth_map +// index 2: blocklist map +// index 3: tcp_registry map +// - let events_map = bpf_new - .take_map("EventsMap") - .ok_or_else(|| anyhow::anyhow!("EventsMap map not found"))?; - - let veth_map = bpf_new - .take_map("veth_identity_map") - .ok_or_else(|| anyhow::anyhow!("veth_identity_map map not found"))?; - - let blocklist_map = bpf_new - .take_map("Blocklist") - .ok_or_else(|| anyhow::anyhow!("Blocklist map not found"))?; +#[cfg(feature = "map-handlers")] +pub struct BpfMapsData { + pub bpf_obj_names: Vec, + pub bpf_obj_map: Vec, +} - let tcp_registry_map = bpf_new - .take_map("TcpPacketRegistry") - .ok_or_else(|| anyhow::anyhow!("TcpPacketRegistry map not found"))?; +#[cfg(feature = "map-handlers")] +pub fn init_bpf_maps( + bpf: Arc>, + map_names: Vec, +) -> Result { + let mut bpf_new = bpf.lock().expect("Cannot get value from lock"); + let mut maps = Vec::new(); // stores bpf_maps_objects - Ok((events_map, veth_map, blocklist_map, tcp_registry_map)) + for name in &map_names { + let bpf_map_init = bpf_new + .take_map(&name) + .ok_or_else(|| anyhow::anyhow!("{} map not found", &name))?; + maps.push(bpf_map_init); + } + Ok(BpfMapsData { + bpf_obj_names: map_names.clone(), + bpf_obj_map: maps, + }) } //TODO: save bpf maps path in the cli metadata + //takes an array of bpf maps and pin them to persiste session data -//TODO: change maps type with a Vec instead of (Map,Map). This method is only for fast development and it's not optimized -//TODO: add bpf mounts during cli installation -pub fn map_pinner(maps: &(Map, Map, Map, Map), path: &PathBuf) -> Result<(), Error> { +// FIXME: is this ok that we are returning a BpfMapsData? + +#[cfg(feature = "map-handlers")] +pub fn map_pinner(maps: BpfMapsData, path: &PathBuf) -> Result, Error> { if !path.exists() { info!("Pin path {:?} does not exist. Creating it...", path); std::fs::create_dir_all(&path)?; @@ -56,28 +66,32 @@ pub fn map_pinner(maps: &(Map, Map, Map, Map), path: &PathBuf) -> Result<(), Err } } - let configs = [ - (&maps.0, "events_map"), - (&maps.1, "veth_map"), - (&maps.2, "blocklist_map"), - (&maps.3, "tcp_packet_registry"), - ]; - - for (name, paths) in configs { - let map_path = path.join(paths); + let mut owned_maps = Vec::new(); // aya::Maps does not implement the clone trait i need to create a raw copy of the vec map + // an iterator that iterates two iterators simultaneously + for (map_obj, name) in maps + .bpf_obj_map + .into_iter() + .zip(maps.bpf_obj_names.into_iter()) + { + let map_path = path.join(&name); if map_path.exists() { - warn!("Path {} already exists", paths); - warn!("Removing path {}", paths); - let _ = std::fs::remove_file(&map_path); + warn!("Path {} already exists", name); + warn!("Removing path {}", name); + std::fs::remove_file(&map_path)?; } info!("Trying to pin map {:?} in map path: {:?}", name, &map_path); - name.pin(&map_path)?; + map_obj.pin(&map_path)?; + owned_maps.push(map_obj); } - Ok(()) + Ok(owned_maps) } + +#[cfg(feature = "map-handlers")] pub async fn populate_blocklist(map: &mut Map) -> Result<(), Error> { - let client = Client::try_default().await.unwrap(); + let client = Client::try_default() + .await + .expect("Cannot connect to Kubernetes Client"); let namespace = "cortexflow"; let configmap = "cortexbrain-client-config"; diff --git a/core/src/components/identity/src/lib.rs b/core/src/components/identity/src/lib.rs index e3bb59e..5413414 100644 --- a/core/src/components/identity/src/lib.rs +++ b/core/src/components/identity/src/lib.rs @@ -1,4 +1,3 @@ pub mod helpers; pub mod structs; -pub mod enums; -pub mod map_handlers; \ No newline at end of file +pub mod enums; \ No newline at end of file From 2e451fd7aacc4b334a470b31a830017c9b20966d Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Fri, 16 Jan 2026 10:54:52 +0100 Subject: [PATCH 12/46] [#158]: simplified identity logic. removed duplicated code and functions --- core/src/components/identity/Cargo.toml | 8 +- core/src/components/identity/src/helpers.rs | 12 +- core/src/components/identity/src/main.rs | 195 +++++++++----------- core/src/components/identity/src/mod.rs | 3 +- core/src/components/identity/src/structs.rs | 26 +-- 5 files changed, 112 insertions(+), 132 deletions(-) diff --git a/core/src/components/identity/Cargo.toml b/core/src/components/identity/Cargo.toml index 08d753e..3146991 100644 --- a/core/src/components/identity/Cargo.toml +++ b/core/src/components/identity/Cargo.toml @@ -10,11 +10,10 @@ homepage = "https://docs.cortexflow.org" repository = "https://github.com/CortexFlow/CortexBrain" [features] -default = ["map-handlers", "struct", "enums"] -map-handlers = [] +default = ["struct", "enums"] struct = [] enums = [] -experimental = ["map-handlers", "struct", "enums"] +experimental = ["struct", "enums"] [dependencies] @@ -31,10 +30,9 @@ tokio = { version = "1.48.0", features = [ anyhow = "1.0" tracing = "0.1.41" tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } -libc = "0.2.172" bytemuck = { version = "1.23.0", features = ["derive"] } bytemuck_derive = "1.10.1" -cortexbrain-common = "0.1.0" +cortexbrain-common = { path = "../../../common/", features = ["map-handlers"] } nix = { version = "0.30.1", features = ["net"] } kube = { version = "2.0.1", features = ["client"] } k8s-openapi = { version = "0.26.0", features = ["v1_34"] } diff --git a/core/src/components/identity/src/helpers.rs b/core/src/components/identity/src/helpers.rs index 7855edc..05b9603 100644 --- a/core/src/components/identity/src/helpers.rs +++ b/core/src/components/identity/src/helpers.rs @@ -49,10 +49,10 @@ impl TryFrom for IpProtocols { /* helper functions to read and log net events in the container */ pub async fn display_events>( mut perf_buffers: Vec>, - running: Arc, + //running: Arc, mut buffers: Vec, ) { - while running.load(Ordering::SeqCst) { + while true { for buf in perf_buffers.iter_mut() { match buf.read_events(&mut buffers) { std::result::Result::Ok(events) => { @@ -105,11 +105,11 @@ pub fn reverse_be_addr(addr: u32) -> Ipv4Addr { pub async fn display_veth_events>( bpf: Arc>, mut perf_buffers: Vec>, - running: Arc, + //running: Arc, mut buffers: Vec, mut link_ids: Arc>>, ) { - while running.load(Ordering::SeqCst) { + while true { for buf in perf_buffers.iter_mut() { match buf.read_events(&mut buffers) { std::result::Result::Ok(events) => { @@ -265,10 +265,10 @@ async fn attach_detach_veth( /* helper functions to display events from the TcpPacketRegistry structure */ pub async fn display_tcp_registry_events>( mut perf_buffers: Vec>, - running: Arc, + //running: Arc, mut buffers: Vec, ) { - while running.load(Ordering::SeqCst) { + while true { for buf in perf_buffers.iter_mut() { match buf.read_events(&mut buffers) { std::result::Result::Ok(events) => { diff --git a/core/src/components/identity/src/main.rs b/core/src/components/identity/src/main.rs index 5688715..9dd6ce9 100644 --- a/core/src/components/identity/src/main.rs +++ b/core/src/components/identity/src/main.rs @@ -7,46 +7,36 @@ * 4. [Experimental]: cgroup scanner * */ -#![allow(warnings)] mod enums; mod helpers; -mod map_handlers; mod structs; +use crate::helpers::{ + display_events, display_tcp_registry_events, display_veth_events, get_veth_channels, +}; use aya::{ Ebpf, - maps::{ - Map, MapData, - perf::{PerfEventArray, PerfEventArrayBuffer}, - }, + maps::{Map, perf::PerfEventArray}, programs::{KProbe, SchedClassifier, TcAttachType, tc::SchedClassifierLinkId}, util::online_cpus, }; -use crate::helpers::{ - display_events, display_tcp_registry_events, display_veth_events, get_veth_channels, -}; - #[cfg(feature = "experimental")] use crate::helpers::scan_cgroup_cronjob; -use crate::map_handlers::{init_bpf_maps, map_pinner, populate_blocklist}; - use bytes::BytesMut; +use cortexbrain_common::map_handlers::{init_bpf_maps, map_pinner, populate_blocklist}; use std::{ convert::TryInto, path::Path, - sync::{ - Arc, Mutex, - atomic::{AtomicBool, Ordering}, - }, + sync::{Arc, Mutex}, }; use anyhow::{Context, Ok}; use cortexbrain_common::{constants, logger}; use tokio::{fs, signal}; -use tracing::{error, info}; +use tracing::{debug, error, info}; use std::collections::HashMap; @@ -72,14 +62,19 @@ async fn main() -> Result<(), anyhow::Error> { let bpf = Arc::new(Mutex::new(Ebpf::load(&data)?)); let bpf_map_save_path = std::env::var(constants::PIN_MAP_PATH) .context("PIN_MAP_PATH environment variable required")?; - - match init_bpf_maps(bpf.clone()) { - std::result::Result::Ok(mut bpf_maps) => { + let data = vec![ + "EventsMap".to_string(), + "veth_identity_map".to_string(), + //"Blocklist".to_string(), + "TcpPacketRegistry".to_string(), + ]; + match init_bpf_maps(bpf.clone(), data) { + std::result::Result::Ok(bpf_maps) => { info!("Successfully loaded bpf maps"); let pin_path = std::path::PathBuf::from(&bpf_map_save_path); info!("About to call map_pinner with path: {:?}", pin_path); - match map_pinner(&bpf_maps, &pin_path) { - std::result::Result::Ok(_) => { + match map_pinner(bpf_maps, &pin_path) { + std::result::Result::Ok(maps) => { info!("maps pinned successfully"); //load veth_trace program ref veth_trace.rs { @@ -90,9 +85,9 @@ async fn main() -> Result<(), anyhow::Error> { info!("Found interfaces: {:?}", interfaces); - { - populate_blocklist(&mut bpf_maps.2).await; - } + //{ FIXME: paused for testing the other features + // populate_blocklist(&mut maps.2).await?; + //} { init_tc_classifier(bpf.clone(), interfaces, link_ids.clone()).await.context( @@ -105,9 +100,11 @@ async fn main() -> Result<(), anyhow::Error> { )?; } - event_listener(bpf_maps, link_ids.clone(), bpf.clone()) + event_listener(maps, link_ids.clone(), bpf.clone()) .await - .context("Error initializing event_listener")?; + .map_err(|e| { + anyhow::anyhow!("Error inizializing event_listener. Reason: {}", e) + })?; } Err(e) => { error!("Error while pinning bpf_maps: {}", e); @@ -116,7 +113,7 @@ async fn main() -> Result<(), anyhow::Error> { } Err(e) => { error!("Error while loading bpf maps {}", e); - signal::ctrl_c(); + let _ = signal::ctrl_c().await; } } @@ -132,7 +129,9 @@ async fn init_tc_classifier( //this funtion initialize the tc classifier program info!("Loading programs"); - let mut bpf_new = bpf.lock().unwrap(); + let mut bpf_new = bpf + .lock() + .map_err(|e| anyhow::anyhow!("Cannot get value from lock. Reason: {}", e))?; let program: &mut SchedClassifier = bpf_new .program_mut("identity_classifier") @@ -151,7 +150,9 @@ async fn init_tc_classifier( "Program 'identity_classifier' attached to interface {}", interface ); - let mut map = link_ids.lock().unwrap(); + let mut map = link_ids + .lock() + .map_err(|e| anyhow::anyhow!("Cannot get value from lock. Reason: {}", e))?; map.insert(interface.clone(), link_id); } Err(e) => error!( @@ -167,7 +168,9 @@ async fn init_tc_classifier( async fn init_veth_tracer(bpf: Arc>) -> Result<(), anyhow::Error> { //this functions init the veth_tracer used to make the InterfacesRegistry - let mut bpf_new = bpf.lock().unwrap(); + let mut bpf_new = bpf + .lock() + .map_err(|e| anyhow::anyhow!("Cannot get value from lock. Reason: {}", e))?; //creation tracer let veth_creation_tracer: &mut KProbe = bpf_new @@ -199,7 +202,9 @@ async fn init_veth_tracer(bpf: Arc>) -> Result<(), anyhow::Error> { } async fn init_tcp_registry(bpf: Arc>) -> Result<(), anyhow::Error> { - let mut bpf_new = bpf.lock().unwrap(); + let mut bpf_new = bpf + .lock() + .map_err(|e| anyhow::anyhow!("Cannot get value from lock. Reason: {}", e))?; // init tcp registry let tcp_analyzer: &mut KProbe = bpf_new @@ -236,91 +241,81 @@ async fn init_tcp_registry(bpf: Arc>) -> Result<(), anyhow::Error> { Ok(()) } +// this function init the event listener. Listens for veth events (creation/deletion) and network events (pod to pod communications) +// Doc: +// +// perf_net_events_array: contains is associated with the network events stored in the events_map (EventsMap) +// perf_veth_array: contains is associated with the network events stored in the veth_map (veth_identity_map) +// +// async fn event_listener( - bpf_maps: (Map, Map, Map, Map), + bpf_maps: Vec, link_ids: Arc>>, bpf: Arc>, ) -> Result<(), anyhow::Error> { - // this function init the event listener. Listens for veth events (creation/deletion) and network events (pod to pod communications) - /* Doc: - - perf_net_events_array: contains is associated with the network events stored in the events_map (EventsMap) - perf_veth_array: contains is associated with the network events stored in the veth_map (veth_identity_map) - - */ - info!("Preparing perf_buffers and perf_arrays"); //TODO: try to change from PerfEventArray to a RingBuffer data structure - //let m0=bpf_maps[0]; - //let m1 = bpf_maps[1]; - //let mut ring1=RingBuf::try_from(m0)?; - //let mut ring2=RingBuf::try_from(m1)?; - - //TODO:create an helper function that initialize the data structures and the running - // init PerfEventArrays - let mut perf_veth_array: PerfEventArray = PerfEventArray::try_from(bpf_maps.1)?; - let mut perf_net_events_array: PerfEventArray = PerfEventArray::try_from(bpf_maps.0)?; - let mut tcp_registry_array: PerfEventArray = PerfEventArray::try_from(bpf_maps.3)?; - - // init PerfEventArrays buffers - let mut perf_veth_buffer: Vec> = Vec::new(); - let mut perf_net_events_buffer: Vec> = Vec::new(); - let mut tcp_registry_buffer: Vec> = Vec::new(); - - // fill the input buffers - - for cpu_id in online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))? { - let veth_buf: PerfEventArrayBuffer = perf_veth_array.open(cpu_id, None)?; - perf_veth_buffer.push(veth_buf); - } - for cpu_id in online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))? { - let events_buf: PerfEventArrayBuffer = perf_net_events_array.open(cpu_id, None)?; - perf_net_events_buffer.push(events_buf); + + let mut perf_event_arrays = Vec::new(); // contains a vector of PerfEventArrays + let mut event_buffers = Vec::new(); // contains a vector of buffers + + // create the PerfEventArrays and the buffers + for map in bpf_maps { + debug!("Debugging map type:{:?}", map); + let perf_event_array = PerfEventArray::try_from(map).map_err(|e| { + error!("Cannot create perf_event_array for map.Reason: {}", e); + anyhow::anyhow!("Cannot create perf_event_array for map.Reason: {}", e) + })?; + perf_event_arrays.push(perf_event_array); // this is step 1 + let perf_event_array_buffer = Vec::new(); + event_buffers.push(perf_event_array_buffer); //this is step 2 } - for cpu_id in online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))? { - let tcp_registry_buf: PerfEventArrayBuffer = - tcp_registry_array.open(cpu_id, None)?; - tcp_registry_buffer.push(tcp_registry_buf); + + // fill the input buffers with data from the PerfEventArrays + let cpus = online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))?; + + for (perf_evt_array, perf_evt_array_buffer) in + perf_event_arrays.iter_mut().zip(event_buffers.iter_mut()) + { + for cpu_id in &cpus { + let single_buffer = perf_evt_array.open(*cpu_id, None)?; + perf_evt_array_buffer.push(single_buffer); + } } info!("Listening for events..."); - // init runnings - let veth_running = Arc::new(AtomicBool::new(true)); - let net_events_running = Arc::new(AtomicBool::new(true)); - let tcp_registry_running = Arc::new(AtomicBool::new(true)); + let mut event_buffers = event_buffers.into_iter(); + let perf_veth_buffer = event_buffers + .next() + .expect("Cannot create perf_veth buffer"); + let perf_net_events_buffer = event_buffers + .next() + .expect("Cannot create perf_net_events buffer"); + let tcp_registry_buffer = event_buffers + .next() + .expect("Cannot create tcp_registry buffer"); // init output buffers - let mut veth_buffers = vec![BytesMut::with_capacity(1024); 10]; - let mut events_buffers = vec![BytesMut::with_capacity(1024); online_cpus().iter().len()]; - let mut tcp_buffers = vec![BytesMut::with_capacity(1024); online_cpus().iter().len()]; - - // init running signals - let veth_running_signal = veth_running.clone(); - let net_events_running_signal = net_events_running.clone(); - let tcp_registry_running_signal = tcp_registry_running.clone(); + let veth_buffers = vec![BytesMut::with_capacity(1024); 10]; + let events_buffers = vec![BytesMut::with_capacity(1024); online_cpus().iter().len()]; + let tcp_buffers = vec![BytesMut::with_capacity(1024); online_cpus().iter().len()]; - let veth_link_ids = link_ids.clone(); + // init veth link ids + let veth_link_ids = link_ids; + // spawn async tasks let veth_events_displayer = tokio::spawn(async move { - display_veth_events( - bpf.clone(), - perf_veth_buffer, - veth_running, - veth_buffers, - veth_link_ids, - ) - .await; + display_veth_events(bpf.clone(), perf_veth_buffer, veth_buffers, veth_link_ids).await; }); - // IDEA: Maybe we don't need to display all this events let net_events_displayer = tokio::spawn(async move { - display_events(perf_net_events_buffer, net_events_running, events_buffers).await; + display_events(perf_net_events_buffer, events_buffers).await; }); let tcp_registry_events_displayer: tokio::task::JoinHandle<()> = tokio::spawn(async move { - display_tcp_registry_events(tcp_registry_buffer, tcp_registry_running, tcp_buffers).await; + display_tcp_registry_events(tcp_registry_buffer, tcp_buffers).await; }); #[cfg(feature = "experimental")] @@ -330,12 +325,6 @@ async fn event_listener( #[cfg(not(feature = "experimental"))] tokio::select! { - /* result = scan_cgroup_cronjob=>{ - match result{ - Err(e)=>error!("scan_cgroup_cronjob panicked {:?}",e), - std::result::Result::Ok(_) => info!("cgroup scan cronjob exited"), - } - } */ result = veth_events_displayer=>{ match result{ Err(e)=>error!("veth_event_displayer panicked {:?}",e), @@ -359,9 +348,6 @@ async fn event_listener( _= signal::ctrl_c()=>{ info!("Triggered Exiting..."); - veth_running_signal.store(false, Ordering::SeqCst); - net_events_running_signal.store(false, Ordering::SeqCst); - tcp_registry_running_signal.store(false, Ordering::SeqCst); } } @@ -396,9 +382,6 @@ async fn event_listener( _= signal::ctrl_c()=>{ info!("Triggered Exiting..."); - veth_running_signal.store(false, Ordering::SeqCst); - net_events_running_signal.store(false, Ordering::SeqCst); - tcp_registry_running_signal.store(false, Ordering::SeqCst); } } diff --git a/core/src/components/identity/src/mod.rs b/core/src/components/identity/src/mod.rs index e3bb59e..5413414 100644 --- a/core/src/components/identity/src/mod.rs +++ b/core/src/components/identity/src/mod.rs @@ -1,4 +1,3 @@ pub mod helpers; pub mod structs; -pub mod enums; -pub mod map_handlers; \ No newline at end of file +pub mod enums; \ No newline at end of file diff --git a/core/src/components/identity/src/structs.rs b/core/src/components/identity/src/structs.rs index d8cff93..7e2aa2b 100644 --- a/core/src/components/identity/src/structs.rs +++ b/core/src/components/identity/src/structs.rs @@ -19,17 +19,17 @@ unsafe impl aya::Pod for PacketLog {} /* * Connection Array that contains the hash_id associated with an active connection */ -#[repr(C)] -#[derive(Clone, Copy, Zeroable)] -pub struct ConnArray { - pub src_ip: u32, - pub dst_ip: u32, - pub src_port: u16, - pub dst_port: u16, - pub proto: u8, -} +//#[repr(C)] +//#[derive(Clone, Copy, Zeroable)] +//pub struct ConnArray { +// pub src_ip: u32, +// pub dst_ip: u32, +// pub src_port: u16, +// pub dst_port: u16, +// pub proto: u8, +//} -unsafe impl aya::Pod for ConnArray {} +//unsafe impl aya::Pod for ConnArray {} #[repr(C)] #[derive(Clone, Copy)] @@ -44,13 +44,13 @@ pub struct VethLog { #[repr(C)] #[derive(Clone, Copy)] -pub struct TcpPacketRegistry{ +pub struct TcpPacketRegistry { pub proto: u8, pub src_ip: u32, pub dst_ip: u32, pub src_port: u16, pub dst_port: u16, pub pid: u32, - pub command: [u8;16], + pub command: [u8; 16], pub cgroup_id: u64, -} \ No newline at end of file +} From 975f4f8979a48349f985a33b436910275395d062 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Thu, 22 Jan 2026 21:07:47 +0100 Subject: [PATCH 13/46] [#158]: added program handlers function in the common crate. Remove duplicated code in metrics module --- core/common/Cargo.toml | 1 + core/common/src/lib.rs | 3 + core/common/src/map_handlers.rs | 4 - core/common/src/program_handlers.rs | 42 +++++++ core/src/components/metrics/Cargo.toml | 16 ++- core/src/components/metrics/src/helpers.rs | 107 +++++++++++------- core/src/components/metrics/src/main.rs | 72 ++++++------ .../components/metrics/src/maps_handlers.rs | 48 -------- core/src/components/metrics/src/mod.rs | 4 +- .../metrics/src/program_handlers.rs | 59 ---------- 10 files changed, 165 insertions(+), 191 deletions(-) create mode 100644 core/common/src/program_handlers.rs delete mode 100644 core/src/components/metrics/src/maps_handlers.rs delete mode 100644 core/src/components/metrics/src/program_handlers.rs diff --git a/core/common/Cargo.toml b/core/common/Cargo.toml index ac87f68..854c04e 100644 --- a/core/common/Cargo.toml +++ b/core/common/Cargo.toml @@ -19,3 +19,4 @@ aya = "0.13.1" [features] map-handlers = [] +program-handlers = [] diff --git a/core/common/src/lib.rs b/core/common/src/lib.rs index 2f8e563..1d015a2 100644 --- a/core/common/src/lib.rs +++ b/core/common/src/lib.rs @@ -1,4 +1,7 @@ pub mod constants; pub mod formatters; pub mod logger; +#[cfg(feature = "map-handlers")] pub mod map_handlers; +#[cfg(feature = "program-handlers")] +pub mod program_handlers; \ No newline at end of file diff --git a/core/common/src/map_handlers.rs b/core/common/src/map_handlers.rs index 43330fa..2882d66 100644 --- a/core/common/src/map_handlers.rs +++ b/core/common/src/map_handlers.rs @@ -17,10 +17,6 @@ use tracing::{error, info}; // // this function init the bpfs maps used in the main program // -// index 0: events_map -// index 1: veth_map -// index 2: blocklist map -// index 3: tcp_registry map // #[cfg(feature = "map-handlers")] diff --git a/core/common/src/program_handlers.rs b/core/common/src/program_handlers.rs new file mode 100644 index 0000000..8832daf --- /dev/null +++ b/core/common/src/program_handlers.rs @@ -0,0 +1,42 @@ +use aya::{Ebpf, programs::KProbe}; +use std::convert::TryInto; +use std::sync::{Arc, Mutex}; +use tracing::{error, info}; + +#[cfg(feature = "program-handlers")] +pub fn load_program( + bpf: Arc>, + program_name: &str, + actual_program: &str, +) -> Result<(), anyhow::Error> { + let mut bpf_new = bpf.lock().expect("Cannot get value from lock"); + + // Load and attach the eBPF programs + let program: &mut KProbe = bpf_new + .program_mut(program_name) + .ok_or_else(|| anyhow::anyhow!("Program {} not found", program_name))? + .try_into() + .map_err(|e| anyhow::anyhow!("Failed to convert program: {:?}", e))?; + + program + .load() + .map_err(|e| anyhow::anyhow!("Cannot load program: {}. Error: {}", &program_name, e))?; + + match program.attach(actual_program, 0) { + Ok(_) => info!("{} program attached successfully", actual_program), + Err(e) => { + error!("Error attaching {} program {:?}", actual_program, e); + return Err(anyhow::anyhow!( + "Failed to attach {}: {:?}", + actual_program, + e + )); + } + }; + + info!( + "eBPF program {} loaded and attached successfully", + program_name + ); + Ok(()) +} diff --git a/core/src/components/metrics/Cargo.toml b/core/src/components/metrics/Cargo.toml index 112872e..0e88d8c 100644 --- a/core/src/components/metrics/Cargo.toml +++ b/core/src/components/metrics/Cargo.toml @@ -7,11 +7,21 @@ edition = "2024" aya = "0.13.1" aya-log = "0.2.1" bytes = "1.4" -tokio = { version = "1.48.0", features = ["rt","macros","time","fs","signal","rt-multi-thread"] } +tokio = { version = "1.48.0", features = [ + "rt", + "macros", + "time", + "fs", + "signal", + "rt-multi-thread", +] } anyhow = "1.0" tracing = "0.1.41" tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } libc = "0.2.172" bytemuck = "1.23.0" -cortexbrain-common = { path = "../../../common" } -nix ={version="0.30.1",features=["net"]} +cortexbrain-common = { path = "../../../common", features = [ + "map-handlers", + "program-handlers", +] } +nix = { version = "0.30.1", features = ["net"] } diff --git a/core/src/components/metrics/src/helpers.rs b/core/src/components/metrics/src/helpers.rs index 1b4628e..f519c7e 100644 --- a/core/src/components/metrics/src/helpers.rs +++ b/core/src/components/metrics/src/helpers.rs @@ -1,24 +1,23 @@ -use aya::{maps::{ - perf::PerfEventArrayBuffer, Map, MapData, PerfEventArray - }, util::online_cpus}; +use aya::{ + maps::{Map, MapData, PerfEventArray, perf::PerfEventArrayBuffer}, + util::online_cpus, +}; use bytes::BytesMut; -use tokio::signal; -use std::{ - sync::{ - Arc, - atomic::{AtomicBool, Ordering}, - }, +use std::sync::{ + Arc, + atomic::{AtomicBool, Ordering}, }; +use tokio::signal; -use tracing::{error, info}; +use tracing::{debug, error, info}; use crate::structs::NetworkMetrics; use crate::structs::TimeStampMetrics; pub async fn display_metrics_map( mut perf_buffers: Vec>, - running: Arc, // Changed to Arc + running: Arc, // Changed to Arc mut buffers: Vec, ) { info!("Starting metrics event listener..."); @@ -46,10 +45,23 @@ pub async fn display_metrics_map( let sk_receive_buffer_size = net_metrics.sk_receive_buffer_size; info!( "tgid: {}, comm: {}, ts_us: {}, sk_drops: {}, sk_err: {}, sk_err_soft: {}, sk_backlog_len: {}, sk_write_memory_queued: {}, sk_ack_backlog: {}, sk_receive_buffer_size: {}", - tgid, comm, ts_us, sk_drop_count, sk_err, sk_err_soft, sk_backlog_len, sk_write_memory_queued, sk_ack_backlog, sk_receive_buffer_size + tgid, + comm, + ts_us, + sk_drop_count, + sk_err, + sk_err_soft, + sk_backlog_len, + sk_write_memory_queued, + sk_ack_backlog, + sk_receive_buffer_size ); } else { - info!("Received data too small: {} bytes, expected: {}", data.len(), std::mem::size_of::()); + info!( + "Received data too small: {} bytes, expected: {}", + data.len(), + std::mem::size_of::() + ); } } } @@ -65,7 +77,7 @@ pub async fn display_metrics_map( pub async fn display_time_stamp_events_map( mut perf_buffers: Vec>, - running: Arc, // Changed to Arc + running: Arc, // Changed to Arc mut buffers: Vec, ) { info!("Starting timestamp event listener..."); @@ -107,48 +119,67 @@ pub async fn display_time_stamp_events_map( info!("Timestamp event listener stopped"); } -pub async fn event_listener(bpf_maps: (Map, Map)) -> Result<(), anyhow::Error> { +pub async fn event_listener(bpf_maps: Vec) -> Result<(), anyhow::Error> { info!("Getting CPU count..."); - let cpu_count = online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))?.len(); - info!("CPU count: {}", cpu_count); - + + let mut perf_event_arrays = Vec::new(); // contains a vector of PerfEventArrays + let mut event_buffers = Vec::new(); // contains a vector of buffers + info!("Creating perf buffers..."); - let mut net_perf_buffer: Vec> = Vec::new(); - let mut net_perf_array: PerfEventArray = PerfEventArray::try_from(bpf_maps.0)?; - let mut time_stamp_events_perf_buffer: Vec> = Vec::new(); - let mut time_stamp_events_perf_array: PerfEventArray = - PerfEventArray::try_from(bpf_maps.1)?; - - info!("Opening perf buffers for {} CPUs...", cpu_count); - for cpu_id in online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))? { - let buf: PerfEventArrayBuffer = net_perf_array.open(cpu_id, None)?; - net_perf_buffer.push(buf); + for map in bpf_maps { + debug!("Debugging map type:{:?}", map); + let perf_event_array = PerfEventArray::try_from(map).map_err(|e| { + error!("Cannot create perf_event_array for map.Reason: {}", e); + anyhow::anyhow!("Cannot create perf_event_array for map.Reason: {}", e) + })?; + perf_event_arrays.push(perf_event_array); // this is step 1 + let perf_event_array_buffer = Vec::new(); + event_buffers.push(perf_event_array_buffer); //this is step 2 } - for cpu_id in online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))? { - let buf: PerfEventArrayBuffer = time_stamp_events_perf_array.open(cpu_id, None)?; - time_stamp_events_perf_buffer.push(buf); + + let cpu_count = online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))?; + + //info!("CPU count: {}", cpu_count); + for (perf_evt_array, perf_evt_array_buffer) in + perf_event_arrays.iter_mut().zip(event_buffers.iter_mut()) + { + for cpu_id in &cpu_count { + let single_buffer = perf_evt_array.open(*cpu_id, None)?; + perf_evt_array_buffer.push(single_buffer); + } } + + //info!("Opening perf buffers for {} CPUs...", cpu_count); info!("Perf buffers created successfully"); + let mut event_buffers = event_buffers.into_iter(); + + let time_stamp_events_perf_buffer = event_buffers.next().expect(""); + let net_perf_buffer = event_buffers.next().expect(""); // Create shared running flags let net_metrics_running = Arc::new(AtomicBool::new(true)); let time_stamp_events_running = Arc::new(AtomicBool::new(true)); - + // Create proper sized buffers - let net_metrics_buffers = vec![BytesMut::with_capacity(1024); cpu_count]; - let time_stamp_events_buffers = vec![BytesMut::with_capacity(1024); cpu_count]; - + let net_metrics_buffers = vec![BytesMut::with_capacity(1024); cpu_count.len()]; + let time_stamp_events_buffers = vec![BytesMut::with_capacity(1024); cpu_count.len()]; + // Clone for the signal handler let net_metrics_running_signal = net_metrics_running.clone(); let time_stamp_events_running_signal = time_stamp_events_running.clone(); - + info!("Starting event listener tasks..."); let metrics_map_displayer = tokio::spawn(async move { display_metrics_map(net_perf_buffer, net_metrics_running, net_metrics_buffers).await; }); let time_stamp_events_displayer = tokio::spawn(async move { - display_time_stamp_events_map(time_stamp_events_perf_buffer, time_stamp_events_running, time_stamp_events_buffers).await + display_time_stamp_events_map( + time_stamp_events_perf_buffer, + time_stamp_events_running, + time_stamp_events_buffers, + ) + .await }); info!("Event listeners started, entering main loop..."); @@ -176,4 +207,4 @@ pub async fn event_listener(bpf_maps: (Map, Map)) -> Result<(), anyhow::Error> { // return success Ok(()) -} \ No newline at end of file +} diff --git a/core/src/components/metrics/src/main.rs b/core/src/components/metrics/src/main.rs index 6b22a86..9648e8a 100644 --- a/core/src/components/metrics/src/main.rs +++ b/core/src/components/metrics/src/main.rs @@ -1,27 +1,18 @@ -use aya::{ - Ebpf -}; - +use anyhow::{Context, Ok}; +use aya::Ebpf; +use cortexbrain_common::{constants, logger}; use std::{ env, fs, path::Path, - sync::{ - Arc, Mutex, - }, + sync::{Arc, Mutex}, }; - -use anyhow::{Context, Ok}; use tracing::{error, info}; -use cortexbrain_common::{constants, logger}; mod helpers; -use crate::{helpers::event_listener, maps_handlers::map_pinner, program_handlers::load_and_attach_tcp_programs}; - -mod maps_handlers; -use crate::maps_handlers::init_ebpf_maps; +use crate::helpers::event_listener; -mod program_handlers; -use crate::program_handlers::load_program; +use cortexbrain_common::map_handlers::{init_bpf_maps, map_pinner}; +use cortexbrain_common::program_handlers::load_program; mod structs; @@ -33,41 +24,50 @@ async fn main() -> Result<(), anyhow::Error> { info!("Starting metrics service..."); info!("fetching data"); - let bpf_path = env::var(constants::BPF_PATH).context("BPF_PATH environment variable required")?; + let bpf_path = + env::var(constants::BPF_PATH).context("BPF_PATH environment variable required")?; let data = fs::read(Path::new(&bpf_path)).context("Failed to load file from path")?; let bpf = Arc::new(Mutex::new(Ebpf::load(&data)?)); let tcp_bpf = bpf.clone(); let tcp_rev_bpf = bpf.clone(); + let tcp_v6_bpf = bpf.clone(); info!("Running Ebpf logger"); info!("loading programs"); - let bpf_map_save_path = - std::env::var(constants::PIN_MAP_PATH).context("PIN_MAP_PATH environment variable required")?; + let bpf_map_save_path = std::env::var(constants::PIN_MAP_PATH) + .context("PIN_MAP_PATH environment variable required")?; - match init_ebpf_maps(bpf.clone()) { - std::result::Result::Ok(maps) => { + let map_data = vec!["time_stamp_events".to_string(), "net_metrics".to_string()]; + + match init_bpf_maps(bpf.clone(), map_data) { + std::result::Result::Ok(bpf_maps) => { info!("BPF maps loaded successfully"); let pin_path = std::path::PathBuf::from(&bpf_map_save_path); info!("About to call map_pinner with path: {:?}", pin_path); - match map_pinner(&maps, &pin_path).await { - std::result::Result::Ok(_) => { + match map_pinner(bpf_maps, &pin_path) { + std::result::Result::Ok(maps) => { info!("BPF maps pinned successfully to {}", bpf_map_save_path); { load_program(bpf.clone(), "metrics_tracer", "tcp_identify_packet_loss") - .context("An error occured during the execution of load_program function")?; - } - - { - load_and_attach_tcp_programs(tcp_bpf.clone()) - .context("An error occured during the execution of load_and_attach_tcp_programs function")?; + .context( + "An error occured during the execution of load_program function", + )?; + + load_program(tcp_bpf,"tcp_connect","tcp_v4_connect") + .context("An error occured during the execution of load_and_attach_tcp_programs function")?; + load_program(tcp_v6_bpf,"tcp_connect","tcp_v6_connect") + .context("An error occured during the execution of load_and_attach_tcp_programs function")?; + + load_program( + tcp_rev_bpf, + "tcp_rcv_state_process", + "tcp_rcv_state_process", + ) + .context( + "An error occured during the execution of load_program function", + )?; } - - { - load_program(tcp_rev_bpf.clone(), "tcp_rcv_state_process", "tcp_rcv_state_process") - .context("An error occured during the execution of load_program function")?; - } - event_listener(maps).await?; } Err(e) => { @@ -83,4 +83,4 @@ async fn main() -> Result<(), anyhow::Error> { } Ok(()) -} \ No newline at end of file +} diff --git a/core/src/components/metrics/src/maps_handlers.rs b/core/src/components/metrics/src/maps_handlers.rs deleted file mode 100644 index 12c3d0a..0000000 --- a/core/src/components/metrics/src/maps_handlers.rs +++ /dev/null @@ -1,48 +0,0 @@ -use std::{path::PathBuf, sync::{Arc, Mutex}}; -use tokio::fs; -use anyhow::Error; -use aya::{maps::Map, Ebpf}; -use tracing::info; - - - -pub fn init_ebpf_maps(bpf: Arc>) -> Result<(Map, Map), anyhow::Error> { - // this function init the bpfs maps used in the main program - /* - index 0: net_metrics - index 1: time_stamp_events - */ - let mut bpf_new = bpf.lock().unwrap(); - - let net_metrics_map = bpf_new - .take_map("net_metrics") - .ok_or_else(|| anyhow::anyhow!("net_metrics map not found"))?; - - let time_stamps_events_map = bpf_new - .take_map("time_stamp_events") - .ok_or_else(|| anyhow::anyhow!("time_stamp_events map not found"))?; - - Ok((net_metrics_map, time_stamps_events_map)) -} - -pub async fn map_pinner(maps: &(Map, Map), path: &PathBuf) -> Result<(), Error> { - // check if the map exists - if !path.exists() { - info!("Pin path {:?} does not exist. Creating it...", path); - fs::create_dir_all(&path).await?; - #[cfg(unix)] - { - use std::os::unix::fs::PermissionsExt; - fs::set_permissions(&path, std::fs::Permissions::from_mode(0o755)).await?; - } - } - - let map1_path = path.join("net_metrics"); - let map2_path = path.join("time_stamp_events"); - - // maps pinning - maps.0.pin(&map1_path)?; - maps.1.pin(&map2_path)?; - - Ok(()) -} diff --git a/core/src/components/metrics/src/mod.rs b/core/src/components/metrics/src/mod.rs index 8c4a839..8414b63 100644 --- a/core/src/components/metrics/src/mod.rs +++ b/core/src/components/metrics/src/mod.rs @@ -1,5 +1,3 @@ mod structs; mod enums; -mod map_handlers; -mod helpers; -mod program_handlers; \ No newline at end of file +mod helpers; \ No newline at end of file diff --git a/core/src/components/metrics/src/program_handlers.rs b/core/src/components/metrics/src/program_handlers.rs deleted file mode 100644 index 24d18cb..0000000 --- a/core/src/components/metrics/src/program_handlers.rs +++ /dev/null @@ -1,59 +0,0 @@ -use std::sync::{Arc, Mutex}; - -use aya::{programs::KProbe, Ebpf}; -use tracing::{info, error}; -use std::convert::TryInto; - -pub fn load_program(bpf: Arc>, program_name: &str, actual_program: &str) -> Result<(), anyhow::Error> { - let mut bpf_new = bpf.lock().unwrap(); - - // Load and attach the eBPF programs - let program: &mut KProbe = bpf_new - .program_mut(program_name) - .ok_or_else(|| anyhow::anyhow!("Program {} not found", program_name))? - .try_into() - .map_err(|e| anyhow::anyhow!("Failed to convert program: {:?}", e))?; - - program.load()?; - - match program.attach(actual_program, 0) { - Ok(_) => info!("{} program attached successfully", actual_program), - Err(e) => { - error!("Error attaching {} program {:?}", actual_program, e); - return Err(anyhow::anyhow!("Failed to attach {}: {:?}", actual_program, e)); - } - }; - - info!("eBPF program {} loaded and attached successfully", program_name); - Ok(()) -} - -pub fn load_and_attach_tcp_programs(bpf: Arc>) -> Result<(), anyhow::Error> { - let mut bpf_new = bpf.lock().unwrap(); - - // Load and attach the eBPF programs - let tcp_prog: &mut KProbe = bpf_new - .program_mut("tcp_connect") - .ok_or_else(|| anyhow::anyhow!("Program tcp_connect not found"))? - .try_into() - .map_err(|e| anyhow::anyhow!("Failed to convert program tcp_connect: {:?}", e))?; - tcp_prog.load()?; - - match tcp_prog.attach("tcp_v4_connect", 0) { - Ok(_) => info!("tcp_v4_connect program attached successfully"), - Err(e) => { - error!("Error attaching tcp_v4_connect: {:?}", e); - return Err(anyhow::anyhow!("Failed to attach tcp_v4_connect: {:?}", e)); - } - }; - - match tcp_prog.attach("tcp_v6_connect", 0) { - Ok(_) => info!("tcp_v6_connect program attached successfully"), - Err(e) => { - error!("Error attaching tcp_v6_connect: {:?}", e); - return Err(anyhow::anyhow!("Failed to attach tcp_v6_connect: {:?}", e)); - } - }; - - Ok(()) -} \ No newline at end of file From b3e913647c293240680f916e2f43808eb1b34cf6 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Thu, 22 Jan 2026 23:05:29 +0100 Subject: [PATCH 14/46] [#168]: added load from program-handlers in identity user space implementation. Added a small doc in the conntracker/main.rs file --- core/common/src/program_handlers.rs | 4 +- core/src/components/conntracker/src/main.rs | 24 ++++++-- core/src/components/identity/Cargo.toml | 2 +- core/src/components/identity/src/helpers.rs | 3 + core/src/components/identity/src/main.rs | 67 +++------------------ 5 files changed, 34 insertions(+), 66 deletions(-) diff --git a/core/common/src/program_handlers.rs b/core/common/src/program_handlers.rs index 8832daf..5991bef 100644 --- a/core/common/src/program_handlers.rs +++ b/core/common/src/program_handlers.rs @@ -9,7 +9,9 @@ pub fn load_program( program_name: &str, actual_program: &str, ) -> Result<(), anyhow::Error> { - let mut bpf_new = bpf.lock().expect("Cannot get value from lock"); + let mut bpf_new = bpf + .lock() + .map_err(|e| anyhow::anyhow!("Cannot get value from lock. Reason: {}", e))?; // Load and attach the eBPF programs let program: &mut KProbe = bpf_new diff --git a/core/src/components/conntracker/src/main.rs b/core/src/components/conntracker/src/main.rs index 7a12642..e723e4b 100644 --- a/core/src/components/conntracker/src/main.rs +++ b/core/src/components/conntracker/src/main.rs @@ -29,14 +29,13 @@ use aya_ebpf::{ }; use crate::tc::try_identity_classifier; -use crate::veth_tracer::try_veth_tracer; use crate::tcp_analyzer::try_tcp_analyzer; - +use crate::veth_tracer::try_veth_tracer; // docs: // // virtual ethernet (veth) interface tracer: -// This function is triggered when a virtual ethernet interface is created +// This function is triggered when a virtual ethernet interface is created // #[kprobe] @@ -50,7 +49,7 @@ pub fn veth_creation_trace(ctx: ProbeContext) -> u32 { // docs: // // virtual ethernet (veth) interface tracer: -// This function is triggered when a virtual ethernet interface is deleted +// This function is triggered when a virtual ethernet interface is deleted // #[kprobe] @@ -94,14 +93,29 @@ pub fn identity_classifier(ctx: TcContext) -> i32 { // // this kprobe retrieves pid data and task id of an incoming packet +// this kprobe separation is needed because every kprobe program can be attached only one time. +// if you try to attach the same program the kernel returns this error: "Program is already attached" +// this is the reason why we have tcp_message_tracer_connect and tcp_message_tracer_rcv that are essentially the same functions +// but in the kernel space one is attached to the tcp_v4_connect kprobe and one to the tcp_v4_rcv kprobe +// TODO: a good addition to the library will be a function that check if the program is already attached: +// if the program is attached it creates a safe copy of the program to attach a second kernel symbol (kprobes) +// if the program is not attached we have the traditional behaviour (load the program + attach the program to the kernel symbol (kprobes)) + #[kprobe] -pub fn tcp_message_tracer(ctx: ProbeContext) -> u32 { +pub fn tcp_message_tracer_connect(ctx: ProbeContext) -> u32 { match try_tcp_analyzer(ctx) { Ok(ret_val) => ret_val, Err(ret_val) => ret_val.try_into().unwrap_or(1), } } +#[kprobe] +pub fn tcp_message_tracer_rcv(ctx: ProbeContext) -> u32 { + match try_tcp_analyzer(ctx) { + Ok(ret_val) => ret_val, + Err(ret_val) => ret_val.try_into().unwrap_or(1), + } +} //ref:https://elixir.bootlin.com/linux/v6.15.1/source/include/uapi/linux/ethtool.h#L536 //https://elixir.bootlin.com/linux/v6.15.1/source/drivers/net/veth.c#L268 diff --git a/core/src/components/identity/Cargo.toml b/core/src/components/identity/Cargo.toml index 3146991..f5bdb37 100644 --- a/core/src/components/identity/Cargo.toml +++ b/core/src/components/identity/Cargo.toml @@ -32,7 +32,7 @@ tracing = "0.1.41" tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } bytemuck = { version = "1.23.0", features = ["derive"] } bytemuck_derive = "1.10.1" -cortexbrain-common = { path = "../../../common/", features = ["map-handlers"] } +cortexbrain-common = { path = "../../../common/", features = ["map-handlers","program-handlers"] } nix = { version = "0.30.1", features = ["net"] } kube = { version = "2.0.1", features = ["client"] } k8s-openapi = { version = "0.26.0", features = ["v1_34"] } diff --git a/core/src/components/identity/src/helpers.rs b/core/src/components/identity/src/helpers.rs index 05b9603..9512789 100644 --- a/core/src/components/identity/src/helpers.rs +++ b/core/src/components/identity/src/helpers.rs @@ -52,6 +52,7 @@ pub async fn display_events>( //running: Arc, mut buffers: Vec, ) { + // FIXME: here maybe we need to use a loop with tokio::select while true { for buf in perf_buffers.iter_mut() { match buf.read_events(&mut buffers) { @@ -109,6 +110,7 @@ pub async fn display_veth_events>( mut buffers: Vec, mut link_ids: Arc>>, ) { + // FIXME: here maybe we need to use a loop with tokio::select while true { for buf in perf_buffers.iter_mut() { match buf.read_events(&mut buffers) { @@ -268,6 +270,7 @@ pub async fn display_tcp_registry_events>( //running: Arc, mut buffers: Vec, ) { + // FIXME: here maybe we need to use a loop with tokio::select while true { for buf in perf_buffers.iter_mut() { match buf.read_events(&mut buffers) { diff --git a/core/src/components/identity/src/main.rs b/core/src/components/identity/src/main.rs index 9dd6ce9..ac4ed37 100644 --- a/core/src/components/identity/src/main.rs +++ b/core/src/components/identity/src/main.rs @@ -18,7 +18,7 @@ use crate::helpers::{ use aya::{ Ebpf, maps::{Map, perf::PerfEventArray}, - programs::{KProbe, SchedClassifier, TcAttachType, tc::SchedClassifierLinkId}, + programs::{SchedClassifier, TcAttachType, tc::SchedClassifierLinkId}, util::online_cpus, }; @@ -27,6 +27,7 @@ use crate::helpers::scan_cgroup_cronjob; use bytes::BytesMut; use cortexbrain_common::map_handlers::{init_bpf_maps, map_pinner, populate_blocklist}; +use cortexbrain_common::program_handlers::load_program; use std::{ convert::TryInto, path::Path, @@ -65,7 +66,6 @@ async fn main() -> Result<(), anyhow::Error> { let data = vec![ "EventsMap".to_string(), "veth_identity_map".to_string(), - //"Blocklist".to_string(), "TcpPacketRegistry".to_string(), ]; match init_bpf_maps(bpf.clone(), data) { @@ -167,76 +167,25 @@ async fn init_tc_classifier( async fn init_veth_tracer(bpf: Arc>) -> Result<(), anyhow::Error> { //this functions init the veth_tracer used to make the InterfacesRegistry - - let mut bpf_new = bpf - .lock() - .map_err(|e| anyhow::anyhow!("Cannot get value from lock. Reason: {}", e))?; - //creation tracer - let veth_creation_tracer: &mut KProbe = bpf_new - .program_mut("veth_creation_trace") - .ok_or_else(|| anyhow::anyhow!("program 'veth_creation_trace' not found"))? - .try_into()?; - veth_creation_tracer.load()?; - - match veth_creation_tracer.attach("register_netdevice", 0) { - std::result::Result::Ok(_) => info!("veth_creation_tracer program attached successfully"), - Err(e) => error!("Error attaching veth_creation_tracer program {:?}", e), - } - //deletion tracer - let veth_deletion_tracer: &mut KProbe = bpf_new - .program_mut("veth_deletion_trace") - .ok_or_else(|| anyhow::anyhow!("program 'veth_deletion_trace' not found"))? - .try_into()?; - veth_deletion_tracer - .load() - .context("Failed to load deletetion_tracer program")?; + load_program(bpf.clone(), "veth_creation_trace", "register_netdevice")?; - match veth_deletion_tracer.attach("unregister_netdevice_queue", 0) { - std::result::Result::Ok(_) => info!("veth_deletion_trace program attached successfully"), - Err(e) => error!("Error attaching veth_deletetion_trace program {:?}", e), - } + //deletion tracer + load_program(bpf, "veth_deletion_trace", "unregister_netdevice_queue")?; Ok(()) } async fn init_tcp_registry(bpf: Arc>) -> Result<(), anyhow::Error> { - let mut bpf_new = bpf - .lock() - .map_err(|e| anyhow::anyhow!("Cannot get value from lock. Reason: {}", e))?; - // init tcp registry - let tcp_analyzer: &mut KProbe = bpf_new - .program_mut("tcp_message_tracer") - .ok_or_else(|| anyhow::anyhow!("program 'tcp_message_tracer' not found"))? - .try_into()?; - tcp_analyzer - .load() - .context("Failed to load tcp_message_tracer")?; + // .clone() increments the reference count of the shared Ebpf instance. + load_program(bpf.clone(), "tcp_message_tracer_rcv", "tcp_v4_rcv")?; info!("initializing tcp tracing functions"); - match tcp_analyzer.attach("tcp_v4_rcv", 0) { - std::result::Result::Ok(_) => { - info!("tcp_message_tracer attached successfully to the tcp_v4_rcv function ") - } - Err(e) => error!( - "Error attaching tcp_message_tracer to the tcp_v4_rcv function. Error: {:?}", - e - ), - } - - match tcp_analyzer.attach("tcp_v4_connect", 0) { - std::result::Result::Ok(_) => { - info!("tcp_message_tracer attached successfully to the tcp_v4_connect function ") - } - Err(e) => error!( - "Error attaching tcp_message_tracer to the tcp_v4_connect function. Error: {:?}", - e - ), - } + load_program(bpf, "tcp_message_tracer_connect", "tcp_v4_connect")?; Ok(()) } From b908dc0e00fda19e37da4d93b495b051a2c17f95 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Fri, 23 Jan 2026 20:24:15 +0100 Subject: [PATCH 15/46] [#158]: fixed typos in the map names --- core/src/components/conntracker/src/data_structures.rs | 2 +- core/src/components/identity/src/main.rs | 2 +- core/src/testing/identity.yaml | 2 +- core/src/testing/metrics.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/components/conntracker/src/data_structures.rs b/core/src/components/conntracker/src/data_structures.rs index 35861a8..4de05cc 100644 --- a/core/src/components/conntracker/src/data_structures.rs +++ b/core/src/components/conntracker/src/data_structures.rs @@ -87,7 +87,7 @@ pub struct TcpPacketRegistry{ // -#[map(name = "EventsMap", pinning = "by_name")] +#[map(name = "events_map", pinning = "by_name")] pub static mut EVENTS: PerfEventArray = PerfEventArray::new(0); // FIXME: this might be useless diff --git a/core/src/components/identity/src/main.rs b/core/src/components/identity/src/main.rs index ac4ed37..b4f6d18 100644 --- a/core/src/components/identity/src/main.rs +++ b/core/src/components/identity/src/main.rs @@ -64,7 +64,7 @@ async fn main() -> Result<(), anyhow::Error> { let bpf_map_save_path = std::env::var(constants::PIN_MAP_PATH) .context("PIN_MAP_PATH environment variable required")?; let data = vec![ - "EventsMap".to_string(), + "events_map".to_string(), "veth_identity_map".to_string(), "TcpPacketRegistry".to_string(), ]; diff --git a/core/src/testing/identity.yaml b/core/src/testing/identity.yaml index bb027d2..1b77d60 100644 --- a/core/src/testing/identity.yaml +++ b/core/src/testing/identity.yaml @@ -53,7 +53,7 @@ spec: - SYS_PTRACE containers: - name: identity - image: ghcr.io/cortexflow/identity:latest + image: lorenzotettamanti/cortexflow-identity:0.1.5-refcount7 command: ["/bin/bash", "-c"] args: - | diff --git a/core/src/testing/metrics.yaml b/core/src/testing/metrics.yaml index 4c775ca..1c1ecf8 100644 --- a/core/src/testing/metrics.yaml +++ b/core/src/testing/metrics.yaml @@ -19,7 +19,7 @@ spec: hostNetwork: true containers: - name: metrics - image: ghcr.io/cortexflow/metrics:latest + image: lorenzotettamanti/cortexflow-metrics:0.1.2-test8 command: ["/bin/bash", "-c"] args: - | From 901fc3c27c84ba932ad98a1ebd18daea27df3b8c Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Fri, 23 Jan 2026 22:35:12 +0100 Subject: [PATCH 16/46] [#158]: fixed bpf error: Error: the program is already loaded.Improved map handlers code --- core/common/src/map_handlers.rs | 5 ++++- core/src/components/identity/src/main.rs | 21 ++++++++++--------- core/src/components/metrics/src/helpers.rs | 16 +++++++------- core/src/components/metrics/src/main.rs | 4 ++-- .../src/components/metrics_tracer/src/main.rs | 8 ++++++- core/src/testing/identity.yaml | 2 +- core/src/testing/metrics.yaml | 2 +- 7 files changed, 34 insertions(+), 24 deletions(-) diff --git a/core/common/src/map_handlers.rs b/core/common/src/map_handlers.rs index 2882d66..2e22736 100644 --- a/core/common/src/map_handlers.rs +++ b/core/common/src/map_handlers.rs @@ -30,7 +30,10 @@ pub fn init_bpf_maps( bpf: Arc>, map_names: Vec, ) -> Result { - let mut bpf_new = bpf.lock().expect("Cannot get value from lock"); + let mut bpf_new = bpf + .lock() + .map_err(|e| anyhow::anyhow!("Cannot get value from lock. Reason: {}", e))?; + let mut maps = Vec::new(); // stores bpf_maps_objects for name in &map_names { diff --git a/core/src/components/identity/src/main.rs b/core/src/components/identity/src/main.rs index b4f6d18..56f81d6 100644 --- a/core/src/components/identity/src/main.rs +++ b/core/src/components/identity/src/main.rs @@ -37,7 +37,7 @@ use std::{ use anyhow::{Context, Ok}; use cortexbrain_common::{constants, logger}; use tokio::{fs, signal}; -use tracing::{debug, error, info}; +use tracing::{debug, error, info, warn}; use std::collections::HashMap; @@ -63,12 +63,13 @@ async fn main() -> Result<(), anyhow::Error> { let bpf = Arc::new(Mutex::new(Ebpf::load(&data)?)); let bpf_map_save_path = std::env::var(constants::PIN_MAP_PATH) .context("PIN_MAP_PATH environment variable required")?; - let data = vec![ + let map_data = vec![ "events_map".to_string(), "veth_identity_map".to_string(), "TcpPacketRegistry".to_string(), + "Blocklist".to_string(), ]; - match init_bpf_maps(bpf.clone(), data) { + match init_bpf_maps(bpf.clone(), map_data) { std::result::Result::Ok(bpf_maps) => { info!("Successfully loaded bpf maps"); let pin_path = std::path::PathBuf::from(&bpf_map_save_path); @@ -212,13 +213,13 @@ async fn event_listener( // create the PerfEventArrays and the buffers for map in bpf_maps { debug!("Debugging map type:{:?}", map); - let perf_event_array = PerfEventArray::try_from(map).map_err(|e| { - error!("Cannot create perf_event_array for map.Reason: {}", e); - anyhow::anyhow!("Cannot create perf_event_array for map.Reason: {}", e) - })?; - perf_event_arrays.push(perf_event_array); // this is step 1 - let perf_event_array_buffer = Vec::new(); - event_buffers.push(perf_event_array_buffer); //this is step 2 + if let std::result::Result::Ok(perf_event_array) = PerfEventArray::try_from(map) { + perf_event_arrays.push(perf_event_array); // this is step 1 + let perf_event_array_buffer = Vec::new(); + event_buffers.push(perf_event_array_buffer); //this is step 2 + } else { + warn!("Map is not a PerfEventArray, skipping load"); + } } // fill the input buffers with data from the PerfEventArrays diff --git a/core/src/components/metrics/src/helpers.rs b/core/src/components/metrics/src/helpers.rs index f519c7e..a67b607 100644 --- a/core/src/components/metrics/src/helpers.rs +++ b/core/src/components/metrics/src/helpers.rs @@ -10,7 +10,7 @@ use std::sync::{ }; use tokio::signal; -use tracing::{debug, error, info}; +use tracing::{debug, error, info, warn}; use crate::structs::NetworkMetrics; use crate::structs::TimeStampMetrics; @@ -128,13 +128,13 @@ pub async fn event_listener(bpf_maps: Vec) -> Result<(), anyhow::Error> { info!("Creating perf buffers..."); for map in bpf_maps { debug!("Debugging map type:{:?}", map); - let perf_event_array = PerfEventArray::try_from(map).map_err(|e| { - error!("Cannot create perf_event_array for map.Reason: {}", e); - anyhow::anyhow!("Cannot create perf_event_array for map.Reason: {}", e) - })?; - perf_event_arrays.push(perf_event_array); // this is step 1 - let perf_event_array_buffer = Vec::new(); - event_buffers.push(perf_event_array_buffer); //this is step 2 + if let std::result::Result::Ok(perf_event_array) = PerfEventArray::try_from(map) { + perf_event_arrays.push(perf_event_array); // this is step 1 + let perf_event_array_buffer = Vec::new(); + event_buffers.push(perf_event_array_buffer); //this is step 2 + } else { + warn!("Map is not a PerfEventArray, skipping load"); + } } let cpu_count = online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))?; diff --git a/core/src/components/metrics/src/main.rs b/core/src/components/metrics/src/main.rs index 9648e8a..e8677fb 100644 --- a/core/src/components/metrics/src/main.rs +++ b/core/src/components/metrics/src/main.rs @@ -54,9 +54,9 @@ async fn main() -> Result<(), anyhow::Error> { "An error occured during the execution of load_program function", )?; - load_program(tcp_bpf,"tcp_connect","tcp_v4_connect") + load_program(tcp_bpf,"tcp_v4_connect","tcp_v4_connect") .context("An error occured during the execution of load_and_attach_tcp_programs function")?; - load_program(tcp_v6_bpf,"tcp_connect","tcp_v6_connect") + load_program(tcp_v6_bpf,"tcp_v6_connect","tcp_v6_connect") .context("An error occured during the execution of load_and_attach_tcp_programs function")?; load_program( diff --git a/core/src/components/metrics_tracer/src/main.rs b/core/src/components/metrics_tracer/src/main.rs index 2f5e5a1..216a6ac 100644 --- a/core/src/components/metrics_tracer/src/main.rs +++ b/core/src/components/metrics_tracer/src/main.rs @@ -78,7 +78,13 @@ fn try_metrics_tracer(ctx: ProbeContext) -> Result { // Monitor on tcp_sendmsg, tcp_v4_connect #[kprobe] -fn tcp_connect(ctx: ProbeContext) -> u32 { +fn tcp_v6_connect(ctx: ProbeContext) -> u32 { + match on_connect(ctx) { Ok(_) => 0, Err(e) => e as u32 } +} + +// Monitor on tcp_sendmsg, tcp_v4_connect +#[kprobe] +fn tcp_v4_connect(ctx: ProbeContext) -> u32 { match on_connect(ctx) { Ok(_) => 0, Err(e) => e as u32 } } diff --git a/core/src/testing/identity.yaml b/core/src/testing/identity.yaml index 1b77d60..38bf197 100644 --- a/core/src/testing/identity.yaml +++ b/core/src/testing/identity.yaml @@ -53,7 +53,7 @@ spec: - SYS_PTRACE containers: - name: identity - image: lorenzotettamanti/cortexflow-identity:0.1.5-refcount7 + image: lorenzotettamanti/cortexflow-identity:0.1.5-refcount9 command: ["/bin/bash", "-c"] args: - | diff --git a/core/src/testing/metrics.yaml b/core/src/testing/metrics.yaml index 1c1ecf8..262b28f 100644 --- a/core/src/testing/metrics.yaml +++ b/core/src/testing/metrics.yaml @@ -19,7 +19,7 @@ spec: hostNetwork: true containers: - name: metrics - image: lorenzotettamanti/cortexflow-metrics:0.1.2-test8 + image: lorenzotettamanti/cortexflow-metrics:0.1.2-test12 command: ["/bin/bash", "-c"] args: - | From 4f6c2b06d81c8b25dd1b43af381dbd397e2b36e6 Mon Sep 17 00:00:00 2001 From: Lorenzo Tettamanti <167028406+LorenzoTettamanti@users.noreply.github.com> Date: Sun, 8 Feb 2026 15:02:36 +0100 Subject: [PATCH 17/46] OpenTelemetry Protcol (OTLP) identity integration (#180) * [#158]: moved map_handlers module from identity crate to common crate * [#158]: simplified identity logic. removed duplicated code and functions * [#158]: added program handlers function in the common crate. Remove duplicated code in metrics module * [#168]: added load from program-handlers in identity user space implementation. Added a small doc in the conntracker/main.rs file * [#158]: fixed typos in the map names * [#158]: fixed bpf error: Error: the program is already loaded.Improved map handlers code * [#174]: added open telemetry (otel) logger for logs. Added otel daemonset with otel agent and collector * [#158]: improved docs for the conntracker data structures. removed useless conversion from u8 to 64 with .into() for state variable * [#174]: Added otel libraries and features in the common crate. .update identity kubernetes manifest with the otel env variables. * [#158]: imroved documentation in the user space for the identity (VethLog) data structure * [#158]: restored blocklist map initialization * [#158]: added better docs. Updated while true pattern with "loop" pattern. Code cleaning * [#174]: added prettify to logger * [#181]: added command to repair blocklist configmaps * [#182]: added GetTrackedVeth grpc endpoint definition * [#158]: added load_perf_event_array_from_mapdata function in map_handlers.rs * [#182]: added total monitored veth_events (tot_monitored_veth) * [#182]: added "cfcli monitoring veth" command frontend. added send_tracked_veth_requests function in api/requests.rs * [refactoring]: separate experimental service discovery from the helpers in the identity service * [refactoring]: created BufferType enum to centralize the event readers for PacketLog, VethLog and TcpPacketRegistry * updated common cargo.toml * [#158]: moved IpProtocols and network structures from the identity crate to the common crate. Added BufferType enum to list different buffers readers. Added buffer_type module in cortexbrain common (experimental) * [update]: cli packages update * [update]: IpProtocols update . Btter code formatting * [#158]: fixed typos from latest commit * [#182]: Added tracked events agent API implementation. Added VethEvent protobuffer message type * [proposal]: added batcher module intial bones * [158]: removed old deprecated functions to show veth logs. added packed representation for the VethLog structure. changed dev_addr type from [u32;8] to [u8;6] (see https://wiki.osdev.org/Address_Resolution_Protocol). Added unit tests to check VethLog structure bytes size. Changed "actual program" to "kernel_symbol" in the load_program function * [#158]: added a map_manager to call BpfMaps using the map name instead of relying on the declaration order during the startup * [#182]: added VethEvent protobuf message. Added log to see the veth event in the agent api * [docker] pushed new development images --- cli/Cargo.lock | 610 +++++++++++++++-- cli/Cargo.toml | 2 +- cli/src/install.rs | 84 ++- cli/src/main.rs | 11 +- cli/src/monitoring.rs | 60 +- core/Cargo.lock | 586 ++++++++++++++-- core/api/Cargo.toml | 17 +- core/api/protos/agent.proto | 22 +- core/api/src/agent.rs | 106 ++- core/api/src/api.rs | 241 +++++-- core/api/src/batcher.rs | 22 + core/api/src/client.rs | 22 +- core/api/src/lib.rs | 1 + core/api/src/requests.rs | 54 +- core/api/src/structs.rs | 2 +- core/common/Cargo.toml | 13 +- core/common/src/buffer_type.rs | 292 ++++++++ core/common/src/lib.rs | 5 +- core/common/src/logger.rs | 49 +- core/common/src/map_handlers.rs | 48 +- core/common/src/program_handlers.rs | 10 +- .../conntracker/src/data_structures.rs | 59 +- .../components/conntracker/src/veth_tracer.rs | 10 +- core/src/components/identity/Cargo.toml | 13 +- core/src/components/identity/src/enums.rs | 12 - core/src/components/identity/src/helpers.rs | 641 ++---------------- core/src/components/identity/src/lib.rs | 4 +- core/src/components/identity/src/main.rs | 118 ++-- core/src/components/identity/src/mod.rs | 4 +- .../identity/src/service_discovery.rs | 297 ++++++++ core/src/components/identity/src/structs.rs | 56 -- core/src/testing/agent.yaml | 2 +- core/src/testing/identity.yaml | 13 +- core/src/testing/otel_agent.yaml | 210 ++++++ 34 files changed, 2717 insertions(+), 979 deletions(-) create mode 100644 core/api/src/batcher.rs create mode 100644 core/common/src/buffer_type.rs delete mode 100644 core/src/components/identity/src/enums.rs create mode 100644 core/src/components/identity/src/service_discovery.rs delete mode 100644 core/src/components/identity/src/structs.rs create mode 100644 core/src/testing/otel_agent.yaml diff --git a/cli/Cargo.lock b/cli/Cargo.lock index 6e951ca..0fea51d 100644 --- a/cli/Cargo.lock +++ b/cli/Cargo.lock @@ -216,18 +216,15 @@ checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "bytemuck" -version = "1.23.2" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3995eaeebcdf32f91f980d360f78732ddc061097ab4e39991ae7a6ace9194677" -dependencies = [ - "bytemuck_derive", -] +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" [[package]] name = "bytemuck_derive" -version = "1.10.1" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f154e572231cb6ba2bd1176980827e3d5dc04cc183a75dea38109fbdd672d29" +checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" dependencies = [ "proc-macro2", "quote", @@ -236,9 +233,9 @@ dependencies = [ [[package]] name = "bytes" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" [[package]] name = "cc" @@ -256,12 +253,6 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" -[[package]] -name = "cfg_aliases" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" - [[package]] name = "chrono" version = "0.4.42" @@ -359,10 +350,19 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cortexbrain-common" version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5daea06747f06e000deaa52b7aceb504ddc309c061badf76e0b4b3d146ebf3a4" dependencies = [ "anyhow", + "aya", + "bytemuck", + "bytemuck_derive", + "bytes", + "k8s-openapi", + "kube", + "opentelemetry", + "opentelemetry-appender-tracing", + "opentelemetry-otlp", + "opentelemetry-stdout", + "opentelemetry_sdk", "tracing", "tracing-subscriber", ] @@ -390,8 +390,6 @@ dependencies = [ [[package]] name = "cortexflow_agent_api" version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bfebbb2894a8d2edec3c4f3631952860c34706b798aa8d77ea2806ddd6fc476" dependencies = [ "anyhow", "aya", @@ -399,7 +397,6 @@ dependencies = [ "bytemuck_derive", "chrono", "cortexbrain-common", - "cortexflow_identity", "prost", "tokio", "tokio-stream", @@ -412,27 +409,6 @@ dependencies = [ "tracing-subscriber", ] -[[package]] -name = "cortexflow_identity" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5725a802e4f494b5fab4c69b1455a32dd3804b52a58c665a7d751eeae93ddfca" -dependencies = [ - "anyhow", - "aya", - "bytemuck", - "bytemuck_derive", - "bytes", - "cortexbrain-common", - "k8s-openapi", - "kube", - "libc", - "nix", - "tokio", - "tracing", - "tracing-subscriber", -] - [[package]] name = "cpufeatures" version = "0.2.17" @@ -512,6 +488,17 @@ dependencies = [ "windows-sys 0.61.1", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "either" version = "1.15.0" @@ -603,12 +590,34 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + [[package]] name = "futures-io" version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "futures-sink" version = "0.3.31" @@ -630,6 +639,7 @@ dependencies = [ "futures-channel", "futures-core", "futures-io", + "futures-macro", "futures-sink", "futures-task", "memchr", @@ -822,6 +832,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e" dependencies = [ + "base64", "bytes", "futures-channel", "futures-core", @@ -829,7 +840,9 @@ dependencies = [ "http", "http-body", "hyper", + "ipnet", "libc", + "percent-encoding", "pin-project-lite", "socket2", "tokio", @@ -861,6 +874,108 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" + +[[package]] +name = "icu_properties" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" + +[[package]] +name = "icu_provider" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + [[package]] name = "indexmap" version = "2.11.0" @@ -871,6 +986,22 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "ipnet" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" + +[[package]] +name = "iri-string" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -1019,6 +1150,12 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" +[[package]] +name = "litemap" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + [[package]] name = "lock_api" version = "0.4.13" @@ -1056,15 +1193,6 @@ version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" -[[package]] -name = "memoffset" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - "autocfg", -] - [[package]] name = "mime" version = "0.3.17" @@ -1088,19 +1216,6 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" -[[package]] -name = "nix" -version = "0.30.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" -dependencies = [ - "bitflags", - "cfg-if", - "cfg_aliases", - "libc", - "memoffset", -] - [[package]] name = "nu-ansi-term" version = "0.50.1" @@ -1149,6 +1264,105 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +[[package]] +name = "opentelemetry" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b84bcd6ae87133e903af7ef497404dda70c60d0ea14895fc8a5e6722754fc2a0" +dependencies = [ + "futures-core", + "futures-sink", + "js-sys", + "pin-project-lite", + "thiserror 2.0.16", + "tracing", +] + +[[package]] +name = "opentelemetry-appender-tracing" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef6a1ac5ca3accf562b8c306fa8483c85f4390f768185ab775f242f7fe8fdcc2" +dependencies = [ + "opentelemetry", + "tracing", + "tracing-core", + "tracing-subscriber", +] + +[[package]] +name = "opentelemetry-http" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7a6d09a73194e6b66df7c8f1b680f156d916a1a942abf2de06823dd02b7855d" +dependencies = [ + "async-trait", + "bytes", + "http", + "opentelemetry", + "reqwest", +] + +[[package]] +name = "opentelemetry-otlp" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2366db2dca4d2ad033cad11e6ee42844fd727007af5ad04a1730f4cb8163bf" +dependencies = [ + "http", + "opentelemetry", + "opentelemetry-http", + "opentelemetry-proto", + "opentelemetry_sdk", + "prost", + "reqwest", + "thiserror 2.0.16", + "tokio", + "tonic", + "tracing", +] + +[[package]] +name = "opentelemetry-proto" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7175df06de5eaee9909d4805a3d07e28bb752c34cab57fa9cff549da596b30f" +dependencies = [ + "opentelemetry", + "opentelemetry_sdk", + "prost", + "tonic", + "tonic-prost", +] + +[[package]] +name = "opentelemetry-stdout" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc8887887e169414f637b18751487cce4e095be787d23fad13c454e2fb1b3811" +dependencies = [ + "chrono", + "opentelemetry", + "opentelemetry_sdk", +] + +[[package]] +name = "opentelemetry_sdk" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e14ae4f5991976fd48df6d843de219ca6d31b01daaab2dad5af2badeded372bd" +dependencies = [ + "futures-channel", + "futures-executor", + "futures-util", + "opentelemetry", + "percent-encoding", + "rand", + "thiserror 2.0.16", + "tokio", + "tokio-stream", +] + [[package]] name = "option-ext" version = "0.2.0" @@ -1288,6 +1502,24 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + [[package]] name = "prettyplease" version = "0.2.37" @@ -1396,6 +1628,35 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.3", +] + [[package]] name = "redox_syscall" version = "0.5.17" @@ -1445,6 +1706,40 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" +[[package]] +name = "reqwest" +version = "0.12.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" +dependencies = [ + "base64", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "ring" version = "0.17.14" @@ -1631,6 +1926,18 @@ dependencies = [ "serde_core", ] +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + [[package]] name = "serde_yaml" version = "0.9.34+deprecated" @@ -1701,6 +2008,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + [[package]] name = "strsim" version = "0.11.1" @@ -1729,6 +2042,20 @@ name = "sync_wrapper" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "tempfile" @@ -1792,6 +2119,16 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tokio" version = "1.49.0" @@ -1964,10 +2301,13 @@ dependencies = [ "base64", "bitflags", "bytes", + "futures-util", "http", "http-body", + "iri-string", "mime", "pin-project-lite", + "tower", "tower-layer", "tower-service", "tracing", @@ -2089,6 +2429,24 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -2158,6 +2516,19 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e038d41e478cc73bae0ff9b36c60cff1c98b8f38f8d7e8061e79ee63608ac5c" +dependencies = [ + "cfg-if", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.104" @@ -2190,6 +2561,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "web-sys" +version = "0.3.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "windows-core" version = "0.62.1" @@ -2426,8 +2807,111 @@ version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "052283831dbae3d879dc7f51f3d92703a316ca49f91540417d38591826127814" +[[package]] +name = "writeable" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + +[[package]] +name = "yoke" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7456cf00f0685ad319c5b1693f291a650eaf345e941d082fc4e03df8a03996ac" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1328722bbf2115db7e19d69ebcc15e795719e2d66b60827c6a69a117365e37a0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + [[package]] name = "zeroize" version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + +[[package]] +name = "zerotrie" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/cli/Cargo.toml b/cli/Cargo.toml index cfbcae0..a14b527 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -22,7 +22,7 @@ tonic = "0.14.2" tonic-reflection = "0.14.2" prost-types = "0.14.3" prost = "0.14.3" -cortexflow_agent_api = {version = "0.1.1",features = ["client"]} +cortexflow_agent_api = {path = "../core/api",features = ["client"]} kube = "2.0.1" k8s-openapi = {version = "0.26.0", features = ["v1_34"]} diff --git a/cli/src/install.rs b/cli/src/install.rs index bdb1ea1..105853c 100644 --- a/cli/src/install.rs +++ b/cli/src/install.rs @@ -2,8 +2,10 @@ use crate::errors::CliError; use crate::essential::{BASE_COMMAND, connect_to_client, create_config_file, create_configs}; use clap::{Args, Subcommand}; use colored::Colorize; -use kube::Error; +use k8s_openapi::api::core::v1::ConfigMap; use kube::core::ErrorResponse; +use kube::{Api, Client, Error}; +use std::thread::sleep; use std::{process::Command, thread, time::Duration}; // docs: @@ -38,6 +40,8 @@ pub enum InstallCommands { about = "Deploys a simple example contained in deploy-test-pod.yaml" )] TestPods, + #[command(name = "blocklist", about = "Install or Repair blocklist configmap")] + Blocklist, } //install args @@ -206,6 +210,84 @@ async fn install_simple_example_component() -> Result<(), CliError> { } } +// docs: +pub async fn install_blocklist_configmap() -> Result<(), CliError> { + match connect_to_client().await { + Ok(client) => { + println!( + "{} {}", + "=====>".blue().bold(), + "Checking if the Blocklist configmap exists" + ); + sleep(Duration::from_secs(1)); + let blocklist_exists = check_if_blocklist_exists(client).await?; + if !blocklist_exists { + println!( + "{} {}", + "=====>".blue().bold(), + "Blocklist configmap does not exist".red().bold() + ); + sleep(Duration::from_secs(1)); + println!("{} {}", "=====>".bold().blue(), "Creating configmap"); + let metdata_configs = create_configs(); + sleep(Duration::from_secs(1)); + match create_config_file(metdata_configs).await { + Ok(_) => { + println!( + "{} {}", + "=====>".bold().blue(), + "Configmap created/repaired successfully".bold().green() + ) + } + Err(e) => { + return Err(CliError::InstallerError { + reason: e.to_string(), + }); + } + } + return Ok(()); + } else { + println!() + } + + Ok(()) + } + Err(e) => { + return Err(CliError::ClientError(Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); + } + } +} + +// docs: +async fn check_if_blocklist_exists(client: Client) -> Result { + let namespace = "cortexflow"; + let name = "cortexbrain-client-config"; + let api: Api = Api::namespaced(client, namespace); + match api.get(name).await { + Ok(_) => { + println!( + "{} {}", + "=====>".bold().blue(), + "Blocklist configmap exists".green().bold() + ); + Ok(true) + } + Err(_) => { + println!( + "{} {}", + "=====>".bold().blue(), + "Blocklist configmap doesn not exists".red().bold(), + ); + Ok(false) + } + } +} + //docs: // // This is an auxiliary function to help manage the cortexflow components during the installation diff --git a/cli/src/main.rs b/cli/src/main.rs index 0a5ac46..8d543cd 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -18,7 +18,7 @@ use crate::install::{InstallArgs, InstallCommands, install_cortexflow, install_s use crate::logs::{LogsArgs, logs_command}; use crate::monitoring::{ MonitorArgs, MonitorCommands, list_features, monitor_dropped_packets, monitor_identity_events, - monitor_latency_metrics, + monitor_latency_metrics, monitor_tracked_veth, }; use crate::policies::{ PoliciesArgs, PoliciesCommands, check_blocklist, create_blocklist, remove_ip, @@ -68,7 +68,7 @@ enum Commands { struct SetArgs { val: String, } - +//TODO: add command for monitoring veth interfaces async fn args_parser() -> Result<(), CliError> { let args = Cli::parse(); debug!("Arguments {:?}", args.cmd); @@ -80,6 +80,10 @@ async fn args_parser() -> Result<(), CliError> { InstallCommands::TestPods => { install_simple_example().await?; } + InstallCommands::Blocklist => { + //install or repair blocklist configmap + let _ = install::install_blocklist_configmap().await?; + } }, Some(Commands::Uninstall) => { uninstall().await?; @@ -120,6 +124,9 @@ async fn args_parser() -> Result<(), CliError> { MonitorCommands::Droppedpackets => { let _ = monitor_dropped_packets().await?; } + MonitorCommands::Veth => { + let _ = monitor_tracked_veth().await?; + } }, Some(Commands::Policies(policies_args)) => { match policies_args.policy_cmd { diff --git a/cli/src/monitoring.rs b/cli/src/monitoring.rs index b7cf3e2..72a94b8 100644 --- a/cli/src/monitoring.rs +++ b/cli/src/monitoring.rs @@ -8,7 +8,10 @@ use std::result::Result::Ok; use tonic_reflection::pb::v1::server_reflection_response::MessageResponse; use agent_api::client::{connect_to_client, connect_to_server_reflection}; -use agent_api::requests::{get_all_features, send_active_connection_request}; +use agent_api::requests::{ + get_all_features, send_active_connection_request, send_dropped_packets_request, + send_latency_metrics_request, send_tracked_veth_request, +}; use crate::errors::CliError; use clap::{Args, Subcommand}; @@ -33,6 +36,11 @@ pub enum MonitorCommands { about = "Monitor the dropped packets metrics detected by the metrics service" )] Droppedpackets, + #[command( + name = "veth", + about = "Monitor tracked veth interfaces from the identity service" + )] + Veth, } // cfcli monitor @@ -40,8 +48,6 @@ pub enum MonitorCommands { pub struct MonitorArgs { #[command(subcommand)] pub monitor_cmd: MonitorCommands, - //#[arg(long, short)] - //pub flags: Option, } pub async fn list_features() -> Result<(), CliError> { @@ -168,7 +174,7 @@ pub async fn monitor_latency_metrics() -> Result<(), CliError> { "Connected to CortexFlow Client".green() ); //send request to get latency metrics - match agent_api::requests::send_latency_metrics_request(client).await { + match send_latency_metrics_request(client).await { Ok(response) => { let resp = response.into_inner(); if resp.metrics.is_empty() { @@ -237,7 +243,7 @@ pub async fn monitor_dropped_packets() -> Result<(), CliError> { "Connected to CortexFlow Client".green() ); //send request to get dropped packets metrics - match agent_api::requests::send_dropped_packets_request(client).await { + match send_dropped_packets_request(client).await { Ok(response) => { let resp = response.into_inner(); if resp.metrics.is_empty() { @@ -291,6 +297,50 @@ pub async fn monitor_dropped_packets() -> Result<(), CliError> { Ok(()) } +pub async fn monitor_tracked_veth() -> Result<(), CliError> { + println!( + "{} {}", + "=====>".blue().bold(), + "Connecting to cortexflow Client".white() + ); + match connect_to_client().await { + Ok(client) => match send_tracked_veth_request(client).await { + Ok(response) => { + let veth_response = response.into_inner(); + if veth_response.tot_monitored_veth == 0 { + println!("{} {} ", "=====>".blue().bold(), "No tracked veth found"); + Ok(()) + } else { + println!( + "{} {} {} {} ", + "=====>".blue().bold(), + "Found:", + &veth_response.tot_monitored_veth, + "tracked veth" + ); + for veth in veth_response.veth_names.iter() { + println!("{} {}", "=====>".blue().bold(), &veth); + } + Ok(()) + } + } + Err(e) => { + return Err(CliError::AgentError( + tonic_reflection::server::Error::InvalidFileDescriptorSet(e.to_string()), + )); + } + }, + Err(e) => { + return Err(CliError::ClientError(kube::Error::Api(ErrorResponse { + status: "failed".to_string(), + message: "Failed to connect to kubernetes client".to_string(), + reason: e.to_string(), + code: 404, + }))); + } + } +} + fn convert_timestamp_to_date(timestamp: u64) -> String { DateTime::from_timestamp_micros(timestamp as i64) .map(|dt| dt.to_string()) diff --git a/core/Cargo.lock b/core/Cargo.lock index e980659..745a66d 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -275,9 +275,9 @@ checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "bytemuck" -version = "1.24.0" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" dependencies = [ "bytemuck_derive", ] @@ -295,9 +295,9 @@ dependencies = [ [[package]] name = "bytes" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" [[package]] name = "camino" @@ -407,19 +407,16 @@ version = "0.1.0" dependencies = [ "anyhow", "aya", + "bytemuck", + "bytemuck_derive", + "bytes", "k8s-openapi", "kube", - "tracing", - "tracing-subscriber", -] - -[[package]] -name = "cortexbrain-common" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5daea06747f06e000deaa52b7aceb504ddc309c061badf76e0b4b3d146ebf3a4" -dependencies = [ - "anyhow", + "opentelemetry", + "opentelemetry-appender-tracing", + "opentelemetry-otlp", + "opentelemetry-stdout", + "opentelemetry_sdk", "tracing", "tracing-subscriber", ] @@ -433,8 +430,7 @@ dependencies = [ "bytemuck", "bytemuck_derive", "chrono", - "cortexbrain-common 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "cortexflow_identity 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "cortexbrain-common", "prost", "tokio", "tokio-stream", @@ -456,34 +452,12 @@ dependencies = [ "bytemuck", "bytemuck_derive", "bytes", - "cortexbrain-common 0.1.0", - "k8s-openapi", - "kube", - "nix", - "tokio", - "tracing", - "tracing-subscriber", -] - -[[package]] -name = "cortexflow_identity" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5725a802e4f494b5fab4c69b1455a32dd3804b52a58c665a7d751eeae93ddfca" -dependencies = [ - "anyhow", - "aya", - "bytemuck", - "bytemuck_derive", - "bytes", - "cortexbrain-common 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "cortexbrain-common", "k8s-openapi", "kube", - "libc", "nix", "tokio", "tracing", - "tracing-subscriber", ] [[package]] @@ -544,6 +518,17 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "either" version = "1.15.0" @@ -641,12 +626,34 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + [[package]] name = "futures-io" version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "futures-sink" version = "0.3.31" @@ -668,6 +675,7 @@ dependencies = [ "futures-channel", "futures-core", "futures-io", + "futures-macro", "futures-sink", "futures-task", "memchr", @@ -866,6 +874,7 @@ version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8" dependencies = [ + "base64", "bytes", "futures-channel", "futures-core", @@ -873,7 +882,9 @@ dependencies = [ "http", "http-body", "hyper", + "ipnet", "libc", + "percent-encoding", "pin-project-lite", "socket2", "tokio", @@ -905,6 +916,108 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" + +[[package]] +name = "icu_properties" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" + +[[package]] +name = "icu_provider" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + [[package]] name = "indexmap" version = "2.12.0" @@ -915,6 +1028,22 @@ dependencies = [ "hashbrown 0.16.0", ] +[[package]] +name = "ipnet" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" + +[[package]] +name = "iri-string" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "itertools" version = "0.14.0" @@ -1047,6 +1176,12 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +[[package]] +name = "litemap" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" + [[package]] name = "lock_api" version = "0.4.14" @@ -1101,7 +1236,7 @@ dependencies = [ "aya-log", "bytemuck", "bytes", - "cortexbrain-common 0.1.0", + "cortexbrain-common", "libc", "nix", "tokio", @@ -1218,6 +1353,105 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +[[package]] +name = "opentelemetry" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b84bcd6ae87133e903af7ef497404dda70c60d0ea14895fc8a5e6722754fc2a0" +dependencies = [ + "futures-core", + "futures-sink", + "js-sys", + "pin-project-lite", + "thiserror 2.0.17", + "tracing", +] + +[[package]] +name = "opentelemetry-appender-tracing" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef6a1ac5ca3accf562b8c306fa8483c85f4390f768185ab775f242f7fe8fdcc2" +dependencies = [ + "opentelemetry", + "tracing", + "tracing-core", + "tracing-subscriber", +] + +[[package]] +name = "opentelemetry-http" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7a6d09a73194e6b66df7c8f1b680f156d916a1a942abf2de06823dd02b7855d" +dependencies = [ + "async-trait", + "bytes", + "http", + "opentelemetry", + "reqwest", +] + +[[package]] +name = "opentelemetry-otlp" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2366db2dca4d2ad033cad11e6ee42844fd727007af5ad04a1730f4cb8163bf" +dependencies = [ + "http", + "opentelemetry", + "opentelemetry-http", + "opentelemetry-proto", + "opentelemetry_sdk", + "prost", + "reqwest", + "thiserror 2.0.17", + "tokio", + "tonic", + "tracing", +] + +[[package]] +name = "opentelemetry-proto" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7175df06de5eaee9909d4805a3d07e28bb752c34cab57fa9cff549da596b30f" +dependencies = [ + "opentelemetry", + "opentelemetry_sdk", + "prost", + "tonic", + "tonic-prost", +] + +[[package]] +name = "opentelemetry-stdout" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc8887887e169414f637b18751487cce4e095be787d23fad13c454e2fb1b3811" +dependencies = [ + "chrono", + "opentelemetry", + "opentelemetry_sdk", +] + +[[package]] +name = "opentelemetry_sdk" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e14ae4f5991976fd48df6d843de219ca6d31b01daaab2dad5af2badeded372bd" +dependencies = [ + "futures-channel", + "futures-executor", + "futures-util", + "opentelemetry", + "percent-encoding", + "rand", + "thiserror 2.0.17", + "tokio", + "tokio-stream", +] + [[package]] name = "ordered-float" version = "2.10.1" @@ -1351,6 +1585,24 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + [[package]] name = "prettyplease" version = "0.2.37" @@ -1471,6 +1723,35 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -1509,6 +1790,40 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +[[package]] +name = "reqwest" +version = "0.12.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" +dependencies = [ + "base64", + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "js-sys", + "log", + "percent-encoding", + "pin-project-lite", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "ring" version = "0.17.14" @@ -1705,6 +2020,18 @@ dependencies = [ "serde_core", ] +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + [[package]] name = "serde_yaml" version = "0.9.34+deprecated" @@ -1775,6 +2102,12 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + [[package]] name = "subtle" version = "2.6.1" @@ -1797,6 +2130,20 @@ name = "sync_wrapper" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "tempfile" @@ -1860,6 +2207,16 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "tinystr" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tokio" version = "1.48.0" @@ -2032,10 +2389,13 @@ dependencies = [ "base64", "bitflags", "bytes", + "futures-util", "http", "http-body", + "iri-string", "mime", "pin-project-lite", + "tower", "tower-layer", "tower-service", "tracing", @@ -2157,6 +2517,24 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "valuable" version = "0.1.1" @@ -2206,6 +2584,19 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" +dependencies = [ + "cfg-if", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.105" @@ -2238,6 +2629,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "web-sys" +version = "0.3.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "which" version = "7.0.3" @@ -2477,8 +2878,111 @@ version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +[[package]] +name = "writeable" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" + +[[package]] +name = "yoke" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + [[package]] name = "zeroize" version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + +[[package]] +name = "zerotrie" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/core/api/Cargo.toml b/core/api/Cargo.toml index 988ac46..a422fd7 100644 --- a/core/api/Cargo.toml +++ b/core/api/Cargo.toml @@ -3,7 +3,13 @@ name = "cortexflow_agent_api" version = "0.1.1" edition = "2024" description = "CortexFlow agent API" -authors = ["Lorenzo Tettamanti", "Pranav Verma", "Lorenzo Bradanini","Siddharth Sutar","Andrea Bozzo"] +authors = [ + "Lorenzo Tettamanti", + "Pranav Verma", + "Lorenzo Bradanini", + "Siddharth Sutar", + "Andrea Bozzo", +] documentation = "https://docs.cortexflow.org" homepage = "https://docs.cortexflow.org" repository = "https://github.com/CortexFlow/CortexBrain" @@ -23,14 +29,17 @@ tonic = "0.14.0" tonic-prost = "0.14.0" tracing = "0.1.41" aya = "0.13.1" -cortexbrain-common = "0.1.0" +cortexbrain-common = { path = "../common", features = [ + "map-handlers", + "network-structs", + "buffer-reader" +] } tonic-reflection = "0.14.0" tonic-build = "0.14.0" tracing-subscriber = "0.3.19" tokio-stream = "0.1.17" -bytemuck = {version ="1.23.0"} +bytemuck = { version = "1.23.0" } bytemuck_derive = "1.10.1" -cortexflow_identity = {version = "0.1.1", features = ["enums"]} chrono = "0.4.42" [build-dependencies] diff --git a/core/api/protos/agent.proto b/core/api/protos/agent.proto index 3cd236b..9bfc6e4 100644 --- a/core/api/protos/agent.proto +++ b/core/api/protos/agent.proto @@ -68,6 +68,21 @@ message DroppedPacketsResponse { uint32 total_drops = 3; // Total drops across all connections } +// Veth Info + +message VethResponse{ + string status = 1; + repeated string veth_names = 2; // List of active veth interface names + int32 tot_monitored_veth = 3; +} +message VethEvent{ + string name = 1; // Virtual Ethernet Interface Name + uint64 state = 2; // Veth State + string dev_addr = 3; // Veth device Address + uint32 event_type = 4; // Event type + uint32 netns = 5; // Network Namespace + uint32 pid = 6; // Process ID +} //declare agent api service Agent{ @@ -81,11 +96,14 @@ service Agent{ // remove ip from blocklist endpoint rpc RmIpFromBlocklist(RmIpFromBlocklistRequest) returns (RmIpFromBlocklistResponse); - // metrics data + // metrics data endpoint rpc GetLatencyMetrics(google.protobuf.Empty) returns (LatencyMetricsResponse); - // dropped packets + // dropped packets endpoint rpc GetDroppedPacketsMetrics(google.protobuf.Empty) returns (DroppedPacketsResponse); + + // active veth info endpoint + rpc GetTrackedVeth(google.protobuf.Empty) returns (VethResponse); } message AddIpToBlocklistRequest{ diff --git a/core/api/src/agent.rs b/core/api/src/agent.rs index c6f5126..cb93ddd 100644 --- a/core/api/src/agent.rs +++ b/core/api/src/agent.rs @@ -121,6 +121,37 @@ pub struct DroppedPacketsResponse { pub total_drops: u32, } #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct VethResponse { + #[prost(string, tag = "1")] + pub status: ::prost::alloc::string::String, + /// List of active veth interface names + #[prost(string, repeated, tag = "2")] + pub veth_names: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, + #[prost(int32, tag = "3")] + pub tot_monitored_veth: i32, +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct VethEvent { + /// Virtual Ethernet Interface Name + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + /// Veth State + #[prost(uint64, tag = "2")] + pub state: u64, + /// Veth device Address + #[prost(string, tag = "3")] + pub dev_addr: ::prost::alloc::string::String, + /// Event type + #[prost(uint32, tag = "4")] + pub event_type: u32, + /// Network Namespace + #[prost(uint32, tag = "5")] + pub netns: u32, + /// Process ID + #[prost(uint32, tag = "6")] + pub pid: u32, +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] pub struct AddIpToBlocklistRequest { #[prost(string, optional, tag = "1")] pub ip: ::core::option::Option<::prost::alloc::string::String>, @@ -341,7 +372,7 @@ pub mod agent_client { .insert(GrpcMethod::new("agent.Agent", "RmIpFromBlocklist")); self.inner.unary(req, path, codec).await } - /// metrics data + /// metrics data endpoint pub async fn get_latency_metrics( &mut self, request: impl tonic::IntoRequest<()>, @@ -366,7 +397,7 @@ pub mod agent_client { .insert(GrpcMethod::new("agent.Agent", "GetLatencyMetrics")); self.inner.unary(req, path, codec).await } - /// dropped packets + /// dropped packets endpoint pub async fn get_dropped_packets_metrics( &mut self, request: impl tonic::IntoRequest<()>, @@ -391,6 +422,28 @@ pub mod agent_client { .insert(GrpcMethod::new("agent.Agent", "GetDroppedPacketsMetrics")); self.inner.unary(req, path, codec).await } + /// active veth info endpoint + pub async fn get_tracked_veth( + &mut self, + request: impl tonic::IntoRequest<()>, + ) -> std::result::Result, tonic::Status> { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/agent.Agent/GetTrackedVeth", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("agent.Agent", "GetTrackedVeth")); + self.inner.unary(req, path, codec).await + } } } /// Generated server implementations. @@ -437,7 +490,7 @@ pub mod agent_server { tonic::Response, tonic::Status, >; - /// metrics data + /// metrics data endpoint async fn get_latency_metrics( &self, request: tonic::Request<()>, @@ -445,7 +498,7 @@ pub mod agent_server { tonic::Response, tonic::Status, >; - /// dropped packets + /// dropped packets endpoint async fn get_dropped_packets_metrics( &self, request: tonic::Request<()>, @@ -453,6 +506,11 @@ pub mod agent_server { tonic::Response, tonic::Status, >; + /// active veth info endpoint + async fn get_tracked_veth( + &self, + request: tonic::Request<()>, + ) -> std::result::Result, tonic::Status>; } /// declare agent api #[derive(Debug)] @@ -787,6 +845,46 @@ pub mod agent_server { }; Box::pin(fut) } + "/agent.Agent/GetTrackedVeth" => { + #[allow(non_camel_case_types)] + struct GetTrackedVethSvc(pub Arc); + impl tonic::server::UnaryService<()> + for GetTrackedVethSvc { + type Response = super::VethResponse; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call(&mut self, request: tonic::Request<()>) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::get_tracked_veth(&inner, request).await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = GetTrackedVethSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } _ => { Box::pin(async move { let mut response = http::Response::new( diff --git a/core/api/src/api.rs b/core/api/src/api.rs index 27641b4..79b9df3 100644 --- a/core/api/src/api.rs +++ b/core/api/src/api.rs @@ -1,19 +1,14 @@ -#![allow(warnings)] use anyhow::Context; use chrono::Local; -use cortexbrain_common::{ - formatters::{format_ipv4, format_ipv6}, -}; +use cortexbrain_common::formatters::{format_ipv4, format_ipv6}; +use cortexbrain_common::map_handlers::load_perf_event_array_from_mapdata; use prost::bytes::BytesMut; -use std::{str::FromStr, sync::Arc}; +use std::str::FromStr; use std::sync::Mutex; use tonic::{Request, Response, Status}; use tracing::info; -use aya::{ - maps::{MapData, PerfEventArray}, - util::online_cpus, -}; +use aya::{maps::MapData, util::online_cpus}; use std::result::Result::Ok; use tonic::async_trait; @@ -22,26 +17,24 @@ use std::collections::HashMap; use tokio::sync::mpsc; use tokio::task; -use crate::{ - agent::{ - ConnectionEvent, DroppedPacketMetric, DroppedPacketsResponse, - LatencyMetric, LatencyMetricsResponse, - }, +use crate::agent::{ + ConnectionEvent, DroppedPacketMetric, DroppedPacketsResponse, LatencyMetric, + LatencyMetricsResponse, VethEvent, }; use crate::structs::{NetworkMetrics, PacketLog, TimeStampMetrics}; +use cortexbrain_common::buffer_type::VethLog; // * contains agent api configuration use crate::agent::{ - agent_server::Agent, ActiveConnectionResponse, AddIpToBlocklistRequest, BlocklistResponse, - RequestActiveConnections, RmIpFromBlocklistRequest, RmIpFromBlocklistResponse, + ActiveConnectionResponse, AddIpToBlocklistRequest, BlocklistResponse, RequestActiveConnections, + RmIpFromBlocklistRequest, RmIpFromBlocklistResponse, VethResponse, agent_server::Agent, }; use crate::constants::PIN_BLOCKLIST_MAP_PATH; use crate::helpers::comm_to_string; use aya::maps::Map; -use cortexbrain_common::constants::BPF_PATH; -use cortexflow_identity::enums::IpProtocols; +use cortexbrain_common::buffer_type::IpProtocols; use std::net::Ipv4Addr; use tracing::warn; @@ -54,6 +47,8 @@ pub struct AgentApi { latency_metrics_tx: mpsc::Sender, Status>>, dropped_packet_metrics_rx: Mutex, Status>>>, dropped_packet_metrics_tx: mpsc::Sender, Status>>, + tracked_veth_rx: Mutex, Status>>>, + tracked_veth_tx: mpsc::Sender, Status>>, } //* Event sender trait. Takes an event from a map and send that to the mpsc channel @@ -94,6 +89,16 @@ pub trait EventSender: Send + Sync + 'static { let _ = tx.send(event).await; } + async fn send_tracked_veth_event(&self, event: Vec); + async fn send_tracked_veth_event_map( + &self, + map: Vec, + tx: mpsc::Sender, Status>>, + ) { + let status = Status::new(tonic::Code::Ok, "success"); + let event = Ok(map); + let _ = tx.send(event).await; + } } // send event function. takes an HashMap and send that using mpsc event_tx @@ -113,39 +118,38 @@ impl EventSender for AgentApi { self.send_dropped_packet_metrics_event_map(event, self.dropped_packet_metrics_tx.clone()) .await; } + async fn send_tracked_veth_event(&self, event: Vec) { + self.send_tracked_veth_event_map(event, self.tracked_veth_tx.clone()) + .await; + } } //initialize a default trait for AgentApi. Loads a name and a bpf istance. //this trait is essential for init the Agent. impl Default for AgentApi { - //TODO:this part needs a better error handling fn default() -> Self { - // load connections maps mapdata - let active_connection_mapdata = MapData::from_pin("/sys/fs/bpf/maps/events_map") - .expect("cannot open events_map Mapdata"); - let active_connection_map = Map::PerfEventArray(active_connection_mapdata); //creates a PerfEventArray from the mapdata - - let mut active_connection_events_array = PerfEventArray::try_from(active_connection_map) - .expect("Error while initializing events array"); - - // load network metrics maps mapdata - let network_metrics_mapdata = MapData::from_pin("/sys/fs/bpf/trace_maps/net_metrics") - .expect("cannot open net_metrics Mapdata"); - let network_metrics_map = Map::PerfEventArray(network_metrics_mapdata); //creates a PerfEventArray from the mapdata - let mut network_metrics_events_array = PerfEventArray::try_from(network_metrics_map) - .expect("Error while initializing network metrics array"); - - // load time stamp events maps mapdata - let time_stamp_events_mapdata = MapData::from_pin("/sys/fs/bpf/trace_maps/time_stamp_events") - .expect("cannot open time_stamp_events Mapdata"); - let time_stamp_events_map = Map::PerfEventArray(time_stamp_events_mapdata); // - let mut time_stamp_events_array = PerfEventArray::try_from(time_stamp_events_map) - .expect("Error while initializing time stamp events array"); - - //init a mpsc channel + // + // init MapData from the kernel space + // + + // TODO: in the future will be better to not use .unwrap() + let mut active_connection_events_array = + load_perf_event_array_from_mapdata("/sys/fs/bpf/maps/events_map").unwrap(); + let mut network_metrics_events_array = + load_perf_event_array_from_mapdata("/sys/fs/bpf/trace_maps/net_metrics").unwrap(); + let mut time_stamp_events_array = + load_perf_event_array_from_mapdata("/sys/fs/bpf/trace_maps/time_stamp_events").unwrap(); + let mut tracked_veth_events_array = + load_perf_event_array_from_mapdata("/sys/fs/bpf/maps/veth_identity_map").unwrap(); + + // + // init a mpsc channels with TX (transmission) and RX(Receiver) components + // + let (conn_tx, conn_rx) = mpsc::channel(1024); let (lat_tx, lat_rx) = mpsc::channel(2048); let (drop_tx, drop_rx) = mpsc::channel(2048); + let (veth_tx, tracked_veth_rx) = mpsc::channel(1024); let api = AgentApi { active_connection_event_rx: conn_rx.into(), @@ -154,6 +158,8 @@ impl Default for AgentApi { latency_metrics_tx: lat_tx.clone(), dropped_packet_metrics_rx: Mutex::new(drop_rx), dropped_packet_metrics_tx: drop_tx.clone(), + tracked_veth_rx: Mutex::new(tracked_veth_rx), + tracked_veth_tx: veth_tx.clone(), }; // For network metrics @@ -198,12 +204,7 @@ impl Default for AgentApi { Ok(proto) => { info!( "Event Id: {} Protocol: {:?} SRC: {}:{} -> DST: {}:{}", - event_id, - proto, - src, - src_port, - dst, - dst_port + event_id, proto, src, src_port, dst, dst_port ); info!("creating vector for the aggregated data"); let mut evt = Vec::new(); @@ -234,12 +235,12 @@ impl Default for AgentApi { ); } } - } else if events.read == 0 { - info!("[Agent/API] 0 Events found"); + } else if events.lost > 0 { + info!("[Agent/API] Lost {} events", events.lost); } } Err(e) => { - eprintln!("Errore nella lettura eventi: {}", e); + eprintln!("Error while reading events: {}", e); tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; } } @@ -296,18 +297,18 @@ impl Default for AgentApi { if dropped_packet_metrics.sk_drops > 0 { let mut evt = Vec::new(); info!( - "Dropped Packet Metric - tgid: {}, process_name: {}, sk_drops: {}, sk_err: {}, sk_err_soft: {}, sk_backlog_len: {}, sk_wmem_queued: {}, sk_rcvbuf: {}, sk_ack_backlog: {}, timestamp_us: {}", - dropped_packet_metrics.tgid, - dropped_packet_metrics.process_name, - dropped_packet_metrics.sk_drops, - dropped_packet_metrics.sk_err, - dropped_packet_metrics.sk_err_soft, - dropped_packet_metrics.sk_backlog_len, - dropped_packet_metrics.sk_wmem_queued, - dropped_packet_metrics.sk_rcvbuf, - dropped_packet_metrics.sk_ack_backlog, - dropped_packet_metrics.timestamp_us - ); + "Dropped Packet Metric - tgid: {}, process_name: {}, sk_drops: {}, sk_err: {}, sk_err_soft: {}, sk_backlog_len: {}, sk_wmem_queued: {}, sk_rcvbuf: {}, sk_ack_backlog: {}, timestamp_us: {}", + dropped_packet_metrics.tgid, + dropped_packet_metrics.process_name, + dropped_packet_metrics.sk_drops, + dropped_packet_metrics.sk_err, + dropped_packet_metrics.sk_err_soft, + dropped_packet_metrics.sk_backlog_len, + dropped_packet_metrics.sk_wmem_queued, + dropped_packet_metrics.sk_rcvbuf, + dropped_packet_metrics.sk_ack_backlog, + dropped_packet_metrics.timestamp_us + ); evt.push(dropped_packet_metrics.clone()); let _ = drop_tx.send(Ok(evt)).await; } @@ -408,6 +409,90 @@ impl Default for AgentApi { } }); + // TODO: this part needs a better implementation + task::spawn(async move { + let mut veth_events_buffer = Vec::new(); + //scan the cpus to read the data + for cpu_id in online_cpus() + .map_err(|e| anyhow::anyhow!("Error {:?}", e)) + .unwrap() + { + let buf = tracked_veth_events_array + .open(cpu_id, None) + .expect("Error during the creation of time stamp events buf structure"); + + let buffers = vec![BytesMut::with_capacity(4096); 8]; + veth_events_buffer.push((buf, buffers)); + } + + info!("Starting time stamp events listener"); + + //send the data through a mpsc channel + loop { + for (buf, buffers) in veth_events_buffer.iter_mut() { + match buf.read_events(buffers) { + Ok(events) => { + //read the events, this function is similar to the one used in identity/helpers.rs/display_events + if events.read > 0 { + for i in 0..events.read { + info!("Found veth events {}", events.read); + let data = &buffers[i]; + if data.len() >= std::mem::size_of::() { + let veth: VethLog = + unsafe { std::ptr::read(data.as_ptr() as *const _) }; + let veth_event = VethEvent { + name: String::from_utf8_lossy(unsafe { + std::slice::from_raw_parts( + veth.name.as_ptr() as *const u8, + veth.name.len() * std::mem::size_of::(), + ) + }) + .trim_end_matches('\0') + .to_string(), + state: veth.state, + dev_addr: String::from_utf8_lossy(unsafe { + std::slice::from_raw_parts( + veth.dev_addr.as_ptr() as *const u8, + veth.dev_addr.len() + * std::mem::size_of::(), + ) + }) + .trim_end_matches('\0') + .to_string(), + event_type: veth.event_type.into(), + netns: veth.netns, + pid: veth.pid, + }; + info!( + "Veth Event - name: {}, state: {}, dev_addr: {}, event_type: {}, netns: {}, pid: {}", + veth_event.name, + veth_event.state, + veth_event.dev_addr, + veth_event.event_type, + veth_event.netns, + veth_event.pid + ); + let mut evt = Vec::new(); + evt.push(veth_event.clone()); + let _ = veth_tx.send(Ok(evt)).await; + } else { + warn!( + "Received time stamp metrics data too small: {} bytes", + data.len() + ); + } + } + } + } + Err(e) => { + eprintln!("Errore nella lettura time stamp eventi: {}", e); + tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; + } + } + } + } + }); + api } } @@ -659,4 +744,34 @@ impl Agent for AgentApi { Ok(Response::new(response)) } + + async fn get_tracked_veth( + &self, + request: Request<()>, + ) -> Result, Status> { + let req = request.into_inner(); + info!("Getting tracked veth metrics"); + let mut tracked_veth = Vec::::new(); + let mut tot_veth = 0 as i32; + + while let Ok(evt) = self.tracked_veth_rx.lock().unwrap().try_recv() { + if let Ok(vec) = evt { + tracked_veth.extend(vec); + } + } + tot_veth = tracked_veth.len() as i32; + + info!("Total tracked veth events: {}", tot_veth); + info!("Tracked veth: {:?}", &tracked_veth); + + let veth_names: Vec = tracked_veth.iter().map(|v| v.name.clone()).collect(); + + let response = VethResponse { + status: "success".to_string(), + veth_names, + tot_monitored_veth: tot_veth, + }; + + Ok(Response::new(response)) + } } diff --git a/core/api/src/batcher.rs b/core/api/src/batcher.rs new file mode 100644 index 0000000..6e984d5 --- /dev/null +++ b/core/api/src/batcher.rs @@ -0,0 +1,22 @@ +// This module is experimental and may be subject to major changes. + + +use crate::agent::{ConnectionEvent, DroppedPacketMetric, LatencyMetric}; + +pub enum MetricsBatcher { + LatencyMetrics, + DroppedPacketsMetrics, +} +pub enum EventBatcher {} + +impl MetricsBatcher { + pub async fn send_batched_metrics() { + todo!(); + } +} + +impl EventBatcher { + pub async fn send_batched_logs() { + todo!(); + } +} diff --git a/core/api/src/client.rs b/core/api/src/client.rs index 844ea75..096b176 100644 --- a/core/api/src/client.rs +++ b/core/api/src/client.rs @@ -1,29 +1,23 @@ +use crate::agent::agent_client::AgentClient; use anyhow::Error; use std::result::Result::Ok; -use tonic::{transport::Channel}; -use tonic_reflection::pb::v1::{ - server_reflection_client::ServerReflectionClient, -}; -use crate::agent::agent_client::AgentClient; +use tonic::transport::Channel; +use tonic_reflection::pb::v1::server_reflection_client::ServerReflectionClient; -const AGENT_IP : &str = "http://127.0.0.1:9090"; +const AGENT_IP: &str = "http://127.0.0.1:9090"; -#[cfg(feature="client")] +#[cfg(feature = "client")] pub async fn connect_to_client() -> Result, Error> { //this methods force a HTTP/2 connection from a static string //FIXME: this will require an update to ensure a protected connection - let channel = Channel::from_static(AGENT_IP) - .connect() - .await?; + let channel = Channel::from_static(AGENT_IP).connect().await?; let client = AgentClient::new(channel); Ok(client) } -#[cfg(feature="client")] +#[cfg(feature = "client")] pub async fn connect_to_server_reflection() -> Result, Error> { //this methods force a HTTP/2 connection from a static string - let channel = Channel::from_static(AGENT_IP) - .connect() - .await?; + let channel = Channel::from_static(AGENT_IP).connect().await?; let client = ServerReflectionClient::new(channel); Ok(client) } diff --git a/core/api/src/lib.rs b/core/api/src/lib.rs index 03ecd68..cf2c0c9 100644 --- a/core/api/src/lib.rs +++ b/core/api/src/lib.rs @@ -5,3 +5,4 @@ pub mod requests; pub mod structs; pub mod constants; pub mod helpers; +pub mod batcher; diff --git a/core/api/src/requests.rs b/core/api/src/requests.rs index a518f4a..06a4030 100644 --- a/core/api/src/requests.rs +++ b/core/api/src/requests.rs @@ -1,26 +1,25 @@ use anyhow::Error; use std::result::Result::Ok; -use tonic::{ Request, Response, Streaming, transport::Channel }; +use tonic::{Request, Response, Streaming, transport::Channel}; use tonic_reflection::pb::v1::{ - ServerReflectionRequest, - ServerReflectionResponse, - server_reflection_client::ServerReflectionClient, - server_reflection_request::MessageRequest, + ServerReflectionRequest, ServerReflectionResponse, + server_reflection_client::ServerReflectionClient, server_reflection_request::MessageRequest, }; -use crate::agent::agent_client::AgentClient; use crate::agent::ActiveConnectionResponse; -use crate::agent::RequestActiveConnections; -use crate::agent::BlocklistResponse; use crate::agent::AddIpToBlocklistRequest; -use crate::agent::RmIpFromBlocklistRequest; -use crate::agent::RmIpFromBlocklistResponse; +use crate::agent::BlocklistResponse; use crate::agent::DroppedPacketsResponse; use crate::agent::LatencyMetricsResponse; +use crate::agent::RequestActiveConnections; +use crate::agent::RmIpFromBlocklistRequest; +use crate::agent::RmIpFromBlocklistResponse; +use crate::agent::VethResponse; +use crate::agent::agent_client::AgentClient; #[cfg(feature = "client")] pub async fn send_active_connection_request( - mut client: AgentClient + mut client: AgentClient, ) -> Result, Error> { let request = Request::new(RequestActiveConnections { pod_ip: None }); let response = client.active_connections(request).await?; @@ -29,13 +28,17 @@ pub async fn send_active_connection_request( #[cfg(feature = "client")] pub async fn get_all_features( - mut client: ServerReflectionClient + mut client: ServerReflectionClient, ) -> Result>, Error> { let request = ServerReflectionRequest { host: "".to_string(), - message_request: Some(MessageRequest::FileContainingSymbol("agent.Agent".to_string())), + message_request: Some(MessageRequest::FileContainingSymbol( + "agent.Agent".to_string(), + )), }; - let response = client.server_reflection_info(tokio_stream::iter(vec![request])).await?; + let response = client + .server_reflection_info(tokio_stream::iter(vec![request])) + .await?; Ok(response) } @@ -43,7 +46,7 @@ pub async fn get_all_features( #[cfg(feature = "client")] pub async fn send_create_blocklist_request( mut client: AgentClient, - ip: &str + ip: &str, ) -> Result, Error> { let ip = Some(ip.to_string()); let request = Request::new(AddIpToBlocklistRequest { ip }); @@ -53,7 +56,7 @@ pub async fn send_create_blocklist_request( #[cfg(feature = "client")] pub async fn send_check_blocklist_request( - mut client: AgentClient + mut client: AgentClient, ) -> Result, Error> { let request = Request::new(()); let response = client.check_blocklist(request).await?; @@ -63,7 +66,7 @@ pub async fn send_check_blocklist_request( #[cfg(feature = "client")] pub async fn remove_ip_from_blocklist_request( mut client: AgentClient, - ip: &str + ip: &str, ) -> Result, Error> { let ip = ip.to_string(); let request = Request::new(RmIpFromBlocklistRequest { ip }); @@ -76,9 +79,7 @@ pub async fn send_dropped_packets_request( mut client: AgentClient, ) -> Result, Error> { let request = Request::new(()); - let response = client.get_dropped_packets_metrics( - request - ).await?; + let response = client.get_dropped_packets_metrics(request).await?; Ok(response) } @@ -87,8 +88,15 @@ pub async fn send_latency_metrics_request( mut client: AgentClient, ) -> Result, Error> { let request = Request::new(()); - let response = client.get_latency_metrics( - request - ).await?; + let response = client.get_latency_metrics(request).await?; + Ok(response) +} + +#[cfg(feature = "client")] +pub async fn send_tracked_veth_request( + mut client: AgentClient, +) -> Result, Error> { + let request = Request::new(()); + let response = client.get_tracked_veth(request).await?; Ok(response) } diff --git a/core/api/src/structs.rs b/core/api/src/structs.rs index b15fa22..97a4017 100644 --- a/core/api/src/structs.rs +++ b/core/api/src/structs.rs @@ -1,4 +1,4 @@ -use bytemuck::Zeroable; +use bytemuck_derive::Zeroable; use crate::constants::TASK_COMM_LEN; diff --git a/core/common/Cargo.toml b/core/common/Cargo.toml index 854c04e..b8e840d 100644 --- a/core/common/Cargo.toml +++ b/core/common/Cargo.toml @@ -10,13 +10,24 @@ homepage = "https://docs.cortexflow.org" repository = "https://github.com/CortexFlow/CortexBrain" [dependencies] -tracing = "0.1" +tracing = { version = "0.1", features = ["std"] } tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] } anyhow = "1.0" kube = { version = "2.0.1", features = ["client"] } k8s-openapi = { version = "0.26.0", features = ["v1_34"] } aya = "0.13.1" +opentelemetry = "0.31.0" +opentelemetry_sdk = { version = "0.31.0", features = ["logs", "rt-tokio"] } +opentelemetry-stdout = { version = "0.31.0", features = ["logs"] } +opentelemetry-appender-tracing = "0.31.1" +opentelemetry-otlp = { version = "0.31.0", features = ["logs", "grpc-tonic"] } +bytemuck = "1.25.0" +bytes = "1.11.0" +bytemuck_derive = "1.10.2" [features] map-handlers = [] program-handlers = [] +network-structs = [] +buffer-reader = [] +experimental = [] diff --git a/core/common/src/buffer_type.rs b/core/common/src/buffer_type.rs new file mode 100644 index 0000000..9fc7828 --- /dev/null +++ b/core/common/src/buffer_type.rs @@ -0,0 +1,292 @@ +use bytemuck_derive::Zeroable; +use bytes::BytesMut; +use std::net::Ipv4Addr; +use tracing::{error, info, warn}; + +// +// IpProtocols enum to reconstruct the packet protocol based on the +// IPV4 Header Protocol code +// + +#[derive(Debug)] +#[repr(u8)] +pub enum IpProtocols { + ICMP = 1, + TCP = 6, + UDP = 17, +} + +// +// TryFrom Trait implementation for IpProtocols enum +// This is used to reconstruct the packet protocol based on the +// IPV4 Header Protocol code +// + +impl TryFrom for IpProtocols { + type Error = (); + fn try_from(proto: u8) -> Result { + match proto { + 1 => Ok(IpProtocols::ICMP), + 6 => Ok(IpProtocols::TCP), + 17 => Ok(IpProtocols::UDP), + _ => Err(()), + } + } +} + +// +// Structure PacketLog +//This structure is used to store the packet information +// +#[cfg(feature = "network-structs")] +#[repr(C)] +#[derive(Clone, Copy, Zeroable)] +pub struct PacketLog { + pub proto: u8, + pub src_ip: u32, + pub src_port: u16, + pub dst_ip: u32, + pub dst_port: u16, + pub pid: u32, +} +#[cfg(feature = "network-structs")] +unsafe impl aya::Pod for PacketLog {} + +#[cfg(feature = "network-structs")] +#[repr(C, packed)] +#[derive(Clone, Copy)] +pub struct VethLog { + pub name: [u8; 16], // 16 bytes: veth interface name + pub state: u64, // 8 bytes: state variable (unsigned long in kernel) + pub dev_addr: [u8; 6], // 32 bytes: device address + pub event_type: u8, // 1 byte: 1 for veth creation, 2 for veth destruction + pub netns: u32, // 4 bytes: network namespace inode number + pub pid: u32, // 4 bytes: PID that triggered the event +} + +#[cfg(feature = "network-structs")] +#[repr(C)] +#[derive(Clone, Copy)] +pub struct TcpPacketRegistry { + pub proto: u8, + pub src_ip: u32, + pub dst_ip: u32, + pub src_port: u16, + pub dst_port: u16, + pub pid: u32, + pub command: [u8; 16], + pub cgroup_id: u64, +} + +// docs: +// This function perform a byte swap from little-endian to big-endian +// It's used to reconstruct the correct IPv4 address from the u32 representation +// +// Takes a u32 address in big-endian format and returns a Ipv4Addr with reversed octets +// +#[inline(always)] +pub fn reverse_be_addr(addr: u32) -> Ipv4Addr { + let octects = addr.to_be_bytes(); + let [a, b, c, d] = [octects[3], octects[2], octects[1], octects[0]]; + let reversed_ip = Ipv4Addr::new(a, b, c, d); + reversed_ip +} + +// enum BuffersType +#[cfg(feature = "buffer-reader")] +pub enum BufferType { + PacketLog, + TcpPacketRegistry, + VethLog, +} + +// IDEA: this is an experimental implementation to centralize buffer reading logic +// TODO: add variant for cortexflow API exporter +#[cfg(feature = "buffer-reader")] +impl BufferType { + pub async fn read_packet_log(buffers: &mut [BytesMut], tot_events: i32, offset: i32) { + for i in offset..tot_events { + let vec_bytes = &buffers[i as usize]; + if vec_bytes.len() < std::mem::size_of::() { + error!( + "Corrupted Packet log data. Raw data: {}. Readed {} bytes expected {} bytes", + vec_bytes + .iter() + .map(|b| format!("{:02x}", b)) + .collect::>() + .join(" "), + vec_bytes.len(), + std::mem::size_of::() + ); + continue; + } + if vec_bytes.len() >= std::mem::size_of::() { + let pl: PacketLog = + unsafe { std::ptr::read_unaligned(vec_bytes.as_ptr() as *const _) }; // reading raw bytes + + // extracting struct info from bytes + let src_ip = reverse_be_addr(pl.src_ip); + let dst_ip = reverse_be_addr(pl.dst_ip); + let src_port = u16::from_be(pl.src_port); + let dst_port = u16::from_be(pl.dst_port); + let event_id = pl.pid; + let protocol = pl.proto; + + // protocol extraction + match IpProtocols::try_from(protocol) { + Ok(proto) => { + info!( + "Event Id: {} Protocol: {:?} SRC: {}:{} -> DST: {}:{}", + event_id, proto, src_ip, src_port, dst_ip, dst_port + ); + } + Err(e) => { + error!("Unknown protocol. Data maybe corrupted. Reason:{:?}", e); + } + } + } + } + } + pub async fn read_tcp_registry_log(buffers: &mut [BytesMut], tot_events: i32, offset: i32) { + for i in offset..tot_events { + let vec_bytes = &buffers[i as usize]; + if vec_bytes.len() < std::mem::size_of::() { + error!( + "Corrupted data Tcp Registry data. Raw data: {}. Readed {} bytes expected {} bytes", + vec_bytes + .iter() + .map(|b| format!("{:02x}", b)) + .collect::>() + .join(" "), + vec_bytes.len(), + std::mem::size_of::() + ); + continue; + } + if vec_bytes.len() >= std::mem::size_of::() { + let pl: TcpPacketRegistry = + unsafe { std::ptr::read_unaligned(vec_bytes.as_ptr() as *const _) }; // reading raw bytes + + // extracting struct info from bytes + let src = reverse_be_addr(pl.src_ip); + let dst = reverse_be_addr(pl.dst_ip); + let src_port = u16::from_be(pl.src_port); + let dst_port = u16::from_be(pl.dst_port); + let event_id = pl.pid; + let command = pl.command.to_vec(); + let end = command + .iter() + .position(|&x| x == 0) + .unwrap_or(command.len()); + let command_str = String::from_utf8_lossy(&command[..end]).to_string(); + let cgroup_id = pl.cgroup_id; + let protocol = pl.proto; + + // protocol extraction + match IpProtocols::try_from(protocol) { + Ok(proto) => { + info!( + "Event Id: {} Protocol: {:?} SRC: {}:{} -> DST: {}:{} Command: {} Cgroup_id: {}", + event_id, + proto, + src, + src_port, + dst, + dst_port, + command_str, + cgroup_id //proc_content + ); + } + Err(e) => { + error!("Unknown protocol. Data maybe corrupted. Reason:{:?}", e); + } + } + } + } + } + pub async fn read_and_handle_veth_log( + buffers: &mut [BytesMut], + tot_events: i32, + offset: i32, + ) { + for i in offset..tot_events { + let vec_bytes = &buffers[i as usize]; + if vec_bytes.len() < std::mem::size_of::() { + error!( + "Corrupted data VethLog data. Raw data: {}. Readed {} bytes expected {} bytes", + vec_bytes + .iter() + .map(|b| format!("{:02x}", b)) + .collect::>() + .join(" "), + vec_bytes.len(), + std::mem::size_of::() + ); + continue; + } + if vec_bytes.len() >= std::mem::size_of::() { + let vthl: VethLog = + unsafe { std::ptr::read_unaligned(vec_bytes.as_ptr() as *const _) }; // reading raw bytes + + // extracting struct info from bytes + let name_bytes = vthl.name; + let dev_addr_bytes = vthl.dev_addr; + let name = std::str::from_utf8(&name_bytes); + let state = vthl.state; + + let dev_addr = dev_addr_bytes; + let netns = vthl.netns; + let mut event_type = String::new(); + + // event_type extraction + match vthl.event_type { + 1 => { + event_type = "creation".to_string(); + match name { + Ok(veth_name) => { + info!( + "[{}] Veth Event: Type: {} Name: {} Dev_addr: {:x?} State: {}", + netns, + event_type, + veth_name.trim_end_matches("\0"), + dev_addr, + state + ); + } + Err(e) => { + error!( + "Failed to extract veth name during event_type = creation (1).Reason:{}", + e + ); + } + } + } + 2 => { + event_type = "deletion".to_string(); + match name { + Ok(veth_name) => { + info!( + "[{}] Veth Event: Type: {} Name: {} Dev_addr: {:x?} State: {}", + netns, + event_type, + veth_name.trim_end_matches("\0"), + dev_addr, + state + ); + } + Err(e) => { + error!( + "Failed to extract veth name during event_type = deletion (2).Reason:{}", + e + ); + } + } + } + _ => { + warn!("Unknown event type") + } + } + } + } + } +} diff --git a/core/common/src/lib.rs b/core/common/src/lib.rs index 1d015a2..d88c1db 100644 --- a/core/common/src/lib.rs +++ b/core/common/src/lib.rs @@ -1,7 +1,10 @@ +#[cfg(feature = "buffer-reader")] +#[cfg(feature = "network-structs")] +pub mod buffer_type; pub mod constants; pub mod formatters; pub mod logger; #[cfg(feature = "map-handlers")] pub mod map_handlers; #[cfg(feature = "program-handlers")] -pub mod program_handlers; \ No newline at end of file +pub mod program_handlers; diff --git a/core/common/src/logger.rs b/core/common/src/logger.rs index 5a1b890..ab06f79 100644 --- a/core/common/src/logger.rs +++ b/core/common/src/logger.rs @@ -1,4 +1,7 @@ -use tracing_subscriber::{fmt::format::FmtSpan, EnvFilter}; +use tracing_subscriber::Layer; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::util::SubscriberInitExt; +use tracing_subscriber::{EnvFilter, fmt::format::FmtSpan}; /// Initialize the default logger configuration used across CortexBrain components. /// @@ -35,3 +38,47 @@ pub fn init_logger_without_time() { .with_line_number(false) .init(); } + +use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; +use opentelemetry_otlp::{LogExporter, WithExportConfig}; +use opentelemetry_sdk::Resource; +use opentelemetry_sdk::logs::SdkLoggerProvider; + +pub fn otlp_logger_init(service_name: String) -> SdkLoggerProvider { + //exporter and provider initialization + let otlp_endpoint = std::env::var("OTEL_EXPORTER_OTLP_ENDPOINT") + .unwrap_or_else(|_| "http://localhost:4317".to_string()); + + let exporter = LogExporter::builder() + .with_tonic() + .with_endpoint(otlp_endpoint) + .build() + .expect("Failed to create OTLP exporter"); + + //needs a service name + let provider = SdkLoggerProvider::builder() + .with_resource(Resource::builder().with_service_name(service_name).build()) + .with_batch_exporter(exporter) + .build(); + + //maybe we will need some filter later + //init otel_filter and layer + let otel_layer = OpenTelemetryTracingBridge::new(&provider); + + // init fmt filter and layer + let fmt_filter = EnvFilter::new("info").add_directive("opentelemetry=debug".parse().unwrap()); + let fmt_layer = tracing_subscriber::fmt::layer() + .with_thread_names(true) + .with_line_number(false) + .with_target(false) + .pretty() + .with_filter(fmt_filter); + + //init tracing subscriber with otel layer + tracing_subscriber::registry() + .with(otel_layer) + .with(fmt_layer) + .init(); + + provider +} diff --git a/core/common/src/map_handlers.rs b/core/common/src/map_handlers.rs index 2e22736..19d4e20 100644 --- a/core/common/src/map_handlers.rs +++ b/core/common/src/map_handlers.rs @@ -50,11 +50,10 @@ pub fn init_bpf_maps( //TODO: save bpf maps path in the cli metadata -//takes an array of bpf maps and pin them to persiste session data -// FIXME: is this ok that we are returning a BpfMapsData? +//takes an array of bpf maps and pin them to persist session data #[cfg(feature = "map-handlers")] -pub fn map_pinner(maps: BpfMapsData, path: &PathBuf) -> Result, Error> { +pub fn map_pinner(maps: BpfMapsData, path: &PathBuf) -> Result { if !path.exists() { info!("Pin path {:?} does not exist. Creating it...", path); std::fs::create_dir_all(&path)?; @@ -65,7 +64,11 @@ pub fn map_pinner(maps: BpfMapsData, path: &PathBuf) -> Result, Error> } } - let mut owned_maps = Vec::new(); // aya::Maps does not implement the clone trait i need to create a raw copy of the vec map + //let mut owned_maps = Vec::new(); // aya::Maps does not implement the clone trait i need to create a raw copy of the vec map + let mut owned_bpf_maps_data = BpfMapsData { + bpf_obj_names: Vec::new(), + bpf_obj_map: Vec::new(), + }; // an iterator that iterates two iterators simultaneously for (map_obj, name) in maps .bpf_obj_map @@ -80,22 +83,31 @@ pub fn map_pinner(maps: BpfMapsData, path: &PathBuf) -> Result, Error> } info!("Trying to pin map {:?} in map path: {:?}", name, &map_path); map_obj.pin(&map_path)?; - owned_maps.push(map_obj); + //owned_maps.push(map_obj); + owned_bpf_maps_data.bpf_obj_names.push(name); + owned_bpf_maps_data.bpf_obj_map.push(map_obj); } - Ok(owned_maps) + Ok(owned_bpf_maps_data) // return a BpfMapsData type } #[cfg(feature = "map-handlers")] -pub async fn populate_blocklist(map: &mut Map) -> Result<(), Error> { +pub async fn populate_blocklist() -> Result<(), Error> { + use aya::maps::MapData; + // load mapdata from path + + let mapdata = MapData::from_pin("/sys/fs/bpf/maps/Blocklist") + .map_err(|e| anyhow::anyhow!("Failed to load blocklist_map: {}", e))?; + + let map = Map::HashMap(mapdata); + let mut blocklist_map = HashMap::<_, [u8; 4], [u8; 4]>::try_from(map)?; + let client = Client::try_default() .await .expect("Cannot connect to Kubernetes Client"); let namespace = "cortexflow"; let configmap = "cortexbrain-client-config"; - let mut blocklist_map = HashMap::<_, [u8; 4], [u8; 4]>::try_from(map)?; - let api: Api = Api::namespaced(client, namespace); match api.get(configmap).await { std::result::Result::Ok(configs) => { @@ -124,3 +136,21 @@ pub async fn populate_blocklist(map: &mut Map) -> Result<(), Error> { } } } + +#[cfg(feature = "map-handlers")] +pub fn load_perf_event_array_from_mapdata( + path: &'static str, +) -> Result, Error> { + use aya::maps::MapData; + use aya::maps::PerfEventArray; + + let map_data = MapData::from_pin(path) + .map_err(|e| anyhow::anyhow!("Cannot load mapdata from pin {:?} .Reason: {}", &path, e))?; + + let map = Map::PerfEventArray(map_data); + + let perf_event_array = PerfEventArray::try_from(map).map_err(|e| { + anyhow::anyhow!("Cannot initialize perf_event_array from map. Reason: {}", e) + })?; + Ok(perf_event_array) +} diff --git a/core/common/src/program_handlers.rs b/core/common/src/program_handlers.rs index 5991bef..42cd3ba 100644 --- a/core/common/src/program_handlers.rs +++ b/core/common/src/program_handlers.rs @@ -7,7 +7,7 @@ use tracing::{error, info}; pub fn load_program( bpf: Arc>, program_name: &str, - actual_program: &str, + kernel_symbol: &str, ) -> Result<(), anyhow::Error> { let mut bpf_new = bpf .lock() @@ -24,13 +24,13 @@ pub fn load_program( .load() .map_err(|e| anyhow::anyhow!("Cannot load program: {}. Error: {}", &program_name, e))?; - match program.attach(actual_program, 0) { - Ok(_) => info!("{} program attached successfully", actual_program), + match program.attach(kernel_symbol, 0) { + Ok(_) => info!("{} program attached successfully", kernel_symbol), Err(e) => { - error!("Error attaching {} program {:?}", actual_program, e); + error!("Error attaching {} program {:?}", kernel_symbol, e); return Err(anyhow::anyhow!( "Failed to attach {}: {:?}", - actual_program, + kernel_symbol, e )); } diff --git a/core/src/components/conntracker/src/data_structures.rs b/core/src/components/conntracker/src/data_structures.rs index 4de05cc..f4c5047 100644 --- a/core/src/components/conntracker/src/data_structures.rs +++ b/core/src/components/conntracker/src/data_structures.rs @@ -1,20 +1,19 @@ use aya_ebpf::{ macros::map, - maps::{LruPerCpuHashMap, PerfEventArray,HashMap}, + maps::{HashMap, LruPerCpuHashMap, PerfEventArray}, }; // docs: // PacketLog structure used to track an incoming network packet -// +// // proto: packet protol (ex. TCP,UDP,ICMP) -// src_ip: source address ip +// src_ip: source address ip // src_port: source address port // dst_ip: destination ip // dst_port: destination port // pid: kernel process ID // - #[repr(C)] #[derive(Clone, Copy)] pub struct PacketLog { @@ -37,75 +36,67 @@ pub struct ConnArray { pub proto: u8, } - // docs: // VethLog structure used to track virtual ethernet interfaces creation and deletion -// +// // name: veth name -// state: socket state +// state: socket state // dev_addr: veth device addresses // event_type: creation or deletion // netns: veth network namespace // pid: kernel process ID // -#[repr(C)] -#[derive(Clone, Copy, Debug)] +#[repr(C,packed)] +#[derive(Clone, Copy)] pub struct VethLog { - pub name: [u8; 16], - pub state: u64, // state var type: long unsigned int - pub dev_addr: [u32; 8], - pub event_type: u8, // i choose 1 for veth creation or 2 for veth destruction - pub netns: u32, - pub pid: u32 - + pub name: [u8; 16], // 16 bytes: veth interface name + pub state: u64, // 8 bytes: state variable (unsigned long in kernel) + pub dev_addr: [u8; 6], // 6 bytes: device address + pub event_type: u8, // 1 byte: 1 for veth creation, 2 for veth destruction + pub netns: u32, // 4 bytes: network namespace inode number + pub pid: u32, // 4 bytes: PID that triggered the event } // TODO: write documentation about this structure #[repr(C)] -#[derive(Clone,Copy,Debug)] -pub struct TcpPacketRegistry{ +#[derive(Clone, Copy, Debug)] +pub struct TcpPacketRegistry { pub proto: u8, pub src_ip: u32, pub dst_ip: u32, pub src_port: u16, pub dst_port: u16, pub pid: u32, - pub command: [u8;16], + pub command: [u8; 16], pub cgroup_id: u64, - } // docs: // -// BPF maps used in the conntracker programs -// +// BPF maps used in the conntracker programs +// // VETH_EVENTS: PerfEventArray used in the veth_tracer functions (veth_tracer.rs module) // -// BLOCKLIST: an hashmap used to block addresses -----> TODO: key and values are the same for semplicity but we need to +// BLOCKLIST: an hashmap used to block addresses -----> TODO: key and values are the same for semplicity but we need to // investigate the possibility to save the service name or the timestamp registered when the command was executed or a simple int index // - #[map(name = "events_map", pinning = "by_name")] pub static mut EVENTS: PerfEventArray = PerfEventArray::new(0); -// FIXME: this might be useless -#[map(name = "ConnectionMap")] -pub static mut ACTIVE_CONNECTIONS: LruPerCpuHashMap = - LruPerCpuHashMap::with_max_entries(65536, 0); - // FIXME: this might be useless #[map(name = "ConnectionTrackerMap")] pub static mut CONNTRACKER: LruPerCpuHashMap = LruPerCpuHashMap::with_max_entries(65536, 0); -#[map(name = "veth_identity_map")] +#[map(name = "veth_identity_map", pinning = "by_name")] pub static mut VETH_EVENTS: PerfEventArray = PerfEventArray::new(0); -#[map(name = "Blocklist")] -pub static mut BLOCKLIST: HashMap<[u8;4], [u8;4]> = HashMap::<[u8;4], [u8;4]>::with_max_entries(1024, 0); +#[map(name = "Blocklist", pinning = "by_name")] +pub static mut BLOCKLIST: HashMap<[u8; 4], [u8; 4]> = + HashMap::<[u8; 4], [u8; 4]>::with_max_entries(1024, 0); //here i need to pass an address like this: [135,171,168,192] -#[map(name = "TcpPacketRegistry",pinning = "by_name")] -pub static mut PACKET_REGISTRY: PerfEventArray = PerfEventArray::new(0); \ No newline at end of file +#[map(name = "TcpPacketRegistry", pinning = "by_name")] +pub static mut PACKET_REGISTRY: PerfEventArray = PerfEventArray::new(0); diff --git a/core/src/components/conntracker/src/veth_tracer.rs b/core/src/components/conntracker/src/veth_tracer.rs index e2f07e7..cf66a74 100644 --- a/core/src/components/conntracker/src/veth_tracer.rs +++ b/core/src/components/conntracker/src/veth_tracer.rs @@ -25,7 +25,7 @@ pub fn try_veth_tracer(ctx: ProbeContext, mode: u8) -> Result { } let mut name_buf = [0u8; 16]; - let mut dev_addr_buf = [0u32; 8]; + let mut dev_addr_buf = [0u8; 6]; // name field let name_field_offset = 304; // reading the name field offset @@ -35,12 +35,12 @@ pub fn try_veth_tracer(ctx: ProbeContext, mode: u8) -> Result { // state field let state_offset = 168; - let state: u8 = read_linux_inner_value::(net_device_pointer as *const u8, state_offset)?; + let state: u64 = read_linux_inner_value::(net_device_pointer as *const u8, state_offset)?; // dev_addr let dev_addr_offset = 1080; - let dev_addr_array: [u32; 8] = - read_linux_inner_value::<[u32; 8]>(net_device_pointer as *const u8, dev_addr_offset)?; + let dev_addr_array: [u8; 6] = + read_linux_inner_value::<[u8; 6]>(net_device_pointer as *const u8, dev_addr_offset)?; let inum: u32 = extract_netns_inum(net_device_pointer as *const u8)?; let pid: u32 = bpf_get_current_pid_tgid() as u32; // extracting lower 32 bit corresponding to the PID @@ -52,7 +52,7 @@ pub fn try_veth_tracer(ctx: ProbeContext, mode: u8) -> Result { // compose the structure let veth_data = VethLog { name: name_buf, - state: state.into(), + state: state, dev_addr: dev_addr_buf, event_type: mode, netns: inum, diff --git a/core/src/components/identity/Cargo.toml b/core/src/components/identity/Cargo.toml index f5bdb37..1e96cc9 100644 --- a/core/src/components/identity/Cargo.toml +++ b/core/src/components/identity/Cargo.toml @@ -15,7 +15,6 @@ struct = [] enums = [] experimental = ["struct", "enums"] - [dependencies] aya = "0.13.1" bytes = "1.4" @@ -27,12 +26,16 @@ tokio = { version = "1.48.0", features = [ "time", "macros", ] } -anyhow = "1.0" tracing = "0.1.41" -tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } bytemuck = { version = "1.23.0", features = ["derive"] } -bytemuck_derive = "1.10.1" -cortexbrain-common = { path = "../../../common/", features = ["map-handlers","program-handlers"] } +cortexbrain-common = { path = "../../../common/", features = [ + "map-handlers", + "program-handlers", + "network-structs", + "buffer-reader", +] } nix = { version = "0.30.1", features = ["net"] } kube = { version = "2.0.1", features = ["client"] } k8s-openapi = { version = "0.26.0", features = ["v1_34"] } +bytemuck_derive = "1.10.2" +anyhow = "1.0.100" diff --git a/core/src/components/identity/src/enums.rs b/core/src/components/identity/src/enums.rs deleted file mode 100644 index b0b271b..0000000 --- a/core/src/components/identity/src/enums.rs +++ /dev/null @@ -1,12 +0,0 @@ -/* - * IpProtocols enum to reconstruct the packet protocol based on the - * IPV4 Header Protocol code - */ -#[cfg(feature="enums")] -#[derive(Debug)] -#[repr(u8)] -pub enum IpProtocols { - ICMP = 1, - TCP = 6, - UDP = 17, -} \ No newline at end of file diff --git a/core/src/components/identity/src/helpers.rs b/core/src/components/identity/src/helpers.rs index 9512789..bd76a29 100644 --- a/core/src/components/identity/src/helpers.rs +++ b/core/src/components/identity/src/helpers.rs @@ -1,193 +1,23 @@ -#![allow(warnings)] -use crate::enums::IpProtocols; -use crate::structs::{PacketLog, TcpPacketRegistry, VethLog}; -use anyhow::Error; -use aya::programs::tc::SchedClassifierLinkId; -use aya::{ - Bpf, - maps::{MapData, perf::PerfEventArrayBuffer}, - programs::{SchedClassifier, TcAttachType}, -}; -use bytes::BytesMut; -use k8s_openapi::api::core::v1::Pod; -use kube::api::ObjectList; -use kube::{Api, Client}; +use aya::maps::perf::PerfEventArrayBuffer; +use cortexbrain_common::buffer_type::BufferType; use nix::net::if_::if_nameindex; -use std::collections::HashMap; -use std::fs; use std::result::Result::Ok; -use std::sync::Mutex; -use std::{ - borrow::BorrowMut, - net::Ipv4Addr, - sync::{ - Arc, - atomic::{AtomicBool, Ordering}, - }, -}; -use tokio::time; -use tracing::{debug, error, info, warn}; - -/* - * TryFrom Trait implementation for IpProtocols enum - * This is used to reconstruct the packet protocol based on the - * IPV4 Header Protocol code - */ - -impl TryFrom for IpProtocols { - type Error = (); - fn try_from(proto: u8) -> Result { - match proto { - 1 => Ok(IpProtocols::ICMP), - 6 => Ok(IpProtocols::TCP), - 17 => Ok(IpProtocols::UDP), - _ => Err(()), - } - } -} - -/* helper functions to read and log net events in the container */ -pub async fn display_events>( - mut perf_buffers: Vec>, - //running: Arc, - mut buffers: Vec, -) { - // FIXME: here maybe we need to use a loop with tokio::select - while true { - for buf in perf_buffers.iter_mut() { - match buf.read_events(&mut buffers) { - std::result::Result::Ok(events) => { - for i in 0..events.read { - let data = &buffers[i]; - if data.len() >= std::mem::size_of::() { - let pl: PacketLog = - unsafe { std::ptr::read(data.as_ptr() as *const _) }; - let src = reverse_be_addr(pl.src_ip); - let dst = reverse_be_addr(pl.dst_ip); - let src_port = u16::from_be(pl.src_port); - let dst_port = u16::from_be(pl.dst_port); - let event_id = pl.pid; - - match IpProtocols::try_from(pl.proto) { - std::result::Result::Ok(proto) => { - info!( - "Event Id: {} Protocol: {:?} SRC: {}:{} -> DST: {}:{}", - event_id, proto, src, src_port, dst, dst_port - ); - } - Err(_) => { - info!( - "Event Id: {} Protocol: Unknown ({})", - event_id, pl.proto - ); - } - }; - } else { - warn!("Received packet data too small: {} bytes", data.len()); - } - } - } - Err(e) => { - error!("Error reading events: {:?}", e); - } - } - } - tokio::time::sleep(std::time::Duration::from_millis(100)).await; - } -} - -pub fn reverse_be_addr(addr: u32) -> Ipv4Addr { - let mut octects = addr.to_be_bytes(); - let [a, b, c, d] = [octects[3], octects[2], octects[1], octects[0]]; - let reversed_ip = Ipv4Addr::new(a, b, c, d); - reversed_ip -} - -pub async fn display_veth_events>( - bpf: Arc>, - mut perf_buffers: Vec>, - //running: Arc, - mut buffers: Vec, - mut link_ids: Arc>>, -) { - // FIXME: here maybe we need to use a loop with tokio::select - while true { - for buf in perf_buffers.iter_mut() { - match buf.read_events(&mut buffers) { - std::result::Result::Ok(events) => { - for i in 0..events.read { - let data = &buffers[i]; - if data.len() >= std::mem::size_of::() { - let vethlog: VethLog = - unsafe { std::ptr::read(data.as_ptr() as *const _) }; - - let name_bytes = vethlog.name; - - let dev_addr_bytes = vethlog.dev_addr.to_vec(); - let name = std::str::from_utf8(&name_bytes); - let state = vethlog.state; - - let dev_addr = dev_addr_bytes; - let netns = vethlog.netns; - let mut event_type = String::new(); - match vethlog.event_type { - 1 => { - event_type = "creation".to_string(); - } - 2 => { - event_type = "deletion".to_string(); - } - _ => warn!("unknown event_type"), - } - match name { - std::result::Result::Ok(veth_name) => { - info!( - "[{}] Triggered action: register_netdevice event_type:{:?} Manipulated veth: {:?} state:{:?} dev_addr:{:?}", - netns, - event_type, - veth_name.trim_end_matches("\0").to_string(), - state, - dev_addr - ); - match attach_detach_veth( - bpf.clone(), - vethlog.event_type, - veth_name, - link_ids.clone(), - ) - .await - { - std::result::Result::Ok(_) => { - info!("Attach/Detach veth function attached correctly"); - } - Err(e) => error!( - "Error attaching Attach/Detach function. Error : {}", - e - ), - } - } - Err(_) => info!("Unknown name or corrupted field"), - } - } else { - warn!("Corrupted data"); - } - } - } - Err(e) => { - error!("Error reading veth events: {:?}", e); - } - } - } - tokio::time::sleep(std::time::Duration::from_millis(100)).await; - } -} +use tracing::{error, info}; +// docs: +// This function checks if the given interface name is in the list of ignored interfaces +// Takes a interface name (iface) as &str and returns true if the interface should be ignored +// Typically we want to ignore eth0,docker0,tunl0,lo interfaces because they are not relevant for the internal monitoring +// pub fn ignore_iface(iface: &str) -> bool { let ignored_interfaces = ["eth0", "docker0", "tunl0", "lo"]; ignored_interfaces.contains(&iface) } -//filter the interfaces,exclude docker0,eth0,lo interfaces +// docs: +// This function retrieves the list of veth interfaces on the system, filtering out ignored interfaces with +// the ignore_iface function. +// pub fn get_veth_channels() -> Vec { //filter interfaces and save the output in the let mut interfaces: Vec = Vec::new(); @@ -206,409 +36,82 @@ pub fn get_veth_channels() -> Vec { interfaces } -async fn attach_detach_veth( - bpf: Arc>, - event_type: u8, - iface: &str, - link_ids: Arc>>, -) -> Result<(), anyhow::Error> { - info!( - "attach_detach_veth called: event_type={}, iface={}", - event_type, iface - ); - match event_type { - 1 => { - let mut bpf = bpf.lock().unwrap(); - let program: &mut SchedClassifier = bpf - .program_mut("identity_classifier") - .ok_or_else(|| anyhow::anyhow!("program 'identity_classifier' not found"))? - .try_into()?; - - let iface = iface.trim_end_matches('\0'); +// docs: read buffer function: +// template function that take a mut perf_event_array_buffer of type T and a mutable buffer of Vec - if ignore_iface(iface) { - info!("Skipping ignored interface: {}", iface); - return Ok(()); - } - - let mut link_ids = link_ids.lock().unwrap(); - match program.attach(iface, TcAttachType::Ingress) { - std::result::Result::Ok(link_id) => { - info!( - "Program 'identity_classifier' attached to interface {}", - iface - ); - link_ids.insert(iface.to_string(), link_id); - } - Err(e) => error!("Error attaching program to interface {}: {:?}", iface, e), - } - } - 2 => { - // INFO: Detaching occurs automatically when veth is deleted by kernel itself - let mut link_ids = link_ids.lock().unwrap(); - match link_ids.remove(iface) { - Some(_) => { - info!("Successfully detached program from interface {}", iface); - } - None => { - error!("Interface {} not found in link_ids", iface); - return Err(anyhow::anyhow!("Interface {} not found in link_ids", iface)); - } - } - } - _ => { - error!("Unknown event type: {}", event_type); - } - } - Ok(()) -} - -// CHECK THIS DIR: /sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice -/* helper functions to display events from the TcpPacketRegistry structure */ -pub async fn display_tcp_registry_events>( - mut perf_buffers: Vec>, - //running: Arc, - mut buffers: Vec, +pub async fn read_perf_buffer>( + mut array_buffers: Vec>, + mut buffers: Vec, + buffer_type: BufferType, ) { - // FIXME: here maybe we need to use a loop with tokio::select - while true { - for buf in perf_buffers.iter_mut() { - match buf.read_events(&mut buffers) { - std::result::Result::Ok(events) => { - for i in 0..events.read { - let data = &buffers[i]; - if data.len() >= std::mem::size_of::() { - let tcp_pl: TcpPacketRegistry = - unsafe { std::ptr::read(data.as_ptr() as *const _) }; - let src = reverse_be_addr(tcp_pl.src_ip); - let dst = reverse_be_addr(tcp_pl.dst_ip); - let src_port = u16::from_be(tcp_pl.src_port); - let dst_port = u16::from_be(tcp_pl.dst_port); - let event_id = tcp_pl.pid; - let command = tcp_pl.command.to_vec(); - let end = command - .iter() - .position(|&x| x == 0) - .unwrap_or(command.len()); - let command_str = String::from_utf8_lossy(&command[..end]).to_string(); - let cgroup_id = tcp_pl.cgroup_id; - - match IpProtocols::try_from(tcp_pl.proto) { - std::result::Result::Ok(proto) => { - info!( - "Event Id: {} Protocol: {:?} SRC: {}:{} -> DST: {}:{} Command: {} Cgroup_id: {}", - event_id, - proto, - src, - src_port, - dst, - dst_port, - command_str, - cgroup_id //proc_content - ); - } - Err(_) => { - info!( - "Event Id: {} Protocol: Unknown ({})", - event_id, tcp_pl.proto - ); - } - }; - } else { - warn!("Received packet data too small: {} bytes", data.len()); - } - } - } - Err(e) => { - error!("Error reading events: {:?}", e); - } - } - } - tokio::time::sleep(std::time::Duration::from_millis(100)).await; - } -} - -#[cfg(feature = "experimental")] -pub async fn scan_cgroup_paths(path: String) -> Result, Error> { - let mut cgroup_paths: Vec = Vec::new(); - let default_path = "/sys/fs/cgroup/kubepods.slice".to_string(); - - let target_path = if fs::metadata(&path).is_err() { - error!("Using default path: {}", &default_path); - default_path - } else { - path - }; - let entries = match fs::read_dir(&target_path) { - Ok(entries) => entries, - Err(e) => { - error!( - "Error reading cgroup directory {:?}: {}", - &target_path.clone(), - e - ); - return Ok(cgroup_paths); - } - }; - for entry in entries { - if let Ok(entry) = entry { - let path = entry.path(); - if path.is_dir() { - if let Some(path_str) = path.to_str() { - cgroup_paths.push(path_str.to_string()); - } - } - } - } - - Ok(cgroup_paths) -} - -#[cfg(feature = "experimental")] -struct ServiceIdentity { - uid: String, - container_id: String, -} - -#[cfg(feature = "experimental")] -pub async fn scan_cgroup_cronjob(time_delta: u64) -> Result<(), Error> { - let interval = std::time::Duration::from_secs(time_delta); + // loop over the buffers loop { - let scanned_paths = scan_cgroup_paths("/sys/fs/cgroup/kubelet.slice".to_string()) - .await - .expect("An error occured during the cgroup scan"); - //--> this should return : - // /sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice - // /sys/fs/cgroup/kubelet.slice/kubelet.service - let mut scanned_subpaths = Vec::::new(); - for path in scanned_paths { - //info!("Scanned cgroup path: {}", path); - // scan the subgroups - let subpaths = scan_cgroup_paths(path.to_string()).await; - match subpaths { - Ok(paths) => { - for subpath in paths { - scanned_subpaths.push(subpath); + for buf in array_buffers.iter_mut() { + match buf.read_events(&mut buffers) { + Ok(events) => { + // triggered if some events are lost + if events.lost > 0 { + tracing::debug!("Lost events: {} ", events.lost); } - // ---> this should return the cgroups files and also : - // kubelet-kubepods-burstable.slice - // kubelet-kubepods-besteffort.slice - - // this directories needs to be scanned again to get further information about the pods - // for example: - // kubelet-kubepods-besteffort-pod088f8704_24f0_4636_a8e2_13f75646f370.slice - // where pod088f8704_24f0_4636_a8e2_13f75646f370 is the pod UID - } - Err(e) => { - error!("An error occured during the cgroup subpath scan: {}", e); - continue; - } - } - } - - let mut scanned_subpaths_v2 = Vec::::new(); - // second cgroup scan level to get the pod UIDs - for scanned_subpath in &scanned_subpaths { - let subpaths_v2 = scan_cgroup_paths(scanned_subpath.to_string()).await; - match subpaths_v2 { - Ok(paths) => { - for sub2 in paths { - info!("Debugging sub2: {}", &sub2); //return e.g. /sys/fs/cgroup/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podb8701d38_3791_422d_ad15_890ad1a0844b.slice/docker-f2e265659293676231ecb38fafccc97b1a42b75be192c32a602bc8ea579dc866.scope - scanned_subpaths_v2.push(sub2); - // this contains the addressed like this - //kubelet-kubepods-besteffort-pod088f8704_24f0_4636_a8e2_13f75646f370.slice + // triggered if some events are readed + if events.read > 0 { + tracing::debug!("Readed events: {}", events.read); + let offset = 0; + let tot_events = events.read as i32; + + //read the events in the buffer + match buffer_type { + BufferType::PacketLog => { + BufferType::read_packet_log(&mut buffers, tot_events, offset).await + } + BufferType::TcpPacketRegistry => { + BufferType::read_tcp_registry_log(&mut buffers, tot_events, offset) + .await + } + BufferType::VethLog => { + BufferType::read_and_handle_veth_log( + &mut buffers, + tot_events, + offset, + ) + .await + } + } } } Err(e) => { - error!("An error occured during the cgroup subpath v2 scan: {}", e); - continue; + error!("Cannot read events from buffer. Reason: {} ", e); } } } - - let mut uids = Vec::::new(); - let mut identites = Vec::::new(); - - //read the subpaths to extract the pod uid - for subpath in scanned_subpaths_v2 { - let uid = extract_pod_uid(subpath.clone()) - .expect("An error occured during the extraction of pod UIDs"); - let container_id = extract_container_id(subpath.clone()) - .expect("An error occured during the extraction of the docker container id"); - debug!("Debugging extracted UID: {:?}", &uid); - // create a linked list for each service - let service_identity = ServiceIdentity { uid, container_id }; - identites.push(service_identity); //push the linked list in a vector of ServiceIdentity structure. Each struct contains the uid and the container id - } - - // get pod information from UID and store the info in an HashMqp for O(1) access - let service_map = get_pod_info().await?; - - //info!("Debugging Identites vector: {:?}", identites); - for service in identites { - let name = service_cache(service_map.clone(), service.uid.clone()); - let uid = service.uid; - let id = service.container_id; - info!( - "[Identity]: name: {:?} uid: {:?} docker container id {:?} ", - name, uid, id - ); - } - - info!( - "Cronjob completed a cgroup scan cycle. Next scan will be in {} seconds", - time_delta - ); - time::sleep(interval).await; - } -} -#[cfg(feature = "experimental")] -fn service_cache(service_map: HashMap, uid: String) -> String { - service_map.get(&uid).cloned().unwrap_or_else(|| { - error!("Service not found for uid: {}", uid); - "unknown".to_string() - }) -} -#[cfg(feature = "experimental")] -fn extract_container_id(cgroup_path: String) -> Result { - let splits: Vec<&str> = cgroup_path.split("/").collect(); - - let index = extract_target_from_splits(splits.clone(), "docker-")?; - let docker_id_split = splits[index] - .trim_start_matches("docker-") - .trim_end_matches(".scope"); - Ok(docker_id_split.to_string()) -} - -// IDEA: add cgroup docker process mapping in ServiceIdentity structure -#[cfg(feature = "experimental")] -fn extract_pod_uid(cgroup_path: String) -> Result { - // example of cgroup path: - // /sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice/kubelet-kubepods-besteffort-pod93580201_87d5_44e6_9779_f6153ca17637.slice - // or - // /sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-burstable.slice/kubelet-kubepods-burstable-poddd3a1c6b_af40_41b1_8e1c_9e31fe8d96cb.slice - - // split the path by "/" - let splits: Vec<&str> = cgroup_path.split("/").collect(); - debug!("Debugging splits: {:?}", &splits); - - let index = extract_target_from_splits(splits.clone(), "-pod")?; - - let pod_split = splits[index] - .trim_start_matches("kubelet-kubepods-besteffort-") - .trim_start_matches("kubelet-kubepods-burstable-") - .trim_start_matches("kubepods-besteffort-") - .trim_start_matches("kubepods-burstable-"); - - let uid_ = pod_split - .trim_start_matches("pod") - .trim_end_matches(".slice"); //return uids with underscore (_) [ex.dd3a1c6b_af40_41b1_8e1c_9e31fe8d96cb] - - let uid = uid_.replace("_", "-"); - Ok(uid.to_string()) -} -#[cfg(feature = "experimental")] -fn extract_target_from_splits(splits: Vec<&str>, target: &str) -> Result { - for (index, split) in splits.iter().enumerate() { - // find the split that contains the word 'pod' - if split.contains(target) { - debug!("Target index; {}", index); - return Ok(index); - } + tokio::time::sleep(std::time::Duration::from_millis(100)).await; // small sleep } - Err(Error::msg("'-pod' word not found in split")) -} - -/* unfortunately you cannot query the pods using the uids directly from ListParams */ -#[cfg(feature = "experimental")] -async fn query_all_pods() -> Result, Error> { - let client = Client::try_default() - .await - .expect("Cannot connect to kubernetes client"); - let pods: Api = Api::all(client); - let lp = kube::api::ListParams::default(); // default list params - let pod_list = pods - .list(&lp) - .await - .expect("An error occured during the pod list extraction"); - - Ok(pod_list) -} - -// fast pod caching system -#[cfg(feature = "experimental")] -async fn get_pod_info() -> Result, Error> { - let all_pods = query_all_pods().await?; - - let mut service_map = HashMap::::new(); - - for pod in all_pods { - if let (Some(name), Some(uid)) = (pod.metadata.name, pod.metadata.uid) { - service_map.insert(uid, name); - } - } // insert the pod name and uid from the KubeAPI - - Ok(service_map) } -#[cfg(feature = "experimental")] +#[cfg(test)] mod tests { - use tracing_subscriber::fmt::format; - - use crate::helpers::{extract_container_id, extract_pod_uid, extract_target_from_splits}; - - #[test] - fn extract_uid_from_string() { - let cgroup_paths = vec!["/sys/fs/cgroup/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod231bd2d7_0f09_4781_a4e1_e4ea026342dd.slice".to_string(), - "/sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice/kubelet-kubepods-besteffort-pod231bd2d7_0f09_4781_a4e1_e4ea026342dd.slice".to_string()]; - - let mut uid_vec = Vec::::new(); - - for cgroup_path in cgroup_paths { - let uid = extract_pod_uid(cgroup_path) - .map_err(|e| format!("An error occured {}", e)) - .unwrap(); - uid_vec.push(uid); - } - - let check = vec![ - "231bd2d7-0f09-4781-a4e1-e4ea026342dd".to_string(), - "231bd2d7-0f09-4781-a4e1-e4ea026342dd".to_string(), - ]; - - assert_eq!(uid_vec, check); - } - + use cortexbrain_common::buffer_type::VethLog; #[test] - fn test_extract_target_index() { - let cgroup_paths = vec!["/sys/fs/cgroup/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod231bd2d7_0f09_4781_a4e1_e4ea026342dd.slice".to_string(), - "/sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice/kubelet-kubepods-besteffort-pod231bd2d7_0f09_4781_a4e1_e4ea026342dd.slice".to_string()]; - - let mut index_vec = Vec::::new(); - for cgroup_path in cgroup_paths { - let splits: Vec<&str> = cgroup_path.split("/").collect(); - - let target_index = extract_target_from_splits(splits, "-pod").unwrap(); - index_vec.push(target_index); - } - let index_check = vec![6, 7]; - assert_eq!(index_vec, index_check); + fn check_veth_log_struct_mem() { + let mem_test = std::mem::size_of::(); + assert_eq!(mem_test, 39); } - #[test] - fn extract_docker_id() { - let cgroup_paths = vec!["/sys/fs/cgroup/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod17fd3f7c_37e4_4009_8c38_e58b30691af3.slice/docker-13abd64c0ba349975a762476c9703b642d18077eabeb3aa1d941132048afc861.scope".to_string(), - "/sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice/kubelet-kubepods-besteffort-pod17fd3f7c_37e4_4009_8c38_e58b30691af3.slice/docker-13abd64c0ba349975a762476c9703b642d18077eabeb3aa1d941132048afc861.scope".to_string()]; - - let mut id_vec = Vec::::new(); - for cgroup_path in cgroup_paths { - let id = extract_container_id(cgroup_path).unwrap(); - id_vec.push(id); - } - let id_check = vec![ - "13abd64c0ba349975a762476c9703b642d18077eabeb3aa1d941132048afc861".to_string(), - "13abd64c0ba349975a762476c9703b642d18077eabeb3aa1d941132048afc861".to_string(), - ]; - assert_eq!(id_vec, id_check); + fn test_vethlog_buffer_len() { + let vethlog = VethLog { + name: [0; 16], + dev_addr: [0; 6], + state: 1, + netns: 123, + event_type: 1, + pid: 1, + }; + let buffer = unsafe { + std::slice::from_raw_parts( + (&vethlog as *const VethLog) as *const u8, + std::mem::size_of::(), + ) + }; + assert_eq!(buffer.len(), 39); } } diff --git a/core/src/components/identity/src/lib.rs b/core/src/components/identity/src/lib.rs index 5413414..ceaedc2 100644 --- a/core/src/components/identity/src/lib.rs +++ b/core/src/components/identity/src/lib.rs @@ -1,3 +1,3 @@ pub mod helpers; -pub mod structs; -pub mod enums; \ No newline at end of file +#[cfg(feature = "experimental")] +pub mod service_discovery; \ No newline at end of file diff --git a/core/src/components/identity/src/main.rs b/core/src/components/identity/src/main.rs index 56f81d6..598b964 100644 --- a/core/src/components/identity/src/main.rs +++ b/core/src/components/identity/src/main.rs @@ -8,16 +8,16 @@ * */ -mod enums; mod helpers; -mod structs; +mod service_discovery; -use crate::helpers::{ - display_events, display_tcp_registry_events, display_veth_events, get_veth_channels, -}; +use crate::helpers::{get_veth_channels, read_perf_buffer}; use aya::{ Ebpf, - maps::{Map, perf::PerfEventArray}, + maps::{ + MapData, + perf::{PerfEventArray, PerfEventArrayBuffer}, + }, programs::{SchedClassifier, TcAttachType, tc::SchedClassifierLinkId}, util::online_cpus, }; @@ -28,6 +28,7 @@ use crate::helpers::scan_cgroup_cronjob; use bytes::BytesMut; use cortexbrain_common::map_handlers::{init_bpf_maps, map_pinner, populate_blocklist}; use cortexbrain_common::program_handlers::load_program; +use cortexbrain_common::{buffer_type::BufferType, map_handlers::BpfMapsData}; use std::{ convert::TryInto, path::Path, @@ -43,8 +44,8 @@ use std::collections::HashMap; #[tokio::main] async fn main() -> Result<(), anyhow::Error> { - //init tracing subscriber - logger::init_default_logger(); + //init otlè tracing subscriber + let otlp_provider = logger::otlp_logger_init("identity_service-OTLP".to_string()); info!("Starting identity service..."); info!("fetching data"); @@ -86,9 +87,9 @@ async fn main() -> Result<(), anyhow::Error> { info!("Found interfaces: {:?}", interfaces); - //{ FIXME: paused for testing the other features - // populate_blocklist(&mut maps.2).await?; - //} + { + populate_blocklist().await?; + } { init_tc_classifier(bpf.clone(), interfaces, link_ids.clone()).await.context( @@ -101,11 +102,9 @@ async fn main() -> Result<(), anyhow::Error> { )?; } - event_listener(maps, link_ids.clone(), bpf.clone()) - .await - .map_err(|e| { - anyhow::anyhow!("Error inizializing event_listener. Reason: {}", e) - })?; + event_listener(maps).await.map_err(|e| { + anyhow::anyhow!("Error inizializing event_listener. Reason: {}", e) + })?; } Err(e) => { error!("Error while pinning bpf_maps: {}", e); @@ -115,6 +114,7 @@ async fn main() -> Result<(), anyhow::Error> { Err(e) => { error!("Error while loading bpf maps {}", e); let _ = signal::ctrl_c().await; + let _ = otlp_provider.shutdown(); } } @@ -198,74 +198,92 @@ async fn init_tcp_registry(bpf: Arc>) -> Result<(), anyhow::Error> { // perf_veth_array: contains is associated with the network events stored in the veth_map (veth_identity_map) // // -async fn event_listener( - bpf_maps: Vec, - link_ids: Arc>>, - bpf: Arc>, -) -> Result<(), anyhow::Error> { +async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> { info!("Preparing perf_buffers and perf_arrays"); //TODO: try to change from PerfEventArray to a RingBuffer data structure - let mut perf_event_arrays = Vec::new(); // contains a vector of PerfEventArrays - let mut event_buffers = Vec::new(); // contains a vector of buffers + let mut map_manager = + HashMap::, Vec>)>::new(); - // create the PerfEventArrays and the buffers - for map in bpf_maps { - debug!("Debugging map type:{:?}", map); + // create the PerfEventArrays and the buffers from the BpfMapsData Objects + for (map, name) in bpf_maps + .bpf_obj_map + .into_iter() + .zip(bpf_maps.bpf_obj_names.into_iter()) + // zip two iterators at the same time for map and mapnames + { + debug!("Debugging map type:{:?} for map name {:?}", map, &name); + info!("Creating PerfEventArray for map name {:?}", &name); + + // save the map in a registry if is a PerfEventArray to access them by name if let std::result::Result::Ok(perf_event_array) = PerfEventArray::try_from(map) { - perf_event_arrays.push(perf_event_array); // this is step 1 - let perf_event_array_buffer = Vec::new(); - event_buffers.push(perf_event_array_buffer); //this is step 2 + map_manager.insert(name.clone(), (perf_event_array, Vec::new())); + + // perf_event_arrays.push(perf_event_array); // this is step 1 + // let perf_event_array_buffer = Vec::new(); + // event_buffers.push(perf_event_array_buffer); //this is step 2 } else { - warn!("Map is not a PerfEventArray, skipping load"); + warn!("Map {:?} is not a PerfEventArray, skipping load", &name); } } // fill the input buffers with data from the PerfEventArrays - let cpus = online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))?; - - for (perf_evt_array, perf_evt_array_buffer) in - perf_event_arrays.iter_mut().zip(event_buffers.iter_mut()) - { - for cpu_id in &cpus { - let single_buffer = perf_evt_array.open(*cpu_id, None)?; - perf_evt_array_buffer.push(single_buffer); + for cpu_id in online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))? { + for (name, (perf_evt_array, perf_evt_array_buffer)) in map_manager.iter_mut() { + let buf = perf_evt_array.open(cpu_id, None)?; + info!( + "Buffer created for map {:?} on cpu_id {:?}. Buffer size: {}", + name, + cpu_id, + std::mem::size_of_val(&buf) + ); + perf_evt_array_buffer.push(buf); } } info!("Listening for events..."); - let mut event_buffers = event_buffers.into_iter(); - let perf_veth_buffer = event_buffers - .next() + // i need to use remove to move the values from the Map Manager to the the async tasks + let (perf_veth_array, perf_veth_buffers) = map_manager + .remove("veth_identity_map") .expect("Cannot create perf_veth buffer"); - let perf_net_events_buffer = event_buffers - .next() + let (perf_net_events_array, perf_net_events_buffers) = map_manager + .remove("events_map") .expect("Cannot create perf_net_events buffer"); - let tcp_registry_buffer = event_buffers - .next() + let (tcp_registry_array, tcp_registry_buffers) = map_manager + .remove("TcpPacketRegistry") .expect("Cannot create tcp_registry buffer"); // init output buffers - let veth_buffers = vec![BytesMut::with_capacity(1024); 10]; + let veth_buffers = vec![BytesMut::with_capacity(10 * 1024); online_cpus().iter().len()]; let events_buffers = vec![BytesMut::with_capacity(1024); online_cpus().iter().len()]; let tcp_buffers = vec![BytesMut::with_capacity(1024); online_cpus().iter().len()]; // init veth link ids - let veth_link_ids = link_ids; + //let veth_link_ids = link_ids; // spawn async tasks let veth_events_displayer = tokio::spawn(async move { - display_veth_events(bpf.clone(), perf_veth_buffer, veth_buffers, veth_link_ids).await; + read_perf_buffer(perf_veth_buffers, veth_buffers, BufferType::VethLog).await; }); let net_events_displayer = tokio::spawn(async move { - display_events(perf_net_events_buffer, events_buffers).await; + read_perf_buffer( + perf_net_events_buffers, + events_buffers, + BufferType::PacketLog, + ) + .await; }); let tcp_registry_events_displayer: tokio::task::JoinHandle<()> = tokio::spawn(async move { - display_tcp_registry_events(tcp_registry_buffer, tcp_buffers).await; + read_perf_buffer( + tcp_registry_buffers, + tcp_buffers, + BufferType::TcpPacketRegistry, + ) + .await; }); #[cfg(feature = "experimental")] diff --git a/core/src/components/identity/src/mod.rs b/core/src/components/identity/src/mod.rs index 5413414..ceaedc2 100644 --- a/core/src/components/identity/src/mod.rs +++ b/core/src/components/identity/src/mod.rs @@ -1,3 +1,3 @@ pub mod helpers; -pub mod structs; -pub mod enums; \ No newline at end of file +#[cfg(feature = "experimental")] +pub mod service_discovery; \ No newline at end of file diff --git a/core/src/components/identity/src/service_discovery.rs b/core/src/components/identity/src/service_discovery.rs new file mode 100644 index 0000000..bc43f3d --- /dev/null +++ b/core/src/components/identity/src/service_discovery.rs @@ -0,0 +1,297 @@ +#[cfg(feature = "experimental")] +use anyhow::Error; +#[cfg(feature = "experimental")] +use k8s_openapi::api::core::v1::Pod; +#[cfg(feature = "experimental")] +use kube::api::ObjectList; +#[cfg(feature = "experimental")] +use kube::{Api, Client}; +#[cfg(feature = "experimental")] +use std::fs; +#[cfg(feature = "experimental")] +use tokio::time; + +#[cfg(feature = "experimental")] +pub async fn scan_cgroup_paths(path: String) -> Result, Error> { + let mut cgroup_paths: Vec = Vec::new(); + let default_path = "/sys/fs/cgroup/kubepods.slice".to_string(); + + let target_path = if fs::metadata(&path).is_err() { + error!("Using default path: {}", &default_path); + default_path + } else { + path + }; + let entries = match fs::read_dir(&target_path) { + Ok(entries) => entries, + Err(e) => { + error!( + "Error reading cgroup directory {:?}: {}", + &target_path.clone(), + e + ); + return Ok(cgroup_paths); + } + }; + for entry in entries { + if let Ok(entry) = entry { + let path = entry.path(); + if path.is_dir() { + if let Some(path_str) = path.to_str() { + cgroup_paths.push(path_str.to_string()); + } + } + } + } + + Ok(cgroup_paths) +} + +#[cfg(feature = "experimental")] +struct ServiceIdentity { + uid: String, + container_id: String, +} + +#[cfg(feature = "experimental")] +pub async fn scan_cgroup_cronjob(time_delta: u64) -> Result<(), Error> { + let interval = std::time::Duration::from_secs(time_delta); + loop { + let scanned_paths = scan_cgroup_paths("/sys/fs/cgroup/kubelet.slice".to_string()) + .await + .expect("An error occured during the cgroup scan"); + //--> this should return : + // /sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice + // /sys/fs/cgroup/kubelet.slice/kubelet.service + let mut scanned_subpaths = Vec::::new(); + for path in scanned_paths { + //info!("Scanned cgroup path: {}", path); + // scan the subgroups + let subpaths = scan_cgroup_paths(path.to_string()).await; + match subpaths { + Ok(paths) => { + for subpath in paths { + scanned_subpaths.push(subpath); + } + // ---> this should return the cgroups files and also : + // kubelet-kubepods-burstable.slice + // kubelet-kubepods-besteffort.slice + + // this directories needs to be scanned again to get further information about the pods + // for example: + // kubelet-kubepods-besteffort-pod088f8704_24f0_4636_a8e2_13f75646f370.slice + // where pod088f8704_24f0_4636_a8e2_13f75646f370 is the pod UID + } + Err(e) => { + error!("An error occured during the cgroup subpath scan: {}", e); + continue; + } + } + } + + let mut scanned_subpaths_v2 = Vec::::new(); + // second cgroup scan level to get the pod UIDs + for scanned_subpath in &scanned_subpaths { + let subpaths_v2 = scan_cgroup_paths(scanned_subpath.to_string()).await; + match subpaths_v2 { + Ok(paths) => { + for sub2 in paths { + info!("Debugging sub2: {}", &sub2); //return e.g. /sys/fs/cgroup/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podb8701d38_3791_422d_ad15_890ad1a0844b.slice/docker-f2e265659293676231ecb38fafccc97b1a42b75be192c32a602bc8ea579dc866.scope + scanned_subpaths_v2.push(sub2); + // this contains the addressed like this + //kubelet-kubepods-besteffort-pod088f8704_24f0_4636_a8e2_13f75646f370.slice + } + } + Err(e) => { + error!("An error occured during the cgroup subpath v2 scan: {}", e); + continue; + } + } + } + + let mut uids = Vec::::new(); + let mut identites = Vec::::new(); + + //read the subpaths to extract the pod uid + for subpath in scanned_subpaths_v2 { + let uid = extract_pod_uid(subpath.clone()) + .expect("An error occured during the extraction of pod UIDs"); + let container_id = extract_container_id(subpath.clone()) + .expect("An error occured during the extraction of the docker container id"); + debug!("Debugging extracted UID: {:?}", &uid); + // create a linked list for each service + let service_identity = ServiceIdentity { uid, container_id }; + identites.push(service_identity); //push the linked list in a vector of ServiceIdentity structure. Each struct contains the uid and the container id + } + + // get pod information from UID and store the info in an HashMqp for O(1) access + let service_map = get_pod_info().await?; + + //info!("Debugging Identites vector: {:?}", identites); + for service in identites { + let name = service_cache(service_map.clone(), service.uid.clone()); + let uid = service.uid; + let id = service.container_id; + info!( + "[Identity]: name: {:?} uid: {:?} docker container id {:?} ", + name, uid, id + ); + } + + info!( + "Cronjob completed a cgroup scan cycle. Next scan will be in {} seconds", + time_delta + ); + time::sleep(interval).await; + } +} +#[cfg(feature = "experimental")] +fn service_cache(service_map: HashMap, uid: String) -> String { + service_map.get(&uid).cloned().unwrap_or_else(|| { + error!("Service not found for uid: {}", uid); + "unknown".to_string() + }) +} +#[cfg(feature = "experimental")] +fn extract_container_id(cgroup_path: String) -> Result { + let splits: Vec<&str> = cgroup_path.split("/").collect(); + + let index = extract_target_from_splits(splits.clone(), "docker-")?; + let docker_id_split = splits[index] + .trim_start_matches("docker-") + .trim_end_matches(".scope"); + Ok(docker_id_split.to_string()) +} + +// IDEA: add cgroup docker process mapping in ServiceIdentity structure +#[cfg(feature = "experimental")] +fn extract_pod_uid(cgroup_path: String) -> Result { + // example of cgroup path: + // /sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice/kubelet-kubepods-besteffort-pod93580201_87d5_44e6_9779_f6153ca17637.slice + // or + // /sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-burstable.slice/kubelet-kubepods-burstable-poddd3a1c6b_af40_41b1_8e1c_9e31fe8d96cb.slice + + // split the path by "/" + let splits: Vec<&str> = cgroup_path.split("/").collect(); + debug!("Debugging splits: {:?}", &splits); + + let index = extract_target_from_splits(splits.clone(), "-pod")?; + + let pod_split = splits[index] + .trim_start_matches("kubelet-kubepods-besteffort-") + .trim_start_matches("kubelet-kubepods-burstable-") + .trim_start_matches("kubepods-besteffort-") + .trim_start_matches("kubepods-burstable-"); + + let uid_ = pod_split + .trim_start_matches("pod") + .trim_end_matches(".slice"); //return uids with underscore (_) [ex.dd3a1c6b_af40_41b1_8e1c_9e31fe8d96cb] + + let uid = uid_.replace("_", "-"); + Ok(uid.to_string()) +} +#[cfg(feature = "experimental")] +fn extract_target_from_splits(splits: Vec<&str>, target: &str) -> Result { + for (index, split) in splits.iter().enumerate() { + // find the split that contains the word 'pod' + if split.contains(target) { + debug!("Target index; {}", index); + return Ok(index); + } + } + Err(Error::msg("'-pod' word not found in split")) +} + +/* unfortunately you cannot query the pods using the uids directly from ListParams */ +#[cfg(feature = "experimental")] +async fn query_all_pods() -> Result, Error> { + let client = Client::try_default() + .await + .expect("Cannot connect to kubernetes client"); + let pods: Api = Api::all(client); + let lp = kube::api::ListParams::default(); // default list params + let pod_list = pods + .list(&lp) + .await + .expect("An error occured during the pod list extraction"); + + Ok(pod_list) +} + +// fast pod caching system +#[cfg(feature = "experimental")] +async fn get_pod_info() -> Result, Error> { + let all_pods = query_all_pods().await?; + + let mut service_map = HashMap::::new(); + + for pod in all_pods { + if let (Some(name), Some(uid)) = (pod.metadata.name, pod.metadata.uid) { + service_map.insert(uid, name); + } + } // insert the pod name and uid from the KubeAPI + + Ok(service_map) +} + +#[cfg(feature = "experimental")] +mod tests { + use tracing_subscriber::fmt::format; + + use crate::helpers::{extract_container_id, extract_pod_uid, extract_target_from_splits}; + + #[test] + fn extract_uid_from_string() { + let cgroup_paths = vec!["/sys/fs/cgroup/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod231bd2d7_0f09_4781_a4e1_e4ea026342dd.slice".to_string(), + "/sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice/kubelet-kubepods-besteffort-pod231bd2d7_0f09_4781_a4e1_e4ea026342dd.slice".to_string()]; + + let mut uid_vec = Vec::::new(); + + for cgroup_path in cgroup_paths { + let uid = extract_pod_uid(cgroup_path) + .map_err(|e| format!("An error occured {}", e)) + .unwrap(); + uid_vec.push(uid); + } + + let check = vec![ + "231bd2d7-0f09-4781-a4e1-e4ea026342dd".to_string(), + "231bd2d7-0f09-4781-a4e1-e4ea026342dd".to_string(), + ]; + + assert_eq!(uid_vec, check); + } + + #[test] + fn test_extract_target_index() { + let cgroup_paths = vec!["/sys/fs/cgroup/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod231bd2d7_0f09_4781_a4e1_e4ea026342dd.slice".to_string(), + "/sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice/kubelet-kubepods-besteffort-pod231bd2d7_0f09_4781_a4e1_e4ea026342dd.slice".to_string()]; + + let mut index_vec = Vec::::new(); + for cgroup_path in cgroup_paths { + let splits: Vec<&str> = cgroup_path.split("/").collect(); + + let target_index = extract_target_from_splits(splits, "-pod").unwrap(); + index_vec.push(target_index); + } + let index_check = vec![6, 7]; + assert_eq!(index_vec, index_check); + } + + #[test] + fn extract_docker_id() { + let cgroup_paths = vec!["/sys/fs/cgroup/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod17fd3f7c_37e4_4009_8c38_e58b30691af3.slice/docker-13abd64c0ba349975a762476c9703b642d18077eabeb3aa1d941132048afc861.scope".to_string(), + "/sys/fs/cgroup/kubelet.slice/kubelet-kubepods.slice/kubelet-kubepods-besteffort.slice/kubelet-kubepods-besteffort-pod17fd3f7c_37e4_4009_8c38_e58b30691af3.slice/docker-13abd64c0ba349975a762476c9703b642d18077eabeb3aa1d941132048afc861.scope".to_string()]; + + let mut id_vec = Vec::::new(); + for cgroup_path in cgroup_paths { + let id = extract_container_id(cgroup_path).unwrap(); + id_vec.push(id); + } + let id_check = vec![ + "13abd64c0ba349975a762476c9703b642d18077eabeb3aa1d941132048afc861".to_string(), + "13abd64c0ba349975a762476c9703b642d18077eabeb3aa1d941132048afc861".to_string(), + ]; + assert_eq!(id_vec, id_check); + } +} diff --git a/core/src/components/identity/src/structs.rs b/core/src/components/identity/src/structs.rs deleted file mode 100644 index 7e2aa2b..0000000 --- a/core/src/components/identity/src/structs.rs +++ /dev/null @@ -1,56 +0,0 @@ -use bytemuck_derive::Zeroable; - -/* - * Structure PacketLog - * This structure is used to store the packet information - */ -#[repr(C)] -#[derive(Clone, Copy, Zeroable)] -pub struct PacketLog { - pub proto: u8, - pub src_ip: u32, - pub src_port: u16, - pub dst_ip: u32, - pub dst_port: u16, - pub pid: u32, -} -unsafe impl aya::Pod for PacketLog {} - -/* - * Connection Array that contains the hash_id associated with an active connection - */ -//#[repr(C)] -//#[derive(Clone, Copy, Zeroable)] -//pub struct ConnArray { -// pub src_ip: u32, -// pub dst_ip: u32, -// pub src_port: u16, -// pub dst_port: u16, -// pub proto: u8, -//} - -//unsafe impl aya::Pod for ConnArray {} - -#[repr(C)] -#[derive(Clone, Copy)] -pub struct VethLog { - pub name: [u8; 16], - pub state: u64, - pub dev_addr: [u32; 8], - pub event_type: u8, - pub netns: u32, - pub pid: u32, -} - -#[repr(C)] -#[derive(Clone, Copy)] -pub struct TcpPacketRegistry { - pub proto: u8, - pub src_ip: u32, - pub dst_ip: u32, - pub src_port: u16, - pub dst_port: u16, - pub pid: u32, - pub command: [u8; 16], - pub cgroup_id: u64, -} diff --git a/core/src/testing/agent.yaml b/core/src/testing/agent.yaml index d189f43..4633408 100644 --- a/core/src/testing/agent.yaml +++ b/core/src/testing/agent.yaml @@ -19,7 +19,7 @@ spec: hostNetwork: true containers: - name: agent - image: ghcr.io/cortexflow/agent:latest + image: lorenzotettamanti/cortexflow-agent:veth-command-test4 command: ["/bin/bash", "-c"] args: - | diff --git a/core/src/testing/identity.yaml b/core/src/testing/identity.yaml index 38bf197..aeb9ebc 100644 --- a/core/src/testing/identity.yaml +++ b/core/src/testing/identity.yaml @@ -28,7 +28,6 @@ spec: echo "checking permissions" ls -ld /sys/fs/bpf - volumeMounts: - name: bpf mountPath: /sys/fs/bpf @@ -53,7 +52,7 @@ spec: - SYS_PTRACE containers: - name: identity - image: lorenzotettamanti/cortexflow-identity:0.1.5-refcount9 + image: lorenzotettamanti/cortexflow-identity:0.1.2 command: ["/bin/bash", "-c"] args: - | @@ -70,6 +69,16 @@ spec: echo "Running application..." exec /usr/local/bin/cortexflow-identity-service || echo "Application exited with code $?" + env: + - name: OTEL_SERVICE_NAME + value: cortexflow-identity + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: http://localhost:4317 + - name: OTEL_EXPORTER_OTLP_PROTOCOL + value: grpc + - name: OTEL_RESOURCE_ATTRIBUTES + value: service.namespace=cortexflow,service.version=0.1.5 + resources: limits: cpu: "1" diff --git a/core/src/testing/otel_agent.yaml b/core/src/testing/otel_agent.yaml new file mode 100644 index 0000000..71b7e08 --- /dev/null +++ b/core/src/testing/otel_agent.yaml @@ -0,0 +1,210 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: otel-agent-conf + namespace: cortexflow + labels: + app: opentelemetry + component: otel-agent-conf +data: + otel-agent-config: | + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + + exporters: + otlp: + endpoint: otel-collector.cortexflow.svc.cluster.local:4317 + tls: + insecure: true + logging: + loglevel: info + + service: + pipelines: + traces: + receivers: [otlp] + exporters: [otlp, logging] + logs: + receivers: [otlp] + exporters: [otlp, logging] + +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: otel-agent + namespace: cortexflow + labels: + app: opentelemetry + component: otel-agent +spec: + selector: + matchLabels: + app: opentelemetry + component: otel-agent + template: + metadata: + labels: + app: opentelemetry + component: otel-agent + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + containers: + - name: otel-agent + image: otel/opentelemetry-collector:0.95.0 + command: + - "/otelcol" + - "--config=/conf/otel-agent-config.yaml" + resources: + limits: + cpu: 500m + memory: 500Mi + requests: + cpu: 100m + memory: 100Mi + ports: + - containerPort: 4317 + hostPort: 4317 + protocol: TCP + - containerPort: 4318 + hostPort: 4318 + protocol: TCP + env: + - name: GOMEMLIMIT + value: 400MiB + volumeMounts: + - name: otel-agent-config-vol + mountPath: /conf + volumes: + - name: otel-agent-config-vol + configMap: + name: otel-agent-conf + items: + - key: otel-agent-config + path: otel-agent-config.yaml + +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: otel-collector-conf + namespace: cortexflow + labels: + app: opentelemetry + component: otel-collector-conf +data: + otel-collector-config: | + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + + processors: + memory_limiter: + limit_mib: 1500 + spike_limit_mib: 512 + check_interval: 5s + + exporters: + # otlp: + # endpoint: otel-collector.cortexflow.svc.cluster.local:4317 + # tls: + # insecure: true + logging: {} + + service: + pipelines: + traces: + receivers: [otlp] + processors: [memory_limiter] + exporters: [logging] + logs: + receivers: [otlp] + processors: [memory_limiter] + exporters: [logging] + +--- +apiVersion: v1 +kind: Service +metadata: + name: otel-collector + namespace: cortexflow + labels: + app: opentelemetry + component: otel-collector +spec: + selector: + app: opentelemetry + component: otel-collector + ports: + - name: otlp-grpc + port: 4317 + targetPort: 4317 + - name: otlp-http + port: 4318 + targetPort: 4318 + - name: metrics + port: 8888 + targetPort: 8888 + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: otel-collector + namespace: cortexflow + labels: + app: opentelemetry + component: otel-collector +spec: + replicas: 1 + selector: + matchLabels: + app: opentelemetry + component: otel-collector + template: + metadata: + labels: + app: opentelemetry + component: otel-collector + spec: + containers: + - name: otel-collector + image: otel/opentelemetry-collector:0.95.0 + command: + - "/otelcol" + - "--config=/conf/otel-collector-config.yaml" + resources: + limits: + cpu: "1" + memory: 2Gi + requests: + cpu: 200m + memory: 400Mi + ports: + - containerPort: 4317 + - containerPort: 4318 + - containerPort: 8888 + env: + - name: GOMEMLIMIT + value: 1600MiB + volumeMounts: + - name: otel-collector-config-vol + mountPath: /conf + volumes: + - name: otel-collector-config-vol + configMap: + name: otel-collector-conf + items: + - key: otel-collector-config + path: otel-collector-config.yaml \ No newline at end of file From 696cbb7e1b6b4f24803b7a3e61ded3415a1ee5e3 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Sun, 8 Feb 2026 21:10:02 +0100 Subject: [PATCH 18/46] [#158]: moved map manager from identity to common crate to reuse the function in metrics service --- core/common/Cargo.toml | 1 + core/common/src/map_handlers.rs | 54 ++++++++++++++++++++ core/src/components/identity/src/main.rs | 57 ++++++---------------- core/src/components/metrics/src/helpers.rs | 54 +++++++++----------- 4 files changed, 91 insertions(+), 75 deletions(-) diff --git a/core/common/Cargo.toml b/core/common/Cargo.toml index b8e840d..f604b65 100644 --- a/core/common/Cargo.toml +++ b/core/common/Cargo.toml @@ -29,5 +29,6 @@ bytemuck_derive = "1.10.2" map-handlers = [] program-handlers = [] network-structs = [] +monitoring-structs = [] buffer-reader = [] experimental = [] diff --git a/core/common/src/map_handlers.rs b/core/common/src/map_handlers.rs index 19d4e20..313f73e 100644 --- a/core/common/src/map_handlers.rs +++ b/core/common/src/map_handlers.rs @@ -154,3 +154,57 @@ pub fn load_perf_event_array_from_mapdata( })?; Ok(perf_event_array) } + +#[cfg(feature = "map-handlers")] +pub fn map_manager( + maps: BpfMapsData, +) -> Result< + std::collections::HashMap< + String, + ( + aya::maps::PerfEventArray, + Vec>, + ), + >, + Error, +> { + use aya::maps::PerfEventArray; + use aya::maps::{MapData, perf::PerfEventArrayBuffer}; + use tracing::debug; + + let mut map_manager = std::collections::HashMap::< + String, // this will store the bpf map name + (PerfEventArray, Vec>), // this will manage the BPF_MAP_TYPE_PERF_EVENT_ARRAY and its buffer + >::new(); + + // map_manager creates an hashmap that contains: + // MAP NAME as String (KEY) + // + // VALUES (tuple) + // a PERF_EVENT_ARRAY + // a vector of PERF_EVENT_ARRAY_BUFFER + // + // the map manager helps the event listener to specifically call a map by its pinned name + // e.g. veth_identity_map and returns the associated PERF_EVENT_ARRAY and PERF_EVENT_ARRAY_BUFFERS (1 per CPU) + // also the map manager helps to write a more complete debug context by linking map names with arrays and buffers. + // actually i cannot return the extact information using only the Aya library + + // create the PerfEventArrays and the buffers from the BpfMapsData Objects + for (map, name) in maps + .bpf_obj_map + .into_iter() + .zip(maps.bpf_obj_names.into_iter()) + // zip two iterators at the same time for map object and map names + { + debug!("Debugging map type:{:?} for map name {:?}", map, &name); + info!("Creating PerfEventArray for map name {:?}", &name); + + // save the map in a registry if is a PerfEventArray to access them by name + if let std::result::Result::Ok(perf_event_array) = PerfEventArray::try_from(map) { + map_manager.insert(name.clone(), (perf_event_array, Vec::new())); + } else { + warn!("Map {:?} is not a PerfEventArray, skipping load", &name); + } + } + Ok(map_manager) +} diff --git a/core/src/components/identity/src/main.rs b/core/src/components/identity/src/main.rs index 598b964..c70011e 100644 --- a/core/src/components/identity/src/main.rs +++ b/core/src/components/identity/src/main.rs @@ -14,10 +14,6 @@ mod service_discovery; use crate::helpers::{get_veth_channels, read_perf_buffer}; use aya::{ Ebpf, - maps::{ - MapData, - perf::{PerfEventArray, PerfEventArrayBuffer}, - }, programs::{SchedClassifier, TcAttachType, tc::SchedClassifierLinkId}, util::online_cpus, }; @@ -25,8 +21,9 @@ use aya::{ #[cfg(feature = "experimental")] use crate::helpers::scan_cgroup_cronjob; -use bytes::BytesMut; -use cortexbrain_common::map_handlers::{init_bpf_maps, map_pinner, populate_blocklist}; +use cortexbrain_common::map_handlers::{ + init_bpf_maps, map_manager, map_pinner, populate_blocklist, +}; use cortexbrain_common::program_handlers::load_program; use cortexbrain_common::{buffer_type::BufferType, map_handlers::BpfMapsData}; use std::{ @@ -36,11 +33,11 @@ use std::{ }; use anyhow::{Context, Ok}; +use cortexbrain_common::buffer_type::BufferSize; use cortexbrain_common::{constants, logger}; -use tokio::{fs, signal}; -use tracing::{debug, error, info, warn}; - use std::collections::HashMap; +use tokio::{fs, signal}; +use tracing::{error, info}; #[tokio::main] async fn main() -> Result<(), anyhow::Error> { @@ -203,34 +200,11 @@ async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> { //TODO: try to change from PerfEventArray to a RingBuffer data structure - let mut map_manager = - HashMap::, Vec>)>::new(); - - // create the PerfEventArrays and the buffers from the BpfMapsData Objects - for (map, name) in bpf_maps - .bpf_obj_map - .into_iter() - .zip(bpf_maps.bpf_obj_names.into_iter()) - // zip two iterators at the same time for map and mapnames - { - debug!("Debugging map type:{:?} for map name {:?}", map, &name); - info!("Creating PerfEventArray for map name {:?}", &name); - - // save the map in a registry if is a PerfEventArray to access them by name - if let std::result::Result::Ok(perf_event_array) = PerfEventArray::try_from(map) { - map_manager.insert(name.clone(), (perf_event_array, Vec::new())); - - // perf_event_arrays.push(perf_event_array); // this is step 1 - // let perf_event_array_buffer = Vec::new(); - // event_buffers.push(perf_event_array_buffer); //this is step 2 - } else { - warn!("Map {:?} is not a PerfEventArray, skipping load", &name); - } - } + let mut maps = map_manager(bpf_maps)?; // fill the input buffers with data from the PerfEventArrays for cpu_id in online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))? { - for (name, (perf_evt_array, perf_evt_array_buffer)) in map_manager.iter_mut() { + for (name, (perf_evt_array, perf_evt_array_buffer)) in maps.iter_mut() { let buf = perf_evt_array.open(cpu_id, None)?; info!( "Buffer created for map {:?} on cpu_id {:?}. Buffer size: {}", @@ -245,23 +219,20 @@ async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> { info!("Listening for events..."); // i need to use remove to move the values from the Map Manager to the the async tasks - let (perf_veth_array, perf_veth_buffers) = map_manager + let (perf_veth_array, perf_veth_buffers) = maps .remove("veth_identity_map") .expect("Cannot create perf_veth buffer"); - let (perf_net_events_array, perf_net_events_buffers) = map_manager + let (perf_net_events_array, perf_net_events_buffers) = maps .remove("events_map") .expect("Cannot create perf_net_events buffer"); - let (tcp_registry_array, tcp_registry_buffers) = map_manager + let (tcp_registry_array, tcp_registry_buffers) = maps .remove("TcpPacketRegistry") .expect("Cannot create tcp_registry buffer"); // init output buffers - let veth_buffers = vec![BytesMut::with_capacity(10 * 1024); online_cpus().iter().len()]; - let events_buffers = vec![BytesMut::with_capacity(1024); online_cpus().iter().len()]; - let tcp_buffers = vec![BytesMut::with_capacity(1024); online_cpus().iter().len()]; - - // init veth link ids - //let veth_link_ids = link_ids; + let veth_buffers = BufferSize::VethEvents.set_buffer(); + let events_buffers = BufferSize::ClassifierNetEvents.set_buffer(); + let tcp_buffers = BufferSize::TcpEvents.set_buffer(); // spawn async tasks let veth_events_displayer = tokio::spawn(async move { diff --git a/core/src/components/metrics/src/helpers.rs b/core/src/components/metrics/src/helpers.rs index a67b607..e0ab006 100644 --- a/core/src/components/metrics/src/helpers.rs +++ b/core/src/components/metrics/src/helpers.rs @@ -1,5 +1,5 @@ use aya::{ - maps::{Map, MapData, PerfEventArray, perf::PerfEventArrayBuffer}, + maps::{MapData, perf::PerfEventArrayBuffer}, util::online_cpus, }; @@ -10,10 +10,14 @@ use std::sync::{ }; use tokio::signal; -use tracing::{debug, error, info, warn}; +use tracing::{error, info}; -use crate::structs::NetworkMetrics; -use crate::structs::TimeStampMetrics; +use cortexbrain_common::map_handlers::map_manager; +use cortexbrain_common::{ + buffer_type::{BufferSize, BufferType}, + buffer_type::{NetworkMetrics, TimeStampMetrics}, + map_handlers::BpfMapsData, +}; pub async fn display_metrics_map( mut perf_buffers: Vec>, @@ -119,50 +123,36 @@ pub async fn display_time_stamp_events_map( info!("Timestamp event listener stopped"); } -pub async fn event_listener(bpf_maps: Vec) -> Result<(), anyhow::Error> { +pub async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> { info!("Getting CPU count..."); - let mut perf_event_arrays = Vec::new(); // contains a vector of PerfEventArrays - let mut event_buffers = Vec::new(); // contains a vector of buffers - - info!("Creating perf buffers..."); - for map in bpf_maps { - debug!("Debugging map type:{:?}", map); - if let std::result::Result::Ok(perf_event_array) = PerfEventArray::try_from(map) { - perf_event_arrays.push(perf_event_array); // this is step 1 - let perf_event_array_buffer = Vec::new(); - event_buffers.push(perf_event_array_buffer); //this is step 2 - } else { - warn!("Map is not a PerfEventArray, skipping load"); - } - } + let mut maps = map_manager(bpf_maps)?; let cpu_count = online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))?; - //info!("CPU count: {}", cpu_count); - for (perf_evt_array, perf_evt_array_buffer) in - perf_event_arrays.iter_mut().zip(event_buffers.iter_mut()) - { - for cpu_id in &cpu_count { - let single_buffer = perf_evt_array.open(*cpu_id, None)?; - perf_evt_array_buffer.push(single_buffer); + for cpu_id in cpu_count { + for (name, (perf_event_array, perf_event_buffer)) in maps.iter_mut() { + let buf = perf_event_array.open(cpu_id, None)?; + perf_event_buffer.push(buf); } } - //info!("Opening perf buffers for {} CPUs...", cpu_count); info!("Perf buffers created successfully"); - let mut event_buffers = event_buffers.into_iter(); - let time_stamp_events_perf_buffer = event_buffers.next().expect(""); - let net_perf_buffer = event_buffers.next().expect(""); + let (time_stamp_events_array, time_stamp_events_perf_buffer) = maps + .remove("time_stamp_events") + .expect("Cannot create time_stamp_events_buffer"); + let (net_perf_array, net_perf_buffer) = maps + .remove("net_metrics") + .expect("Cannot create net_perf_buffer"); // Create shared running flags let net_metrics_running = Arc::new(AtomicBool::new(true)); let time_stamp_events_running = Arc::new(AtomicBool::new(true)); // Create proper sized buffers - let net_metrics_buffers = vec![BytesMut::with_capacity(1024); cpu_count.len()]; - let time_stamp_events_buffers = vec![BytesMut::with_capacity(1024); cpu_count.len()]; + let net_metrics_buffers = BufferSize::NetworkMetricsEvents.set_buffer(); + let time_stamp_events_buffers = BufferSize::TimeMetricsEvents.set_buffer(); // Clone for the signal handler let net_metrics_running_signal = net_metrics_running.clone(); From 9575e8957e54e25208f696da6a964f833b1974f4 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Wed, 11 Feb 2026 21:39:29 +0100 Subject: [PATCH 19/46] [#158]: moved Monitoring structures to shared library --- core/common/src/buffer_type.rs | 286 +++++++++++++++++++- core/common/src/lib.rs | 7 +- core/src/components/identity/src/helpers.rs | 58 +--- core/src/components/metrics/src/structs.rs | 33 --- 4 files changed, 286 insertions(+), 98 deletions(-) delete mode 100644 core/src/components/metrics/src/structs.rs diff --git a/core/common/src/buffer_type.rs b/core/common/src/buffer_type.rs index 9fc7828..ad906ce 100644 --- a/core/common/src/buffer_type.rs +++ b/core/common/src/buffer_type.rs @@ -1,3 +1,4 @@ +use aya::{maps::perf::PerfEventArrayBuffer, util::online_cpus}; use bytemuck_derive::Zeroable; use bytes::BytesMut; use std::net::Ipv4Addr; @@ -54,19 +55,21 @@ unsafe impl aya::Pod for PacketLog {} #[cfg(feature = "network-structs")] #[repr(C, packed)] -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Zeroable)] pub struct VethLog { pub name: [u8; 16], // 16 bytes: veth interface name pub state: u64, // 8 bytes: state variable (unsigned long in kernel) - pub dev_addr: [u8; 6], // 32 bytes: device address + pub dev_addr: [u8; 6], // 6 bytes: device address pub event_type: u8, // 1 byte: 1 for veth creation, 2 for veth destruction pub netns: u32, // 4 bytes: network namespace inode number pub pid: u32, // 4 bytes: PID that triggered the event } +#[cfg(feature = "network-structs")] +unsafe impl aya::Pod for VethLog {} #[cfg(feature = "network-structs")] #[repr(C)] -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Zeroable)] pub struct TcpPacketRegistry { pub proto: u8, pub src_ip: u32, @@ -77,6 +80,47 @@ pub struct TcpPacketRegistry { pub command: [u8; 16], pub cgroup_id: u64, } +#[cfg(feature = "network-structs")] +unsafe impl aya::Pod for TcpPacketRegistry {} + +#[cfg(feature = "monitoring-structs")] +pub const TASK_COMM_LEN: usize = 16; // linux/sched.h +#[cfg(feature = "monitoring-structs")] +#[repr(C)] +#[derive(Clone, Copy, Zeroable)] +pub struct NetworkMetrics { + pub tgid: u32, + pub comm: [u8; TASK_COMM_LEN], + pub ts_us: u64, + pub sk_err: i32, // Offset 284 + pub sk_err_soft: i32, // Offset 600 + pub sk_backlog_len: i32, // Offset 196 + pub sk_write_memory_queued: i32, // Offset 376 + pub sk_receive_buffer_size: i32, // Offset 244 + pub sk_ack_backlog: u32, // Offset 604 + pub sk_drops: i32, // Offset 136 +} +#[cfg(feature = "monitoring-structs")] +unsafe impl aya::Pod for NetworkMetrics {} + +#[cfg(feature = "monitoring-structs")] +#[repr(C)] +#[derive(Clone, Copy, Zeroable)] +pub struct TimeStampMetrics { + pub delta_us: u64, + pub ts_us: u64, + pub tgid: u32, + pub comm: [u8; TASK_COMM_LEN], + pub lport: u16, + pub dport_be: u16, + pub af: u16, + pub saddr_v4: u32, + pub daddr_v4: u32, + pub saddr_v6: [u32; 4], + pub daddr_v6: [u32; 4], +} +#[cfg(feature = "monitoring-structs")] +unsafe impl aya::Pod for TimeStampMetrics {} // docs: // This function perform a byte swap from little-endian to big-endian @@ -95,15 +139,23 @@ pub fn reverse_be_addr(addr: u32) -> Ipv4Addr { // enum BuffersType #[cfg(feature = "buffer-reader")] pub enum BufferType { + #[cfg(feature = "network-structs")] PacketLog, + #[cfg(feature = "network-structs")] TcpPacketRegistry, + #[cfg(feature = "network-structs")] VethLog, + #[cfg(feature = "monitoring-structs")] + NetworkMetrics, + #[cfg(feature = "monitoring-structs")] + TimeStampMetrics, } // IDEA: this is an experimental implementation to centralize buffer reading logic // TODO: add variant for cortexflow API exporter #[cfg(feature = "buffer-reader")] impl BufferType { + #[cfg(feature = "network-structs")] pub async fn read_packet_log(buffers: &mut [BytesMut], tot_events: i32, offset: i32) { for i in offset..tot_events { let vec_bytes = &buffers[i as usize]; @@ -147,6 +199,7 @@ impl BufferType { } } } + #[cfg(feature = "network-structs")] pub async fn read_tcp_registry_log(buffers: &mut [BytesMut], tot_events: i32, offset: i32) { for i in offset..tot_events { let vec_bytes = &buffers[i as usize]; @@ -204,11 +257,8 @@ impl BufferType { } } } - pub async fn read_and_handle_veth_log( - buffers: &mut [BytesMut], - tot_events: i32, - offset: i32, - ) { + #[cfg(feature = "network-structs")] + pub async fn read_and_handle_veth_log(buffers: &mut [BytesMut], tot_events: i32, offset: i32) { for i in offset..tot_events { let vec_bytes = &buffers[i as usize]; if vec_bytes.len() < std::mem::size_of::() { @@ -289,4 +339,224 @@ impl BufferType { } } } + #[cfg(feature = "monitoring-structs")] + pub async fn read_network_metrics(buffers: &mut [BytesMut], tot_events: i32, offset: i32) { + for i in offset..tot_events { + let vec_bytes = &buffers[i as usize]; + if vec_bytes.len() < std::mem::size_of::() { + error!( + "Corrupted Network Metrics data. Raw data: {}. Readed {} bytes expected {} bytes", + vec_bytes + .iter() + .map(|b| format!("{:02x}", b)) + .collect::>() + .join(" "), + vec_bytes.len(), + std::mem::size_of::() + ); + continue; + } + if vec_bytes.len() >= std::mem::size_of::() { + let net_metrics: NetworkMetrics = + unsafe { std::ptr::read_unaligned(vec_bytes.as_ptr() as *const _) }; + let tgid = net_metrics.tgid; + let comm = String::from_utf8_lossy(&net_metrics.comm); + let ts_us = net_metrics.ts_us; + let sk_drop_count = net_metrics.sk_drops; + let sk_err = net_metrics.sk_err; + let sk_err_soft = net_metrics.sk_err_soft; + let sk_backlog_len = net_metrics.sk_backlog_len; + let sk_write_memory_queued = net_metrics.sk_write_memory_queued; + let sk_ack_backlog = net_metrics.sk_ack_backlog; + let sk_receive_buffer_size = net_metrics.sk_receive_buffer_size; + + info!( + "tgid: {}, comm: {}, ts_us: {}, sk_drops: {}, sk_err: {}, sk_err_soft: {}, sk_backlog_len: {}, sk_write_memory_queued: {}, sk_ack_backlog: {}, sk_receive_buffer_size: {}", + tgid, + comm, + ts_us, + sk_drop_count, + sk_err, + sk_err_soft, + sk_backlog_len, + sk_write_memory_queued, + sk_ack_backlog, + sk_receive_buffer_size + ); + } + } + } + #[cfg(feature = "monitoring-structs")] + pub async fn read_timestamp_metrics(buffers: &mut [BytesMut], tot_events: i32, offset: i32) { + for i in offset..tot_events { + let vec_bytes = &buffers[i as usize]; + if vec_bytes.len() < std::mem::size_of::() { + error!( + "Corrupted Network Metrics data. Raw data: {}. Readed {} bytes expected {} bytes", + vec_bytes + .iter() + .map(|b| format!("{:02x}", b)) + .collect::>() + .join(" "), + vec_bytes.len(), + std::mem::size_of::() + ); + continue; + } + if vec_bytes.len() >= std::mem::size_of::() { + let time_stamp_event: TimeStampMetrics = + unsafe { std::ptr::read_unaligned(vec_bytes.as_ptr() as *const _) }; + let delta_us = time_stamp_event.delta_us; + let ts_us = time_stamp_event.ts_us; + let tgid = time_stamp_event.tgid; + let comm = String::from_utf8_lossy(&time_stamp_event.comm); + let lport = time_stamp_event.lport; + let dport_be = time_stamp_event.dport_be; + let af = time_stamp_event.af; + info!( + "TimeStampEvent - delta_us: {}, ts_us: {}, tgid: {}, comm: {}, lport: {}, dport_be: {}, af: {}", + delta_us, ts_us, tgid, comm, lport, dport_be, af + ); + } + } + } +} + +// docs: read buffer function: +// template function that take a mut perf_event_array_buffer of type T and a mutable buffer of Vec +#[cfg(feature = "buffer-reader")] +pub async fn read_perf_buffer>( + mut array_buffers: Vec>, + mut buffers: Vec, + buffer_type: BufferType, +) { + // loop over the buffers + loop { + for buf in array_buffers.iter_mut() { + match buf.read_events(&mut buffers) { + Ok(events) => { + // triggered if some events are lost + if events.lost > 0 { + tracing::debug!("Lost events: {} ", events.lost); + } + // triggered if some events are readed + if events.read > 0 { + tracing::debug!("Readed events: {}", events.read); + let offset = 0; + let tot_events = events.read as i32; + + //read the events in the buffer + match buffer_type { + #[cfg(feature = "network-structs")] + BufferType::PacketLog => { + BufferType::read_packet_log(&mut buffers, tot_events, offset).await + } + #[cfg(feature = "network-structs")] + BufferType::TcpPacketRegistry => { + BufferType::read_tcp_registry_log(&mut buffers, tot_events, offset) + .await + } + #[cfg(feature = "network-structs")] + BufferType::VethLog => { + BufferType::read_and_handle_veth_log( + &mut buffers, + tot_events, + offset, + ) + .await + } + #[cfg(feature = "monitoring-structs")] + BufferType::NetworkMetrics => { + BufferType::read_network_metrics(&mut buffers, tot_events, offset) + .await + } + #[cfg(feature = "monitoring-structs")] + BufferType::TimeStampMetrics => { + BufferType::read_timestamp_metrics(&mut buffers, tot_events, offset) + .await + } + } + } + } + Err(e) => { + error!("Cannot read events from buffer. Reason: {} ", e); + } + } + } + tokio::time::sleep(std::time::Duration::from_millis(100)).await; // small sleep + } +} + +#[cfg(feature = "buffer-reader")] +pub enum BufferSize { + #[cfg(feature = "network-structs")] + ClassifierNetEvents, + #[cfg(feature = "network-structs")] + VethEvents, + #[cfg(feature = "network-structs")] + TcpEvents, + #[cfg(feature = "monitoring-structs")] + NetworkMetricsEvents, + #[cfg(feature = "monitoring-structs")] + TimeMetricsEvents, +} +#[cfg(feature = "buffer-reader")] +impl BufferSize { + pub fn get_size(&self) -> usize { + match self { + #[cfg(feature = "network-structs")] + BufferSize::ClassifierNetEvents => std::mem::size_of::(), + #[cfg(feature = "network-structs")] + BufferSize::VethEvents => std::mem::size_of::(), + #[cfg(feature = "network-structs")] + BufferSize::TcpEvents => std::mem::size_of::(), + #[cfg(feature = "monitoring-structs")] + BufferSize::NetworkMetricsEvents => std::mem::size_of::(), + #[cfg(feature = "monitoring-structs")] + BufferSize::TimeMetricsEvents => std::mem::size_of::(), + } + } + pub fn set_buffer(&self) -> Vec { + // iter returns and iterator of cpu ids, + // we need only the total number of cpus to set the buffer size so we use .len() to get + // the count of total cpus and then we allocate a buffer for each cpu with a capacity + // based on the structure size * a factor to have a bigger buffer to avoid overflows and lost events + + // Old buffers where 1024 bytes long. Now we set different buffer size based on + // the frequence of the events. + // ClassifierNetEvents are triggered by the TC classifier program, events has high frequency + // VethEvents are triggered by the creation and deletion of veth interfaces, events has small frequency compared to classifier events + // TcpEvents are triggered by TCP events and connections. Events has similar frequency to ClassifierNetEvents. + + let tot_cpu = online_cpus().iter().len(); // total number of cpus + + // TODO: finish to do all the calculations for the buffer sizes + match self { + #[cfg(feature = "network-structs")] + BufferSize::ClassifierNetEvents => { + let capacity = self.get_size() * 200; + return vec![BytesMut::with_capacity(capacity); tot_cpu]; + } + #[cfg(feature = "network-structs")] + BufferSize::VethEvents => { + let capacity = self.get_size() * 100; // Allocates 4Kb of memory for the buffers + return vec![BytesMut::with_capacity(capacity); tot_cpu]; + } + #[cfg(feature = "network-structs")] + BufferSize::TcpEvents => { + let capacity = self.get_size() * 200; + return vec![BytesMut::with_capacity(capacity); tot_cpu]; + } + #[cfg(feature = "monitoring-structs")] + BufferSize::NetworkMetricsEvents => { + let capacity = self.get_size() * 1024; + return vec![BytesMut::with_capacity(capacity); tot_cpu]; + } + #[cfg(feature = "monitoring-structs")] + BufferSize::TimeMetricsEvents => { + let capacity = self.get_size() * 1024; + return vec![BytesMut::with_capacity(capacity); tot_cpu]; + } + } + } } diff --git a/core/common/src/lib.rs b/core/common/src/lib.rs index d88c1db..d7e48b0 100644 --- a/core/common/src/lib.rs +++ b/core/common/src/lib.rs @@ -1,5 +1,8 @@ -#[cfg(feature = "buffer-reader")] -#[cfg(feature = "network-structs")] +#[cfg(any( + feature = "buffer-reader", + feature = "network-structs", + feature = "monitoring-structs" +))] pub mod buffer_type; pub mod constants; pub mod formatters; diff --git a/core/src/components/identity/src/helpers.rs b/core/src/components/identity/src/helpers.rs index bd76a29..50414bf 100644 --- a/core/src/components/identity/src/helpers.rs +++ b/core/src/components/identity/src/helpers.rs @@ -1,14 +1,13 @@ -use aya::maps::perf::PerfEventArrayBuffer; -use cortexbrain_common::buffer_type::BufferType; use nix::net::if_::if_nameindex; use std::result::Result::Ok; -use tracing::{error, info}; +use tracing::info; // docs: // This function checks if the given interface name is in the list of ignored interfaces // Takes a interface name (iface) as &str and returns true if the interface should be ignored // Typically we want to ignore eth0,docker0,tunl0,lo interfaces because they are not relevant for the internal monitoring // +#[inline(always)] pub fn ignore_iface(iface: &str) -> bool { let ignored_interfaces = ["eth0", "docker0", "tunl0", "lo"]; ignored_interfaces.contains(&iface) @@ -18,6 +17,7 @@ pub fn ignore_iface(iface: &str) -> bool { // This function retrieves the list of veth interfaces on the system, filtering out ignored interfaces with // the ignore_iface function. // +#[inline(always)] pub fn get_veth_channels() -> Vec { //filter interfaces and save the output in the let mut interfaces: Vec = Vec::new(); @@ -36,58 +36,6 @@ pub fn get_veth_channels() -> Vec { interfaces } -// docs: read buffer function: -// template function that take a mut perf_event_array_buffer of type T and a mutable buffer of Vec - -pub async fn read_perf_buffer>( - mut array_buffers: Vec>, - mut buffers: Vec, - buffer_type: BufferType, -) { - // loop over the buffers - loop { - for buf in array_buffers.iter_mut() { - match buf.read_events(&mut buffers) { - Ok(events) => { - // triggered if some events are lost - if events.lost > 0 { - tracing::debug!("Lost events: {} ", events.lost); - } - // triggered if some events are readed - if events.read > 0 { - tracing::debug!("Readed events: {}", events.read); - let offset = 0; - let tot_events = events.read as i32; - - //read the events in the buffer - match buffer_type { - BufferType::PacketLog => { - BufferType::read_packet_log(&mut buffers, tot_events, offset).await - } - BufferType::TcpPacketRegistry => { - BufferType::read_tcp_registry_log(&mut buffers, tot_events, offset) - .await - } - BufferType::VethLog => { - BufferType::read_and_handle_veth_log( - &mut buffers, - tot_events, - offset, - ) - .await - } - } - } - } - Err(e) => { - error!("Cannot read events from buffer. Reason: {} ", e); - } - } - } - tokio::time::sleep(std::time::Duration::from_millis(100)).await; // small sleep - } -} - #[cfg(test)] mod tests { use cortexbrain_common::buffer_type::VethLog; diff --git a/core/src/components/metrics/src/structs.rs b/core/src/components/metrics/src/structs.rs deleted file mode 100644 index dc63ace..0000000 --- a/core/src/components/metrics/src/structs.rs +++ /dev/null @@ -1,33 +0,0 @@ - -pub const TASK_COMM_LEN: usize = 16; // linux/sched.h - -#[repr(C, packed)] -#[derive(Clone, Copy)] -pub struct NetworkMetrics { - pub tgid: u32, - pub comm: [u8; TASK_COMM_LEN], - pub ts_us: u64, - pub sk_err: i32, // Offset 284 - pub sk_err_soft: i32, // Offset 600 - pub sk_backlog_len: i32, // Offset 196 - pub sk_write_memory_queued: i32, // Offset 376 - pub sk_receive_buffer_size: i32, // Offset 244 - pub sk_ack_backlog: u32, // Offset 604 - pub sk_drops: i32, // Offset 136 -} - -#[repr(C)] -#[derive(Clone, Copy)] -pub struct TimeStampMetrics { - pub delta_us: u64, - pub ts_us: u64, - pub tgid: u32, - pub comm: [u8; TASK_COMM_LEN], - pub lport: u16, - pub dport_be: u16, - pub af: u16, - pub saddr_v4: u32, - pub daddr_v4: u32, - pub saddr_v6: [u32; 4], - pub daddr_v6: [u32; 4], -} \ No newline at end of file From 9881df307f0d44b3d1c98e2005ac66b8b0691508 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Wed, 11 Feb 2026 21:42:06 +0100 Subject: [PATCH 20/46] [#175]: added otlp provider to metrics service. Simplified map handling and added read_perf_buffer function --- core/common/Cargo.toml | 1 + core/src/components/metrics/Cargo.toml | 5 +- core/src/components/metrics/src/helpers.rs | 148 ++------------------- core/src/components/metrics/src/main.rs | 8 +- 4 files changed, 22 insertions(+), 140 deletions(-) diff --git a/core/common/Cargo.toml b/core/common/Cargo.toml index f604b65..ee50e2b 100644 --- a/core/common/Cargo.toml +++ b/core/common/Cargo.toml @@ -24,6 +24,7 @@ opentelemetry-otlp = { version = "0.31.0", features = ["logs", "grpc-tonic"] } bytemuck = "1.25.0" bytes = "1.11.0" bytemuck_derive = "1.10.2" +tokio = "1.49.0" [features] map-handlers = [] diff --git a/core/src/components/metrics/Cargo.toml b/core/src/components/metrics/Cargo.toml index 0e88d8c..c8dcb5b 100644 --- a/core/src/components/metrics/Cargo.toml +++ b/core/src/components/metrics/Cargo.toml @@ -20,8 +20,11 @@ tracing = "0.1.41" tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } libc = "0.2.172" bytemuck = "1.23.0" -cortexbrain-common = { path = "../../../common", features = [ +cortexbrain-common = { path = "../../../common/", features = [ "map-handlers", "program-handlers", + "buffer-reader", + "monitoring-structs", + "network-structs" ] } nix = { version = "0.30.1", features = ["net"] } diff --git a/core/src/components/metrics/src/helpers.rs b/core/src/components/metrics/src/helpers.rs index e0ab006..0968113 100644 --- a/core/src/components/metrics/src/helpers.rs +++ b/core/src/components/metrics/src/helpers.rs @@ -1,127 +1,11 @@ -use aya::{ - maps::{MapData, perf::PerfEventArrayBuffer}, - util::online_cpus, -}; - -use bytes::BytesMut; -use std::sync::{ - Arc, - atomic::{AtomicBool, Ordering}, -}; -use tokio::signal; - -use tracing::{error, info}; - +use aya::util::online_cpus; use cortexbrain_common::map_handlers::map_manager; use cortexbrain_common::{ - buffer_type::{BufferSize, BufferType}, - buffer_type::{NetworkMetrics, TimeStampMetrics}, + buffer_type::{BufferSize, BufferType, read_perf_buffer}, map_handlers::BpfMapsData, }; - -pub async fn display_metrics_map( - mut perf_buffers: Vec>, - running: Arc, // Changed to Arc - mut buffers: Vec, -) { - info!("Starting metrics event listener..."); - while running.load(Ordering::SeqCst) { - for buf in perf_buffers.iter_mut() { - match buf.read_events(&mut buffers) { - std::result::Result::Ok(events) => { - if events.read > 0 { - info!("Read {} metric events", events.read); - } - for i in 0..events.read { - let data = &buffers[i]; - if data.len() >= std::mem::size_of::() { - let net_metrics: NetworkMetrics = - unsafe { std::ptr::read_unaligned(data.as_ptr() as *const _) }; - let tgid = net_metrics.tgid; - let comm = String::from_utf8_lossy(&net_metrics.comm); - let ts_us = net_metrics.ts_us; - let sk_drop_count = net_metrics.sk_drops; - let sk_err = net_metrics.sk_err; - let sk_err_soft = net_metrics.sk_err_soft; - let sk_backlog_len = net_metrics.sk_backlog_len; - let sk_write_memory_queued = net_metrics.sk_write_memory_queued; - let sk_ack_backlog = net_metrics.sk_ack_backlog; - let sk_receive_buffer_size = net_metrics.sk_receive_buffer_size; - info!( - "tgid: {}, comm: {}, ts_us: {}, sk_drops: {}, sk_err: {}, sk_err_soft: {}, sk_backlog_len: {}, sk_write_memory_queued: {}, sk_ack_backlog: {}, sk_receive_buffer_size: {}", - tgid, - comm, - ts_us, - sk_drop_count, - sk_err, - sk_err_soft, - sk_backlog_len, - sk_write_memory_queued, - sk_ack_backlog, - sk_receive_buffer_size - ); - } else { - info!( - "Received data too small: {} bytes, expected: {}", - data.len(), - std::mem::size_of::() - ); - } - } - } - Err(e) => { - error!("Error reading events: {:?}", e); - } - } - } - tokio::time::sleep(std::time::Duration::from_millis(100)).await; - } - info!("Metrics event listener stopped"); -} - -pub async fn display_time_stamp_events_map( - mut perf_buffers: Vec>, - running: Arc, // Changed to Arc - mut buffers: Vec, -) { - info!("Starting timestamp event listener..."); - while running.load(Ordering::SeqCst) { - for buf in perf_buffers.iter_mut() { - match buf.read_events(&mut buffers) { - std::result::Result::Ok(events) => { - if events.read > 0 { - info!("Read {} timestamp events", events.read); - } - for i in 0..events.read { - let data = &buffers[i]; - if data.len() >= std::mem::size_of::() { - let time_stamp_event: TimeStampMetrics = - unsafe { std::ptr::read_unaligned(data.as_ptr() as *const _) }; - let delta_us = time_stamp_event.delta_us; - let ts_us = time_stamp_event.ts_us; - let tgid = time_stamp_event.tgid; - let comm = String::from_utf8_lossy(&time_stamp_event.comm); - let lport = time_stamp_event.lport; - let dport_be = time_stamp_event.dport_be; - let af = time_stamp_event.af; - info!( - "TimeStampEvent - delta_us: {}, ts_us: {}, tgid: {}, comm: {}, lport: {}, dport_be: {}, af: {}", - delta_us, ts_us, tgid, comm, lport, dport_be, af - ); - } else { - info!("Received timestamp data too small: {} bytes", data.len()); - } - } - } - Err(e) => { - error!("Error reading timestamp events: {:?}", e); - } - } - } - tokio::time::sleep(std::time::Duration::from_millis(100)).await; - } - info!("Timestamp event listener stopped"); -} +use tokio::signal; +use tracing::{error, info}; pub async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> { info!("Getting CPU count..."); @@ -146,30 +30,27 @@ pub async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> .remove("net_metrics") .expect("Cannot create net_perf_buffer"); - // Create shared running flags - let net_metrics_running = Arc::new(AtomicBool::new(true)); - let time_stamp_events_running = Arc::new(AtomicBool::new(true)); - // Create proper sized buffers let net_metrics_buffers = BufferSize::NetworkMetricsEvents.set_buffer(); let time_stamp_events_buffers = BufferSize::TimeMetricsEvents.set_buffer(); - // Clone for the signal handler - let net_metrics_running_signal = net_metrics_running.clone(); - let time_stamp_events_running_signal = time_stamp_events_running.clone(); - info!("Starting event listener tasks..."); let metrics_map_displayer = tokio::spawn(async move { - display_metrics_map(net_perf_buffer, net_metrics_running, net_metrics_buffers).await; + read_perf_buffer( + net_perf_buffer, + net_metrics_buffers, + BufferType::NetworkMetrics, + ) + .await; }); let time_stamp_events_displayer = tokio::spawn(async move { - display_time_stamp_events_map( + read_perf_buffer( time_stamp_events_perf_buffer, - time_stamp_events_running, time_stamp_events_buffers, + BufferType::TimeStampMetrics, ) - .await + .await; }); info!("Event listeners started, entering main loop..."); @@ -189,9 +70,6 @@ pub async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> _ = signal::ctrl_c() => { info!("Ctrl-C received, shutting down..."); - // Stop the event loops - net_metrics_running_signal.store(false, std::sync::atomic::Ordering::SeqCst); - time_stamp_events_running_signal.store(false, std::sync::atomic::Ordering::SeqCst); } } diff --git a/core/src/components/metrics/src/main.rs b/core/src/components/metrics/src/main.rs index e8677fb..e6c9069 100644 --- a/core/src/components/metrics/src/main.rs +++ b/core/src/components/metrics/src/main.rs @@ -1,6 +1,6 @@ use anyhow::{Context, Ok}; use aya::Ebpf; -use cortexbrain_common::{constants, logger}; +use cortexbrain_common::constants; use std::{ env, fs, path::Path, @@ -11,15 +11,14 @@ use tracing::{error, info}; mod helpers; use crate::helpers::event_listener; +use cortexbrain_common::logger::otlp_logger_init; use cortexbrain_common::map_handlers::{init_bpf_maps, map_pinner}; use cortexbrain_common::program_handlers::load_program; -mod structs; - #[tokio::main] async fn main() -> Result<(), anyhow::Error> { //init tracing subscriber - logger::init_default_logger(); + let otlp_provider = otlp_logger_init("metrics-service".to_string()); info!("Starting metrics service..."); info!("fetching data"); @@ -78,6 +77,7 @@ async fn main() -> Result<(), anyhow::Error> { } Err(e) => { error!("Error initializing BPF maps: {:?}", e); + let _ = otlp_provider.shutdown(); return Err(e); } } From b05d9b9be81695462dc8b12d488b188040523b15 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Wed, 11 Feb 2026 21:56:34 +0100 Subject: [PATCH 21/46] [#158]: fixed imports from the common crate --- core/src/components/identity/src/main.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/components/identity/src/main.rs b/core/src/components/identity/src/main.rs index c70011e..d42564a 100644 --- a/core/src/components/identity/src/main.rs +++ b/core/src/components/identity/src/main.rs @@ -11,7 +11,7 @@ mod helpers; mod service_discovery; -use crate::helpers::{get_veth_channels, read_perf_buffer}; +use crate::helpers::get_veth_channels; use aya::{ Ebpf, programs::{SchedClassifier, TcAttachType, tc::SchedClassifierLinkId}, @@ -21,6 +21,7 @@ use aya::{ #[cfg(feature = "experimental")] use crate::helpers::scan_cgroup_cronjob; +use cortexbrain_common::buffer_type::read_perf_buffer; use cortexbrain_common::map_handlers::{ init_bpf_maps, map_manager, map_pinner, populate_blocklist, }; From a32698be18a2abf92f7636db42e45b8463c58309 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Wed, 11 Feb 2026 21:57:01 +0100 Subject: [PATCH 22/46] added TODOs in conntracker kernel module --- core/src/components/conntracker/src/main.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/src/components/conntracker/src/main.rs b/core/src/components/conntracker/src/main.rs index e723e4b..8438838 100644 --- a/core/src/components/conntracker/src/main.rs +++ b/core/src/components/conntracker/src/main.rs @@ -32,6 +32,10 @@ use crate::tc::try_identity_classifier; use crate::tcp_analyzer::try_tcp_analyzer; use crate::veth_tracer::try_veth_tracer; +// TODO: add function to track +// 1. kprobe:tcp_enter_memory_pressure +// 2. kprobe:tcp_create_openreq_child (https://elixir.bootlin.com/linux/v6.18.6/source/net/ipv4/tcp_ipv4.c#L1776) [function: *tcp_v4_syn_recv_sock] + // docs: // // virtual ethernet (veth) interface tracer: From 398bff0e337b3a023b6f6df13178836e9f79832c Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Wed, 11 Feb 2026 22:39:06 +0100 Subject: [PATCH 23/46] [#175]: added repr(C,packed) for monitoring structures. Fixed imports. Added error handling in event listener --- core/common/src/buffer_type.rs | 4 ++-- core/src/components/identity/src/main.rs | 23 +++++++++++-------- core/src/components/metrics/src/helpers.rs | 14 ++++++++++- core/src/components/metrics/src/main.rs | 10 ++++---- .../metrics_tracer/src/data_structures.rs | 6 ++--- 5 files changed, 38 insertions(+), 19 deletions(-) diff --git a/core/common/src/buffer_type.rs b/core/common/src/buffer_type.rs index ad906ce..ac0d600 100644 --- a/core/common/src/buffer_type.rs +++ b/core/common/src/buffer_type.rs @@ -86,7 +86,7 @@ unsafe impl aya::Pod for TcpPacketRegistry {} #[cfg(feature = "monitoring-structs")] pub const TASK_COMM_LEN: usize = 16; // linux/sched.h #[cfg(feature = "monitoring-structs")] -#[repr(C)] +#[repr(C, packed)] #[derive(Clone, Copy, Zeroable)] pub struct NetworkMetrics { pub tgid: u32, @@ -104,7 +104,7 @@ pub struct NetworkMetrics { unsafe impl aya::Pod for NetworkMetrics {} #[cfg(feature = "monitoring-structs")] -#[repr(C)] +#[repr(C, packed)] #[derive(Clone, Copy, Zeroable)] pub struct TimeStampMetrics { pub delta_us: u64, diff --git a/core/src/components/identity/src/main.rs b/core/src/components/identity/src/main.rs index d42564a..4efa3c9 100644 --- a/core/src/components/identity/src/main.rs +++ b/core/src/components/identity/src/main.rs @@ -21,21 +21,21 @@ use aya::{ #[cfg(feature = "experimental")] use crate::helpers::scan_cgroup_cronjob; -use cortexbrain_common::buffer_type::read_perf_buffer; -use cortexbrain_common::map_handlers::{ - init_bpf_maps, map_manager, map_pinner, populate_blocklist, +use cortexbrain_common::{ + buffer_type::{BufferSize, BufferType, read_perf_buffer}, + constants, logger, + map_handlers::BpfMapsData, + map_handlers::{init_bpf_maps, map_manager, map_pinner, populate_blocklist}, + program_handlers::load_program, }; -use cortexbrain_common::program_handlers::load_program; -use cortexbrain_common::{buffer_type::BufferType, map_handlers::BpfMapsData}; use std::{ convert::TryInto, path::Path, sync::{Arc, Mutex}, }; -use anyhow::{Context, Ok}; -use cortexbrain_common::buffer_type::BufferSize; -use cortexbrain_common::{constants, logger}; +use anyhow::{Context, Ok, anyhow}; + use std::collections::HashMap; use tokio::{fs, signal}; use tracing::{error, info}; @@ -206,7 +206,12 @@ async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> { // fill the input buffers with data from the PerfEventArrays for cpu_id in online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))? { for (name, (perf_evt_array, perf_evt_array_buffer)) in maps.iter_mut() { - let buf = perf_evt_array.open(cpu_id, None)?; + let buf = perf_evt_array.open(cpu_id, None).map_err(|e| { + anyhow!( + "Cannot create perf_event_array buffer from perf_event_array. Reason: {}", + e + ) + })?; info!( "Buffer created for map {:?} on cpu_id {:?}. Buffer size: {}", name, diff --git a/core/src/components/metrics/src/helpers.rs b/core/src/components/metrics/src/helpers.rs index 0968113..843f45d 100644 --- a/core/src/components/metrics/src/helpers.rs +++ b/core/src/components/metrics/src/helpers.rs @@ -1,3 +1,4 @@ +use anyhow::anyhow; use aya::util::online_cpus; use cortexbrain_common::map_handlers::map_manager; use cortexbrain_common::{ @@ -16,7 +17,18 @@ pub async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> for cpu_id in cpu_count { for (name, (perf_event_array, perf_event_buffer)) in maps.iter_mut() { - let buf = perf_event_array.open(cpu_id, None)?; + let buf = perf_event_array.open(cpu_id, None).map_err(|e| { + anyhow!( + "Cannot create perf_event_array buffer from perf_event_array. Reason: {}", + e + ) + })?; + info!( + "Buffer created for map {:?} on cpu_id {:?}. Buffer size: {}", + name, + cpu_id, + std::mem::size_of_val(&buf) + ); perf_event_buffer.push(buf); } } diff --git a/core/src/components/metrics/src/main.rs b/core/src/components/metrics/src/main.rs index e6c9069..e5558eb 100644 --- a/core/src/components/metrics/src/main.rs +++ b/core/src/components/metrics/src/main.rs @@ -1,6 +1,5 @@ use anyhow::{Context, Ok}; use aya::Ebpf; -use cortexbrain_common::constants; use std::{ env, fs, path::Path, @@ -11,9 +10,12 @@ use tracing::{error, info}; mod helpers; use crate::helpers::event_listener; -use cortexbrain_common::logger::otlp_logger_init; -use cortexbrain_common::map_handlers::{init_bpf_maps, map_pinner}; -use cortexbrain_common::program_handlers::load_program; +use cortexbrain_common::{ + constants, + logger::otlp_logger_init, + map_handlers::{init_bpf_maps, map_pinner}, + program_handlers::load_program, +}; #[tokio::main] async fn main() -> Result<(), anyhow::Error> { diff --git a/core/src/components/metrics_tracer/src/data_structures.rs b/core/src/components/metrics_tracer/src/data_structures.rs index f6d7afe..e9866a8 100644 --- a/core/src/components/metrics_tracer/src/data_structures.rs +++ b/core/src/components/metrics_tracer/src/data_structures.rs @@ -2,7 +2,7 @@ use aya_ebpf::{macros::map, maps::{LruPerCpuHashMap, HashMap, PerfEventArray}}; pub const TASK_COMM_LEN: usize = 16; - +#[repr(C,packed)] pub struct NetworkMetrics { pub tgid: u32, pub comm: [u8; TASK_COMM_LEN], @@ -16,7 +16,7 @@ pub struct NetworkMetrics { pub sk_drops: i32, // Offset 136 } -#[repr(C)] +#[repr(C,packed)] #[derive(Copy, Clone)] pub struct TimeStampStartInfo { pub comm: [u8; TASK_COMM_LEN], @@ -25,7 +25,7 @@ pub struct TimeStampStartInfo { } // Event we send to userspace when latency is computed -#[repr(C)] +#[repr(C,packed)] #[derive(Copy, Clone)] pub struct TimeStampEvent { pub delta_us: u64, From 01c63c4c6bb830c0383c112fdf3e2c6cc37c9e25 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Fri, 13 Feb 2026 22:11:40 +0100 Subject: [PATCH 24/46] [#158]: added control to skip load of blocklist if the addresses vector is empty. Added comments and annotations --- core/common/src/map_handlers.rs | 8 ++++++-- core/common/src/program_handlers.rs | 24 +++++++++++++++--------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/core/common/src/map_handlers.rs b/core/common/src/map_handlers.rs index 313f73e..b246b70 100644 --- a/core/common/src/map_handlers.rs +++ b/core/common/src/map_handlers.rs @@ -121,8 +121,11 @@ pub async fn populate_blocklist() -> Result<(), Error> { .filter(|s| !s.is_empty()) .collect(); //String parsing from "x y" to ["x","y"] - info!("Inserting addresses: {:?}", addresses); - for item in addresses { + if addresses.is_empty() { + warn!("No addresses found in the blocklist. Skipping load"); + } + for item in &addresses { + info!("Inserting addresses: {:?}", &item); let addr = Ipv4Addr::from_str(&item)?.octets(); let _ = blocklist_map.insert(addr, addr, 0); } @@ -138,6 +141,7 @@ pub async fn populate_blocklist() -> Result<(), Error> { } #[cfg(feature = "map-handlers")] +// TODO: modify this to accept also HashMap types pub fn load_perf_event_array_from_mapdata( path: &'static str, ) -> Result, Error> { diff --git a/core/common/src/program_handlers.rs b/core/common/src/program_handlers.rs index 42cd3ba..347be51 100644 --- a/core/common/src/program_handlers.rs +++ b/core/common/src/program_handlers.rs @@ -13,32 +13,38 @@ pub fn load_program( .lock() .map_err(|e| anyhow::anyhow!("Cannot get value from lock. Reason: {}", e))?; - // Load and attach the eBPF programs + // Load and attach the eBPF program let program: &mut KProbe = bpf_new .program_mut(program_name) .ok_or_else(|| anyhow::anyhow!("Program {} not found", program_name))? .try_into() .map_err(|e| anyhow::anyhow!("Failed to convert program: {:?}", e))?; + // STEP 1: load program + program .load() .map_err(|e| anyhow::anyhow!("Cannot load program: {}. Error: {}", &program_name, e))?; + // STEP 2: Attach the loaded program to kernel symbol match program.attach(kernel_symbol, 0) { - Ok(_) => info!("{} program attached successfully", kernel_symbol), + Ok(_) => info!( + "{} program attached successfully to kernel symbol {}", + &program_name, &kernel_symbol + ), Err(e) => { - error!("Error attaching {} program {:?}", kernel_symbol, e); + error!( + "Error attaching {} program to kernel symbol {}. Reason: {:?}", + &program_name, &kernel_symbol, e + ); return Err(anyhow::anyhow!( - "Failed to attach {}: {:?}", - kernel_symbol, + "Failed to attach program {} to kernel symbol {}. Reason {:?}", + &program_name, + &kernel_symbol, e )); } }; - info!( - "eBPF program {} loaded and attached successfully", - program_name - ); Ok(()) } From 147802f11111dda6d9b7b5c4cf74eedf88913377 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Fri, 13 Feb 2026 22:14:25 +0100 Subject: [PATCH 25/46] [#158]: added shared hashmap to store tracked veth (TRACKED_VETH). The shared map is used to store the veth names and the status (attached or not) during the startup --- .../conntracker/src/data_structures.rs | 10 ++- core/src/components/identity/src/main.rs | 69 ++++++++++++++++--- 2 files changed, 66 insertions(+), 13 deletions(-) diff --git a/core/src/components/conntracker/src/data_structures.rs b/core/src/components/conntracker/src/data_structures.rs index f4c5047..c55cd3f 100644 --- a/core/src/components/conntracker/src/data_structures.rs +++ b/core/src/components/conntracker/src/data_structures.rs @@ -47,7 +47,7 @@ pub struct ConnArray { // pid: kernel process ID // -#[repr(C,packed)] +#[repr(C, packed)] #[derive(Clone, Copy)] pub struct VethLog { pub name: [u8; 16], // 16 bytes: veth interface name @@ -94,9 +94,13 @@ pub static mut CONNTRACKER: LruPerCpuHashMap = pub static mut VETH_EVENTS: PerfEventArray = PerfEventArray::new(0); #[map(name = "Blocklist", pinning = "by_name")] -pub static mut BLOCKLIST: HashMap<[u8; 4], [u8; 4]> = - HashMap::<[u8; 4], [u8; 4]>::with_max_entries(1024, 0); +pub static mut BLOCKLIST: HashMap<[u8; 4], [u8; 4]> = HashMap::with_max_entries(1024, 0); //here i need to pass an address like this: [135,171,168,192] #[map(name = "TcpPacketRegistry", pinning = "by_name")] pub static mut PACKET_REGISTRY: PerfEventArray = PerfEventArray::new(0); + +#[map(name = "tracked_veth", pinning = "by_name")] +// This map takes a registry of tracked veth interfaces +// The maximum number of characters is 16 of type u8 +pub static mut TRACKED_VETH: HashMap<[u8; 16], [u8; 8]> = HashMap::with_max_entries(1024, 0); diff --git a/core/src/components/identity/src/main.rs b/core/src/components/identity/src/main.rs index 4efa3c9..8d13e22 100644 --- a/core/src/components/identity/src/main.rs +++ b/core/src/components/identity/src/main.rs @@ -14,7 +14,8 @@ mod service_discovery; use crate::helpers::get_veth_channels; use aya::{ Ebpf, - programs::{SchedClassifier, TcAttachType, tc::SchedClassifierLinkId}, + maps::{Map, MapData}, + programs::{SchedClassifier, TcAttachType}, util::online_cpus, }; @@ -36,7 +37,7 @@ use std::{ use anyhow::{Context, Ok, anyhow}; -use std::collections::HashMap; +//use std::collections::HashMap; use tokio::{fs, signal}; use tracing::{error, info}; @@ -49,7 +50,7 @@ async fn main() -> Result<(), anyhow::Error> { info!("fetching data"); // To Store link_ids they can be used to detach tc - let link_ids = Arc::new(Mutex::new(HashMap::::new())); + //let mut link_ids = HashMap::::new(); //init conntracker data path let bpf_path = @@ -67,6 +68,7 @@ async fn main() -> Result<(), anyhow::Error> { "veth_identity_map".to_string(), "TcpPacketRegistry".to_string(), "Blocklist".to_string(), + "tracked_veth".to_string(), ]; match init_bpf_maps(bpf.clone(), map_data) { std::result::Result::Ok(bpf_maps) => { @@ -90,8 +92,8 @@ async fn main() -> Result<(), anyhow::Error> { } { - init_tc_classifier(bpf.clone(), interfaces, link_ids.clone()).await.context( - "An error occured during the execution of attach_bpf_program function" + init_tc_classifier(bpf.clone(), interfaces).await.context( + "An error occured during the execution of attach_bpf_program function", )?; } { @@ -120,10 +122,10 @@ async fn main() -> Result<(), anyhow::Error> { } //attach the tc classifier program to a vector of interfaces +// TODO: consider to create a load schedule classifier in the common functions async fn init_tc_classifier( bpf: Arc>, ifaces: Vec, - link_ids: Arc>>, ) -> Result<(), anyhow::Error> { //this funtion initialize the tc classifier program info!("Loading programs"); @@ -138,10 +140,33 @@ async fn init_tc_classifier( .try_into() .context("Failed to init SchedClassifier program")?; + // load classifier program + program .load() .context("Failed to load identity_classifier program")?; + // attach program only to desired interfaces. We can skip the dock0,tunl0,lo and eth0 interface + // we also save the interfaces to a BPF_HASH_MAP to easily monitor the interfaces using the agent + + // decleare link_ids HashMap which is a shared hashmap between kernel and userspace + // Link_ids hashmap has type of HashMap<[u8; 16], [u8; 8]>. The key is the program name and the value is the state + + // at this point the pinning is already successfull so we can invoque the maps from the pin + + let link_ids_mapdata = MapData::from_pin("/sys/fs/bpf/maps/tracked_veth") + .map_err(|e| anyhow!("Cannot return link_ids_mapdata. Reason: {}", e))?; + + let link_ids_map = Map::HashMap(link_ids_mapdata); + + let mut link_ids: aya::maps::HashMap = + aya::maps::HashMap::try_from(link_ids_map).map_err(|e| { + anyhow!( + "Cannot create link_ids HashMap from link_ids_map. Reason:{}", + e + ) + })?; + for interface in ifaces { match program.attach(&interface, TcAttachType::Ingress) { std::result::Result::Ok(link_id) => { @@ -149,10 +174,34 @@ async fn init_tc_classifier( "Program 'identity_classifier' attached to interface {}", interface ); - let mut map = link_ids - .lock() - .map_err(|e| anyhow::anyhow!("Cannot get value from lock. Reason: {}", e))?; - map.insert(interface.clone(), link_id); + let interface_bytes = interface.as_bytes(); + + let mut if_bytes = [0u8; 16]; + + // to set the len compare the interface_bytes.len() with the if_bytes.len() [16] and take the minimum + // if we have interface_bytes.len() < than 16 we set the len + let len = interface_bytes.len().min(if_bytes.len()); + + // now we can copy the bytes from the slice into the if_bytes variable + if_bytes[..len].copy_from_slice(&interface_bytes[..len]); + + // we compute the same process for the state_bytes + let mut state_bytes = [0u8; 8]; + let state = b"attached"; // prints "attached" as [u8;8] sequence of bytes + let state_len = state.len().min(state_bytes.len()); + state_bytes[..state_len].copy_from_slice(&state[..state_len]); + + match link_ids.insert(if_bytes, state_bytes, 0) { + std::result::Result::Ok(_) => { + info!("Veth interface {} added into map", &interface); + } + Err(e) => { + error!( + "Cannot add Veth interface {} into map. Reason: {}", + &interface, e + ); + } + } } Err(e) => error!( "Error attaching program to interface {}: {:?}", From 8599b901b4700f476eced98ceb93e24afba6baf4 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Sat, 14 Feb 2026 13:53:34 +0100 Subject: [PATCH 26/46] [#182]: Added GetTrackedVethFromHashMap grpc endpoint to see the tracked veths (pt.2) --- cli/src/monitoring.rs | 33 +++++++------- core/api/Cargo.toml | 3 +- core/api/protos/agent.proto | 13 +++++- core/api/src/agent.rs | 90 ++++++++++++++++++++++++++++++++++++- core/api/src/api.rs | 53 +++++++++++++++++++--- core/api/src/requests.rs | 10 +++++ 6 files changed, 174 insertions(+), 28 deletions(-) diff --git a/cli/src/monitoring.rs b/cli/src/monitoring.rs index 72a94b8..eefae1c 100644 --- a/cli/src/monitoring.rs +++ b/cli/src/monitoring.rs @@ -10,7 +10,7 @@ use tonic_reflection::pb::v1::server_reflection_response::MessageResponse; use agent_api::client::{connect_to_client, connect_to_server_reflection}; use agent_api::requests::{ get_all_features, send_active_connection_request, send_dropped_packets_request, - send_latency_metrics_request, send_tracked_veth_request, + send_latency_metrics_request, send_tracked_veth_request, send_veth_tracked_hashmap_req, }; use crate::errors::CliError; @@ -304,25 +304,24 @@ pub async fn monitor_tracked_veth() -> Result<(), CliError> { "Connecting to cortexflow Client".white() ); match connect_to_client().await { - Ok(client) => match send_tracked_veth_request(client).await { + Ok(client) => match send_veth_tracked_hashmap_req(client).await { Ok(response) => { let veth_response = response.into_inner(); - if veth_response.tot_monitored_veth == 0 { - println!("{} {} ", "=====>".blue().bold(), "No tracked veth found"); - Ok(()) - } else { - println!( - "{} {} {} {} ", - "=====>".blue().bold(), - "Found:", - &veth_response.tot_monitored_veth, - "tracked veth" - ); - for veth in veth_response.veth_names.iter() { - println!("{} {}", "=====>".blue().bold(), &veth); - } - Ok(()) + // if veth_response.tot_monitored_veth == 0 { + // println!("{} {} ", "=====>".blue().bold(), "No tracked veth found"); + // Ok(()) + // } else { + // println!( + // "{} {} {} {} ", + // "=====>".blue().bold(), + // "Found:", + // &veth_response.tot_monitored_veth, + // "tracked veth" + // ); + for veth in veth_response.veths.iter() { + println!("{} {:?}", "=====>".blue().bold(), &veth); } + Ok(()) } Err(e) => { return Err(CliError::AgentError( diff --git a/core/api/Cargo.toml b/core/api/Cargo.toml index a422fd7..0070430 100644 --- a/core/api/Cargo.toml +++ b/core/api/Cargo.toml @@ -32,7 +32,8 @@ aya = "0.13.1" cortexbrain-common = { path = "../common", features = [ "map-handlers", "network-structs", - "buffer-reader" + "buffer-reader", + "monitoring-structs" ] } tonic-reflection = "0.14.0" tonic-build = "0.14.0" diff --git a/core/api/protos/agent.proto b/core/api/protos/agent.proto index 9bfc6e4..e2b1500 100644 --- a/core/api/protos/agent.proto +++ b/core/api/protos/agent.proto @@ -84,7 +84,13 @@ message VethEvent{ uint32 pid = 6; // Process ID } -//declare agent api +message VethHashMapResponse{ // returns tracked veth from the tracked_veth hashmap + string status = 1; + map veths = 2; +} + +// Agent Service + service Agent{ // active connections endpoint rpc ActiveConnections(RequestActiveConnections) returns (ActiveConnectionResponse); @@ -102,10 +108,15 @@ service Agent{ // dropped packets endpoint rpc GetDroppedPacketsMetrics(google.protobuf.Empty) returns (DroppedPacketsResponse); + // TODO: can i combine this 2 endpoints? // active veth info endpoint rpc GetTrackedVeth(google.protobuf.Empty) returns (VethResponse); + // get tracked veth from blocklist + rpc GetTrackedVethFromHashMap(google.protobuf.Empty) returns (VethHashMapResponse); } +// Blocklist + message AddIpToBlocklistRequest{ optional string ip = 1 ; } diff --git a/core/api/src/agent.rs b/core/api/src/agent.rs index cb93ddd..259c1ab 100644 --- a/core/api/src/agent.rs +++ b/core/api/src/agent.rs @@ -151,6 +151,17 @@ pub struct VethEvent { #[prost(uint32, tag = "6")] pub pid: u32, } +/// returns tracked veth from the tracked_veth hashmap +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct VethHashMapResponse { + #[prost(string, tag = "1")] + pub status: ::prost::alloc::string::String, + #[prost(map = "string, string", tag = "2")] + pub veths: ::std::collections::HashMap< + ::prost::alloc::string::String, + ::prost::alloc::string::String, + >, +} #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] pub struct AddIpToBlocklistRequest { #[prost(string, optional, tag = "1")] @@ -192,7 +203,6 @@ pub mod agent_client { )] use tonic::codegen::*; use tonic::codegen::http::Uri; - /// declare agent api #[derive(Debug, Clone)] pub struct AgentClient { inner: tonic::client::Grpc, @@ -444,6 +454,31 @@ pub mod agent_client { .insert(GrpcMethod::new("agent.Agent", "GetTrackedVeth")); self.inner.unary(req, path, codec).await } + /// get tracked veth from blocklist + pub async fn get_tracked_veth_from_hash_map( + &mut self, + request: impl tonic::IntoRequest<()>, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/agent.Agent/GetTrackedVethFromHashMap", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("agent.Agent", "GetTrackedVethFromHashMap")); + self.inner.unary(req, path, codec).await + } } } /// Generated server implementations. @@ -511,8 +546,15 @@ pub mod agent_server { &self, request: tonic::Request<()>, ) -> std::result::Result, tonic::Status>; + /// get tracked veth from blocklist + async fn get_tracked_veth_from_hash_map( + &self, + request: tonic::Request<()>, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + >; } - /// declare agent api #[derive(Debug)] pub struct AgentServer { inner: Arc, @@ -885,6 +927,50 @@ pub mod agent_server { }; Box::pin(fut) } + "/agent.Agent/GetTrackedVethFromHashMap" => { + #[allow(non_camel_case_types)] + struct GetTrackedVethFromHashMapSvc(pub Arc); + impl tonic::server::UnaryService<()> + for GetTrackedVethFromHashMapSvc { + type Response = super::VethHashMapResponse; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call(&mut self, request: tonic::Request<()>) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::get_tracked_veth_from_hash_map( + &inner, + request, + ) + .await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = GetTrackedVethFromHashMapSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } _ => { Box::pin(async move { let mut response = http::Response::new( diff --git a/core/api/src/api.rs b/core/api/src/api.rs index 79b9df3..405f805 100644 --- a/core/api/src/api.rs +++ b/core/api/src/api.rs @@ -1,8 +1,8 @@ use anyhow::Context; +use anyhow::anyhow; use chrono::Local; use cortexbrain_common::formatters::{format_ipv4, format_ipv6}; use cortexbrain_common::map_handlers::load_perf_event_array_from_mapdata; -use prost::bytes::BytesMut; use std::str::FromStr; use std::sync::Mutex; use tonic::{Request, Response, Status}; @@ -28,7 +28,8 @@ use cortexbrain_common::buffer_type::VethLog; // * contains agent api configuration use crate::agent::{ ActiveConnectionResponse, AddIpToBlocklistRequest, BlocklistResponse, RequestActiveConnections, - RmIpFromBlocklistRequest, RmIpFromBlocklistResponse, VethResponse, agent_server::Agent, + RmIpFromBlocklistRequest, RmIpFromBlocklistResponse, VethHashMapResponse, VethResponse, + agent_server::Agent, }; use crate::constants::PIN_BLOCKLIST_MAP_PATH; @@ -38,6 +39,9 @@ use cortexbrain_common::buffer_type::IpProtocols; use std::net::Ipv4Addr; use tracing::warn; +use cortexbrain_common::buffer_type::BufferSize; +use cortexbrain_common::map_handlers::map_manager; + pub struct AgentApi { //* event_rx is an istance of a mpsc receiver. //* is used to receive the data from the transmitter (tx) @@ -162,6 +166,9 @@ impl Default for AgentApi { tracked_veth_tx: veth_tx.clone(), }; + // init map manager + //let map_manager = map_manager(maps)? + // For network metrics //spawn an event readers @@ -177,7 +184,7 @@ impl Default for AgentApi { .open(cpu_id, None) .expect("Error during the creation of net_events_buf structure"); - let buffers = vec![BytesMut::with_capacity(4096); 8]; + let buffers = BufferSize::ClassifierNetEvents.set_buffer(); net_events_buffer.push((buf, buffers)); } @@ -262,7 +269,7 @@ impl Default for AgentApi { .open(cpu_id, None) .expect("Error during the creation of net_metrics_buf structure"); - let buffers = vec![BytesMut::with_capacity(4096); 8]; + let buffers = BufferSize::NetworkMetricsEvents.set_buffer(); net_metrics_buffer.push((buf, buffers)); } @@ -343,7 +350,7 @@ impl Default for AgentApi { .open(cpu_id, None) .expect("Error during the creation of time stamp events buf structure"); - let buffers = vec![BytesMut::with_capacity(4096); 8]; + let buffers = BufferSize::TimeMetricsEvents.set_buffer(); ts_events_buffer.push((buf, buffers)); } @@ -421,7 +428,7 @@ impl Default for AgentApi { .open(cpu_id, None) .expect("Error during the creation of time stamp events buf structure"); - let buffers = vec![BytesMut::with_capacity(4096); 8]; + let buffers = BufferSize::VethEvents.set_buffer(); veth_events_buffer.push((buf, buffers)); } @@ -560,7 +567,10 @@ impl Agent for AgentApi { //convert ip from string to [u8;4] type and insert into the bpf map let u8_4_ip = Ipv4Addr::from_str(&ip).unwrap().octets(); //TODO: convert datetime in a kernel compatible format - blocklist_map.insert(u8_4_ip, u8_4_ip, 0); + blocklist_map + .insert(u8_4_ip, u8_4_ip, 0) + .map_err(|e| anyhow!("Cannot insert address in the blocklist. Reason: {}", e)) + .unwrap(); info!("CURRENT BLOCKLIST: {:?}", blocklist_map); } let path = std::env::var(PIN_BLOCKLIST_MAP_PATH) @@ -774,4 +784,33 @@ impl Agent for AgentApi { Ok(Response::new(response)) } + + async fn get_tracked_veth_from_hash_map( + &self, + request: Request<()>, + ) -> Result, Status> { + info!("Returning veth hashmap"); + //open blocklist map + let mapdata = MapData::from_pin("/sys/fs/bpf/maps/tracked_veth") + .expect("cannot open tracked_veth Mapdata"); + let tracked_veth_mapdata = Map::HashMap(mapdata); //load mapdata + + let tracked_veth_map: ayaHashMap = + ayaHashMap::try_from(tracked_veth_mapdata).unwrap(); + + //convert the maps with a buffer to match the protobuffer types + + let mut converted_tracked_veth_map: HashMap = HashMap::new(); + for item in tracked_veth_map.iter() { + let (k, v) = item.unwrap(); + // convert keys and values from [u8;4] to String + let key = String::from_utf8(k.to_vec()).unwrap(); + let value = String::from_utf8(v.to_vec()).unwrap(); + converted_tracked_veth_map.insert(key, value); + } + Ok(Response::new(VethHashMapResponse { + status: "success".to_string(), + veths: converted_tracked_veth_map, + })) + } } diff --git a/core/api/src/requests.rs b/core/api/src/requests.rs index 06a4030..7c9f447 100644 --- a/core/api/src/requests.rs +++ b/core/api/src/requests.rs @@ -14,6 +14,7 @@ use crate::agent::LatencyMetricsResponse; use crate::agent::RequestActiveConnections; use crate::agent::RmIpFromBlocklistRequest; use crate::agent::RmIpFromBlocklistResponse; +use crate::agent::VethHashMapResponse; use crate::agent::VethResponse; use crate::agent::agent_client::AgentClient; @@ -100,3 +101,12 @@ pub async fn send_tracked_veth_request( let response = client.get_tracked_veth(request).await?; Ok(response) } + +#[cfg(feature = "client")] +pub async fn send_veth_tracked_hashmap_req( + mut client: AgentClient, +) -> Result, Error> { + let request = Request::new(()); + let response = client.get_tracked_veth_from_hash_map(request).await?; + Ok(response) +} From 4d23d4cd3953cb77ba62689ef2856c6b478fab45 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Mon, 2 Mar 2026 17:58:20 +0100 Subject: [PATCH 27/46] [#158]: removed struct.rs Added fill_buffers in buffer_type.rs. Simplified buffers filling and buffer initialization --- core/api/src/agent.rs | 2 + core/api/src/api.rs | 234 +++++++++------------------------ core/api/src/lib.rs | 1 - core/api/src/main.rs | 1 - core/api/src/structs.rs | 48 ------- core/common/src/buffer_type.rs | 22 ++++ 6 files changed, 89 insertions(+), 219 deletions(-) delete mode 100644 core/api/src/structs.rs diff --git a/core/api/src/agent.rs b/core/api/src/agent.rs index 259c1ab..8d004b9 100644 --- a/core/api/src/agent.rs +++ b/core/api/src/agent.rs @@ -432,6 +432,7 @@ pub mod agent_client { .insert(GrpcMethod::new("agent.Agent", "GetDroppedPacketsMetrics")); self.inner.unary(req, path, codec).await } + /// TODO: can i combine this 2 endpoints? /// active veth info endpoint pub async fn get_tracked_veth( &mut self, @@ -541,6 +542,7 @@ pub mod agent_server { tonic::Response, tonic::Status, >; + /// TODO: can i combine this 2 endpoints? /// active veth info endpoint async fn get_tracked_veth( &self, diff --git a/core/api/src/api.rs b/core/api/src/api.rs index 405f805..ba25101 100644 --- a/core/api/src/api.rs +++ b/core/api/src/api.rs @@ -1,6 +1,11 @@ use anyhow::Context; use anyhow::anyhow; +use aya::maps::perf::PerfEventArrayBuffer; use chrono::Local; +use cortexbrain_common::buffer_type::IpProtocols; +use cortexbrain_common::buffer_type::NetworkMetrics; +use cortexbrain_common::buffer_type::PacketLog; +use cortexbrain_common::buffer_type::TimeStampMetrics; use cortexbrain_common::formatters::{format_ipv4, format_ipv6}; use cortexbrain_common::map_handlers::load_perf_event_array_from_mapdata; use std::str::FromStr; @@ -8,7 +13,7 @@ use std::sync::Mutex; use tonic::{Request, Response, Status}; use tracing::info; -use aya::{maps::MapData, util::online_cpus}; +use aya::maps::MapData; use std::result::Result::Ok; use tonic::async_trait; @@ -22,7 +27,6 @@ use crate::agent::{ LatencyMetricsResponse, VethEvent, }; -use crate::structs::{NetworkMetrics, PacketLog, TimeStampMetrics}; use cortexbrain_common::buffer_type::VethLog; // * contains agent api configuration @@ -35,97 +39,23 @@ use crate::constants::PIN_BLOCKLIST_MAP_PATH; use crate::helpers::comm_to_string; use aya::maps::Map; -use cortexbrain_common::buffer_type::IpProtocols; use std::net::Ipv4Addr; use tracing::warn; use cortexbrain_common::buffer_type::BufferSize; -use cortexbrain_common::map_handlers::map_manager; +use cortexbrain_common::buffer_type::fill_buffers; pub struct AgentApi { //* event_rx is an istance of a mpsc receiver. //* is used to receive the data from the transmitter (tx) active_connection_event_rx: Mutex, Status>>>, - active_connection_event_tx: mpsc::Sender, Status>>, + pub(crate) active_connection_event_tx: mpsc::Sender, Status>>, latency_metrics_rx: Mutex, Status>>>, - latency_metrics_tx: mpsc::Sender, Status>>, + pub(crate) latency_metrics_tx: mpsc::Sender, Status>>, dropped_packet_metrics_rx: Mutex, Status>>>, - dropped_packet_metrics_tx: mpsc::Sender, Status>>, + pub(crate) dropped_packet_metrics_tx: mpsc::Sender, Status>>, tracked_veth_rx: Mutex, Status>>>, - tracked_veth_tx: mpsc::Sender, Status>>, -} - -//* Event sender trait. Takes an event from a map and send that to the mpsc channel -//* using the send_map function -#[async_trait] -pub trait EventSender: Send + Sync + 'static { - async fn send_active_connection_event(&self, event: Vec); - async fn send_active_connection_event_map( - &self, - map: Vec, - tx: mpsc::Sender, Status>>, - ) { - let status = Status::new(tonic::Code::Ok, "success"); - let event = Ok(map); - - let _ = tx.send(event).await; - } - - async fn send_latency_metrics_event(&self, event: Vec); - async fn send_latency_metrics_event_map( - &self, - map: Vec, - tx: mpsc::Sender, Status>>, - ) { - let status = Status::new(tonic::Code::Ok, "success"); - let event = Ok(map); - let _ = tx.send(event).await; - } - - async fn send_dropped_packet_metrics_event(&self, event: Vec); - async fn send_dropped_packet_metrics_event_map( - &self, - map: Vec, - tx: mpsc::Sender, Status>>, - ) { - let status = Status::new(tonic::Code::Ok, "success"); - let event = Ok(map); - let _ = tx.send(event).await; - } - - async fn send_tracked_veth_event(&self, event: Vec); - async fn send_tracked_veth_event_map( - &self, - map: Vec, - tx: mpsc::Sender, Status>>, - ) { - let status = Status::new(tonic::Code::Ok, "success"); - let event = Ok(map); - let _ = tx.send(event).await; - } -} - -// send event function. takes an HashMap and send that using mpsc event_tx -#[async_trait] -impl EventSender for AgentApi { - async fn send_active_connection_event(&self, event: Vec) { - self.send_active_connection_event_map(event, self.active_connection_event_tx.clone()) - .await; - } - - async fn send_latency_metrics_event(&self, event: Vec) { - self.send_latency_metrics_event_map(event, self.latency_metrics_tx.clone()) - .await; - } - - async fn send_dropped_packet_metrics_event(&self, event: Vec) { - self.send_dropped_packet_metrics_event_map(event, self.dropped_packet_metrics_tx.clone()) - .await; - } - async fn send_tracked_veth_event(&self, event: Vec) { - self.send_tracked_veth_event_map(event, self.tracked_veth_tx.clone()) - .await; - } + pub(crate) tracked_veth_tx: mpsc::Sender, Status>>, } //initialize a default trait for AgentApi. Loads a name and a bpf istance. @@ -137,13 +67,13 @@ impl Default for AgentApi { // // TODO: in the future will be better to not use .unwrap() - let mut active_connection_events_array = + let active_connection_events_array = load_perf_event_array_from_mapdata("/sys/fs/bpf/maps/events_map").unwrap(); - let mut network_metrics_events_array = + let network_metrics_events_array = load_perf_event_array_from_mapdata("/sys/fs/bpf/trace_maps/net_metrics").unwrap(); - let mut time_stamp_events_array = + let time_stamp_events_array = load_perf_event_array_from_mapdata("/sys/fs/bpf/trace_maps/time_stamp_events").unwrap(); - let mut tracked_veth_events_array = + let tracked_veth_events_array = load_perf_event_array_from_mapdata("/sys/fs/bpf/maps/veth_identity_map").unwrap(); // @@ -155,6 +85,7 @@ impl Default for AgentApi { let (drop_tx, drop_rx) = mpsc::channel(2048); let (veth_tx, tracked_veth_rx) = mpsc::channel(1024); + // init the API to send the events from the agent to the CLI let api = AgentApi { active_connection_event_rx: conn_rx.into(), active_connection_event_tx: conn_tx.clone(), @@ -169,35 +100,42 @@ impl Default for AgentApi { // init map manager //let map_manager = map_manager(maps)? + // init the buffers + let mut net_events_buffers = BufferSize::TcpEvents.set_buffer(); + let mut net_metrics_buffers = BufferSize::NetworkMetricsEvents.set_buffer(); + let mut ts_metrics_buffers = BufferSize::TimeMetricsEvents.set_buffer(); + let mut veth_metrics_buffers = BufferSize::VethEvents.set_buffer(); + + // init the Vec of Buffers + + let mut net_events_vec_buffer = Vec::>::new(); + let mut net_metrics_vec_buffer = Vec::>::new(); + let mut ts_events_vec_buffer = Vec::>::new(); + let mut veth_events_vec_buffer = Vec::>::new(); + + // fill the Vec of Buffers + + net_events_vec_buffer = fill_buffers(net_events_vec_buffer, active_connection_events_array); + net_metrics_vec_buffer = fill_buffers(net_metrics_vec_buffer, network_metrics_events_array); + + ts_events_vec_buffer = fill_buffers(ts_events_vec_buffer, time_stamp_events_array); + + veth_events_vec_buffer = fill_buffers(veth_events_vec_buffer, tracked_veth_events_array); + // For network metrics //spawn an event readers task::spawn(async move { - let mut net_events_buffer = Vec::new(); - //scan the cpus to read the data - - for cpu_id in online_cpus() - .map_err(|e| anyhow::anyhow!("Error {:?}", e)) - .unwrap() - { - let buf = active_connection_events_array - .open(cpu_id, None) - .expect("Error during the creation of net_events_buf structure"); - - let buffers = BufferSize::ClassifierNetEvents.set_buffer(); - net_events_buffer.push((buf, buffers)); - } - info!("Starting event listener"); //send the data through a mpsc channel loop { - for (buf, buffers) in net_events_buffer.iter_mut() { - match buf.read_events(buffers) { + for buf in net_events_vec_buffer.iter_mut() { + match buf.read_events(&mut net_events_buffers) { Ok(events) => { //read the events, this function is similar to the one used in identity/helpers.rs/display_events if events.read > 0 { for i in 0..events.read { - let data = &buffers[i]; + let data = &net_events_buffers[i]; if data.len() >= std::mem::size_of::() { let pl: PacketLog = unsafe { std::ptr::read(data.as_ptr() as *const _) }; @@ -258,32 +196,17 @@ impl Default for AgentApi { }); task::spawn(async move { - let mut net_metrics_buffer = Vec::new(); - - //scan the cpus to read the data - for cpu_id in online_cpus() - .map_err(|e| anyhow::anyhow!("Error {:?}", e)) - .unwrap() - { - let buf = network_metrics_events_array - .open(cpu_id, None) - .expect("Error during the creation of net_metrics_buf structure"); - - let buffers = BufferSize::NetworkMetricsEvents.set_buffer(); - net_metrics_buffer.push((buf, buffers)); - } - info!("Starting network metrics listener"); //send the data through a mpsc channel loop { - for (buf, buffers) in net_metrics_buffer.iter_mut() { - match buf.read_events(buffers) { + for buf in net_metrics_vec_buffer.iter_mut() { + match buf.read_events(&mut net_metrics_buffers) { Ok(events) => { //read the events, this function is similar to the one used in identity/helpers.rs/display_events if events.read > 0 { for i in 0..events.read { - let data = &buffers[i]; + let data = &net_metrics_buffers[i]; if data.len() >= std::mem::size_of::() { let nm: NetworkMetrics = unsafe { std::ptr::read(data.as_ptr() as *const _) }; @@ -340,34 +263,22 @@ impl Default for AgentApi { }); task::spawn(async move { - let mut ts_events_buffer = Vec::new(); - //scan the cpus to read the data - for cpu_id in online_cpus() - .map_err(|e| anyhow::anyhow!("Error {:?}", e)) - .unwrap() - { - let buf = time_stamp_events_array - .open(cpu_id, None) - .expect("Error during the creation of time stamp events buf structure"); - - let buffers = BufferSize::TimeMetricsEvents.set_buffer(); - ts_events_buffer.push((buf, buffers)); - } - info!("Starting time stamp events listener"); //send the data through a mpsc channel loop { - for (buf, buffers) in ts_events_buffer.iter_mut() { - match buf.read_events(buffers) { + for buf in ts_events_vec_buffer.iter_mut() { + match buf.read_events(&mut ts_metrics_buffers) { Ok(events) => { //read the events, this function is similar to the one used in identity/helpers.rs/display_events if events.read > 0 { for i in 0..events.read { - let data = &buffers[i]; + let data = &ts_metrics_buffers[i]; if data.len() >= std::mem::size_of::() { let tsm: TimeStampMetrics = unsafe { std::ptr::read(data.as_ptr() as *const _) }; + let saddr_v6 = tsm.saddr_v6; + let daddr_v6 = tsm.daddr_v6; let latency_metric = LatencyMetric { delta_us: tsm.delta_us, timestamp_us: tsm.ts_us, @@ -378,8 +289,8 @@ impl Default for AgentApi { address_family: tsm.af as u32, src_address_v4: format_ipv4(tsm.saddr_v4), dst_address_v4: format_ipv4(tsm.daddr_v4), - src_address_v6: format_ipv6(&tsm.saddr_v6), - dst_address_v6: format_ipv6(&tsm.daddr_v6), + src_address_v6: format_ipv6(&saddr_v6), + dst_address_v6: format_ipv6(&daddr_v6), }; info!( "Latency Metric - tgid: {}, process_name: {}, delta_us: {}, timestamp_us: {}, local_port: {}, remote_port: {}, address_family: {}, src_address_v4: {}, dst_address_v4: {}, src_address_v6: {}, dst_address_v6: {}", @@ -416,34 +327,19 @@ impl Default for AgentApi { } }); - // TODO: this part needs a better implementation task::spawn(async move { - let mut veth_events_buffer = Vec::new(); - //scan the cpus to read the data - for cpu_id in online_cpus() - .map_err(|e| anyhow::anyhow!("Error {:?}", e)) - .unwrap() - { - let buf = tracked_veth_events_array - .open(cpu_id, None) - .expect("Error during the creation of time stamp events buf structure"); - - let buffers = BufferSize::VethEvents.set_buffer(); - veth_events_buffer.push((buf, buffers)); - } - info!("Starting time stamp events listener"); //send the data through a mpsc channel loop { - for (buf, buffers) in veth_events_buffer.iter_mut() { - match buf.read_events(buffers) { + for buf in veth_events_vec_buffer.iter_mut() { + match buf.read_events(&mut veth_metrics_buffers) { Ok(events) => { //read the events, this function is similar to the one used in identity/helpers.rs/display_events if events.read > 0 { for i in 0..events.read { info!("Found veth events {}", events.read); - let data = &buffers[i]; + let data = &veth_metrics_buffers[i]; if data.len() >= std::mem::size_of::() { let veth: VethLog = unsafe { std::ptr::read(data.as_ptr() as *const _) }; @@ -515,7 +411,7 @@ impl Agent for AgentApi { request: Request, ) -> Result, Status> { //read request - let req = request.into_inner(); + let _req = request.into_inner(); //create the hashmap to process events from the mpsc channel queue let mut aggregated_events: Vec = Vec::new(); @@ -562,7 +458,7 @@ impl Agent for AgentApi { } else { // add ip to the blocklist // log blocklist event - let datetime = Local::now().to_string(); + let _datetime = Local::now().to_string(); let ip = req.ip.unwrap(); //convert ip from string to [u8;4] type and insert into the bpf map let u8_4_ip = Ipv4Addr::from_str(&ip).unwrap().octets(); @@ -573,14 +469,14 @@ impl Agent for AgentApi { .unwrap(); info!("CURRENT BLOCKLIST: {:?}", blocklist_map); } - let path = std::env::var(PIN_BLOCKLIST_MAP_PATH) + let _path = std::env::var(PIN_BLOCKLIST_MAP_PATH) .context("Blocklist map path not found!") .unwrap(); //convert the maps with a buffer to match the protobuffer types let mut converted_blocklist_map: HashMap = HashMap::new(); for item in blocklist_map.iter() { - let (k, v) = item.unwrap(); + let (k, _v) = item.unwrap(); // convert keys and values from [u8;4] to String let key = Ipv4Addr::from(k).to_string(); let value = Ipv4Addr::from(k).to_string(); @@ -596,7 +492,7 @@ impl Agent for AgentApi { async fn check_blocklist( &self, - request: Request<()>, + _request: Request<()>, ) -> Result, Status> { info!("Returning blocklist hashmap"); //open blocklist map @@ -611,7 +507,7 @@ impl Agent for AgentApi { let mut converted_blocklist_map: HashMap = HashMap::new(); for item in blocklist_map.iter() { - let (k, v) = item.unwrap(); + let (k, _v) = item.unwrap(); // convert keys and values from [u8;4] to String let key = Ipv4Addr::from(k).to_string(); let value = Ipv4Addr::from(k).to_string(); @@ -638,7 +534,7 @@ impl Agent for AgentApi { //remove the address let ip_to_remove = req.ip; let u8_4_ip_to_remove = Ipv4Addr::from_str(&ip_to_remove).unwrap().octets(); - blocklist_map.remove(&u8_4_ip_to_remove); + let _ = blocklist_map.remove(&u8_4_ip_to_remove); //convert the maps with a buffer to match the protobuffer types let mut converted_blocklist_map: HashMap = HashMap::new(); @@ -661,7 +557,7 @@ impl Agent for AgentApi { request: Request<()>, ) -> Result, Status> { // Extract the request parameters - let req = request.into_inner(); + let _req = request.into_inner(); info!("Getting latency metrics"); // Here you would typically query your data source for the latency metrics @@ -724,7 +620,7 @@ impl Agent for AgentApi { request: Request<()>, ) -> Result, Status> { // Extract the request parameters - let req = request.into_inner(); + let _req = request.into_inner(); info!("Getting dropped packets metrics"); let mut aggregated_dropped_packet_metrics: Vec = Vec::new(); @@ -759,7 +655,7 @@ impl Agent for AgentApi { &self, request: Request<()>, ) -> Result, Status> { - let req = request.into_inner(); + let _req = request.into_inner(); info!("Getting tracked veth metrics"); let mut tracked_veth = Vec::::new(); let mut tot_veth = 0 as i32; @@ -787,7 +683,7 @@ impl Agent for AgentApi { async fn get_tracked_veth_from_hash_map( &self, - request: Request<()>, + _request: Request<()>, ) -> Result, Status> { info!("Returning veth hashmap"); //open blocklist map diff --git a/core/api/src/lib.rs b/core/api/src/lib.rs index cf2c0c9..e093920 100644 --- a/core/api/src/lib.rs +++ b/core/api/src/lib.rs @@ -2,7 +2,6 @@ pub mod api; pub mod agent; pub mod client; pub mod requests; -pub mod structs; pub mod constants; pub mod helpers; pub mod batcher; diff --git a/core/api/src/main.rs b/core/api/src/main.rs index 30fe550..87478f5 100644 --- a/core/api/src/main.rs +++ b/core/api/src/main.rs @@ -6,7 +6,6 @@ mod agent; mod api; mod constants; mod helpers; -mod structs; mod agent_proto { use tonic::include_file_descriptor_set; diff --git a/core/api/src/structs.rs b/core/api/src/structs.rs deleted file mode 100644 index 97a4017..0000000 --- a/core/api/src/structs.rs +++ /dev/null @@ -1,48 +0,0 @@ -use bytemuck_derive::Zeroable; -use crate::constants::TASK_COMM_LEN; - - -#[repr(C)] -#[derive(Clone, Copy, Zeroable)] -pub struct PacketLog { - pub proto: u8, - pub src_ip: u32, - pub src_port: u16, - pub dst_ip: u32, - pub dst_port: u16, - pub pid: u32, -} -unsafe impl aya::Pod for PacketLog {} - -#[repr(C, packed)] -#[derive(Clone, Copy, Zeroable)] -pub struct NetworkMetrics { - pub tgid: u32, - pub comm: [u8; TASK_COMM_LEN], - pub ts_us: u64, - pub sk_err: i32, - pub sk_err_soft: i32, - pub sk_backlog_len: i32, - pub sk_write_memory_queued: i32, - pub sk_receive_buffer_size: i32, - pub sk_ack_backlog: u32, - pub sk_drops: i32, -} -unsafe impl aya::Pod for NetworkMetrics {} - -#[repr(C)] -#[derive(Clone, Copy, Zeroable)] -pub struct TimeStampMetrics { - pub delta_us: u64, - pub ts_us: u64, - pub tgid: u32, - pub comm: [u8; TASK_COMM_LEN], - pub lport: u16, - pub dport_be: u16, - pub af: u16, - pub saddr_v4: u32, - pub daddr_v4: u32, - pub saddr_v6: [u32; 4], - pub daddr_v6: [u32; 4], -} -unsafe impl aya::Pod for TimeStampMetrics {} diff --git a/core/common/src/buffer_type.rs b/core/common/src/buffer_type.rs index ac0d600..f962698 100644 --- a/core/common/src/buffer_type.rs +++ b/core/common/src/buffer_type.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "buffer-reader")] +use aya::maps::{MapData, PerfEventArray}; use aya::{maps::perf::PerfEventArrayBuffer, util::online_cpus}; use bytemuck_derive::Zeroable; use bytes::BytesMut; @@ -560,3 +562,23 @@ impl BufferSize { } } } + +#[cfg(feature = "buffer-reader")] +pub fn fill_buffers( + //buf: PerfEventArrayBuffer, + mut vec_of_buffers: Vec>, + //buffers: Vec, + mut events_array: PerfEventArray, +) -> Vec> { + for cpu_id in online_cpus() + .map_err(|e| anyhow::anyhow!("Error {:?}", e)) + .unwrap() + { + let buf = events_array + .open(cpu_id, None) + .expect("Error during the creation of net_events_buf structure"); + + vec_of_buffers.push(buf); + } + vec_of_buffers +} From 76d462bfa4f10219375ad73ea611119f40657d4b Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Mon, 2 Mar 2026 18:02:40 +0100 Subject: [PATCH 28/46] [#158]: moved Event Sender trait in the batcher.rs module --- core/api/src/batcher.rs | 89 +++++++++++++++++++++++++++++++++++------ 1 file changed, 77 insertions(+), 12 deletions(-) diff --git a/core/api/src/batcher.rs b/core/api/src/batcher.rs index 6e984d5..12d9278 100644 --- a/core/api/src/batcher.rs +++ b/core/api/src/batcher.rs @@ -1,22 +1,87 @@ // This module is experimental and may be subject to major changes. -use crate::agent::{ConnectionEvent, DroppedPacketMetric, LatencyMetric}; +// Do not use any of these functions +// FIXME: this module will be deprecated in the next version probably -pub enum MetricsBatcher { - LatencyMetrics, - DroppedPacketsMetrics, -} -pub enum EventBatcher {} -impl MetricsBatcher { - pub async fn send_batched_metrics() { - todo!(); +use tokio::sync::mpsc; +use tonic::{Status, async_trait}; + +use crate::{ + agent::{ConnectionEvent, DroppedPacketMetric, LatencyMetric, VethEvent}, + api::AgentApi, +}; + +// Event sender trait. Takes an event from a map and send that to the mpsc channel +// using the send_map function +#[async_trait] +pub trait EventSender: Send + Sync + 'static { + async fn send_active_connection_event(&self, event: Vec); + async fn send_active_connection_event_map( + &self, + map: Vec, + tx: mpsc::Sender, Status>>, + ) { + let status = Status::new(tonic::Code::Ok, "success"); + let event = Ok(map); + + let _ = tx.send(event).await; + } + + async fn send_latency_metrics_event(&self, event: Vec); + async fn send_latency_metrics_event_map( + &self, + map: Vec, + tx: mpsc::Sender, Status>>, + ) { + let status = Status::new(tonic::Code::Ok, "success"); + let event = Ok(map); + let _ = tx.send(event).await; + } + + async fn send_dropped_packet_metrics_event(&self, event: Vec); + async fn send_dropped_packet_metrics_event_map( + &self, + map: Vec, + tx: mpsc::Sender, Status>>, + ) { + let status = Status::new(tonic::Code::Ok, "success"); + let event = Ok(map); + let _ = tx.send(event).await; + } + + async fn send_tracked_veth_event(&self, event: Vec); + async fn send_tracked_veth_event_map( + &self, + map: Vec, + tx: mpsc::Sender, Status>>, + ) { + let status = Status::new(tonic::Code::Ok, "success"); + let event = Ok(map); + let _ = tx.send(event).await; } } -impl EventBatcher { - pub async fn send_batched_logs() { - todo!(); +// send event function. takes an HashMap and send that using mpsc event_tx +#[async_trait] +impl EventSender for AgentApi { + async fn send_active_connection_event(&self, event: Vec) { + self.send_active_connection_event_map(event, self.active_connection_event_tx.clone()) + .await; + } + + async fn send_latency_metrics_event(&self, event: Vec) { + self.send_latency_metrics_event_map(event, self.latency_metrics_tx.clone()) + .await; + } + + async fn send_dropped_packet_metrics_event(&self, event: Vec) { + self.send_dropped_packet_metrics_event_map(event, self.dropped_packet_metrics_tx.clone()) + .await; + } + async fn send_tracked_veth_event(&self, event: Vec) { + self.send_tracked_veth_event_map(event, self.tracked_veth_tx.clone()) + .await; } } From bf1720bab06a56e6d677097783f9dea2a5dbcd01 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Mon, 2 Mar 2026 18:26:28 +0100 Subject: [PATCH 29/46] [deprecated]: removed deprecated Scripts folder --- Scripts/check-cortexflow-components.sh | 21 -------- Scripts/check-dev-requisites.sh | 41 --------------- Scripts/install-debugging-tools.sh | 45 ---------------- Scripts/test-connections.sh | 49 ------------------ Scripts/test-proxy-endpoints.sh | 45 ---------------- Scripts/test-proxy-ports.sh | 18 ------- Scripts/test-sidecar-advanced-tcp.sh | 67 ------------------------ Scripts/test-sidecar-advanced-udp.sh | 70 ------------------------- Scripts/test-sidecar-proxy.sh | 71 -------------------------- 9 files changed, 427 deletions(-) delete mode 100755 Scripts/check-cortexflow-components.sh delete mode 100755 Scripts/check-dev-requisites.sh delete mode 100755 Scripts/install-debugging-tools.sh delete mode 100755 Scripts/test-connections.sh delete mode 100755 Scripts/test-proxy-endpoints.sh delete mode 100755 Scripts/test-proxy-ports.sh delete mode 100755 Scripts/test-sidecar-advanced-tcp.sh delete mode 100755 Scripts/test-sidecar-advanced-udp.sh delete mode 100755 Scripts/test-sidecar-proxy.sh diff --git a/Scripts/check-cortexflow-components.sh b/Scripts/check-cortexflow-components.sh deleted file mode 100755 index 01232cb..0000000 --- a/Scripts/check-cortexflow-components.sh +++ /dev/null @@ -1,21 +0,0 @@ -echo "Welcome to CortexFlow tools" -echo "Checking CortexFlow components" - -echo "Checking if CortexFlow namespace exists..." -if kubectl get namespace cortexflow >/dev/null 2>&1; then - echo "✅ Namespace 'cortexflow' exists." - - sleep 1 - echo "Checking pods..." - kubectl get pods -n cortexflow - - echo - - sleep 1 - echo "Checking services..." - kubectl get svc -n cortexflow - echo -else - echo "❌ Namespace 'cortexflow' does not exist." - exit 1 -fi diff --git a/Scripts/check-dev-requisites.sh b/Scripts/check-dev-requisites.sh deleted file mode 100755 index c775754..0000000 --- a/Scripts/check-dev-requisites.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -echo "Welcome to the CortexFlow tools" -echo "Checking pre-requisites for developers" -echo - -echo "Checking Docker installation..." -if which docker >/dev/null 2>&1; then - echo "✅ Docker is installed." -else - echo "❌ Docker is NOT installed." -fi -sleep 1 - -echo -echo "Checking Minikube installation..." -if which minikube >/dev/null 2>&1; then - echo "✅ Minikube is installed." -else - echo "❌ Minikube is NOT installed." -fi -sleep 1 - -echo - -echo "Checking Node.js installation..." -if which node >/dev/null 2>&1; then - echo "✅ Node.js is installed." -else - echo "Node.js is NOT installed." -fi -sleep 1 - -echo - -echo "Checking npm installation..." -if which npm >/dev/null 2>&1; then - echo "✅ npm is installed." -else - echo "❌ npm is NOT installed." -fi diff --git a/Scripts/install-debugging-tools.sh b/Scripts/install-debugging-tools.sh deleted file mode 100755 index 9e3ed01..0000000 --- a/Scripts/install-debugging-tools.sh +++ /dev/null @@ -1,45 +0,0 @@ -if ! kubectl exec -n cortexflow $1 -c $2 -- which netstat >/dev/null 2>&1; then - echo "🔨 installing netstat" - kubectl exec -n cortexflow $1 -c $2 -- apt update - kubectl exec -n cortexflow $1 -c $2 -- apt install -y net-tools -else - echo "✅ Netstat is installed." -fi - -sleep 1.5 - -if ! kubectl exec -n cortexflow $1 -c $2 -- which nc >/dev/null 2>&1; then - echo "🔨 installing netcat" - kubectl exec -n cortexflow $1 -c $2 -- apt install -y netcat-traditional -else - echo "✅ Netcat is installed." -fi - -sleep 1.5 - -if ! kubectl exec -n cortexflow $1 -c $2 -- which curl >/dev/null 2>&1; then - echo "🔨 installing curl" - kubectl exec -n cortexflow $1 -c $2 -- apt install -y curl -else - echo "✅ Curl is installed." -fi - -sleep 1.5 - -if ! kubectl exec -n cortexflow $1 -c $2 -- which nslookup >/dev/null 2>&1; then - echo "🔨 installing dnsutils" - kubectl exec -n cortexflow $1 -c $2 -- apt install -y dnsutils -else - echo "✅ Nslookup is installed." -fi - -sleep 1.5 - -if ! kubectl exec -n cortexflow $1 -c $2 -- which tcpdump >/dev/null 2>&1; then - echo "🔨 installing tcpdump" - kubectl exec -n cortexflow $1 -c $2 -- apt install -y tcpdump -else - echo "✅ tcpdump is installed." -fi - -sleep 1.5 diff --git a/Scripts/test-connections.sh b/Scripts/test-connections.sh deleted file mode 100755 index 95dcc94..0000000 --- a/Scripts/test-connections.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -proxy_pod_name=$(kubectl get pods -n cortexflow --no-headers -o custom-columns=":metadata.name" | grep cortexflow-proxy) -proxy_ip=$(kubectl get -o template service/proxy-service -n cortexflow --template='{{.spec.clusterIP}}') -proxy_udp_port=5053 -proxy_tcp_port=5054 -proxy_metrics_port=9090 -proxy_container=$(kubectl get pod $proxy_pod_name -n cortexflow -o jsonpath='{.spec.containers[*].name}') - -echo "🧑🏻‍🔬 Checking cortexflow proxy inside the proxy pod: $proxy_pod_name" - -sleep 1.5 -echo "🔨 checking env variables" -kubectl exec -n cortexflow $proxy_pod_name -- env - -sleep 1.5 - -./install-debugging-tools.sh $proxy_pod_name $proxy_container -echo -./test-proxy-ports.sh $proxy_pod_name $proxy_metrics_port -echo -sleep 1.5 -echo "🔨 Sending a test package with netcat from proxy pod -> proxy pod" -kubectl exec -n cortexflow $proxy_pod_name -- sh -c echo b"Hi CortexFlow" | nc -u -w5 -v 127.0.0.1 $proxy_udp_port - -echo -sleep 1.5 -echo "🔨 Testing the DNS resolution manually with nslookup" -kubectl exec -n cortexflow $proxy_pod_name -- nslookup proxy-service.cortexflow.svc.cluster.local - -sleep 1.5 -echo -./test-proxy-endpoints.sh $proxy_pod_name -echo -echo -echo "🧑🏻‍🔬 Testing outside the proxy pod using a test pod" -echo "🔨 Testing using a temporary test pod and nslookup" -kubectl run -it --rm --image=busybox test-pod --restart=Never -n cortexflow -- nslookup proxy-service.cortexflow.svc.cluster.local - -echo -sleep 1.5 -echo "🔨 Sending a test message using netcat and a temporary test pod" -kubectl run -it --rm --image=busybox test-pod --restart=Never -n cortexflow -- sh -c "echo -n Hi CortexFlow | nc -u -w 3 -v $proxy_ip $proxy_udp_port" - -echo -sleep 1.5 -echo "🔨 Testing the tcp port" -echo "🔨 Sending a test message using netcat and a temporary test pod " -kubectl run -it --rm --image=busybox test-pod --restart=Never -n cortexflow -- sh -c "echo -n Hi TCP | nc -w 3 -v $proxy_ip $proxy_tcp_port" diff --git a/Scripts/test-proxy-endpoints.sh b/Scripts/test-proxy-endpoints.sh deleted file mode 100755 index c89e52e..0000000 --- a/Scripts/test-proxy-endpoints.sh +++ /dev/null @@ -1,45 +0,0 @@ -echo "🔨 Testing curl command" -response=$(kubectl exec -n cortexflow $1 -- curl -s -o /dev/null -w "%{http_code}" http://localhost:9090/) -if [ "$response" -eq 200 ]; then - echo "✅ Server is working" - echo " Checking / endpoint" - kubectl exec -n cortexflow $1 -- curl -v http://localhost:9090/ -else - echo "❌ Error in http response ERROR: $response. Service does not exists or is not exposed" -fi - -echo -sleep 1.5 -echo "🔨 Testing /health endpoint" -response=$(kubectl exec -n cortexflow $1 -- curl -s -o /dev/null -w "%{http_code}" http://localhost:9090/health) -if [ "$response" -eq 200 ]; then - echo "✅ Server is working" - echo " Checking /health endpoint" - kubectl exec -n cortexflow $1 -- curl -v http://localhost:9090/health -else - echo "❌ Error in http response ERROR: $response. Service does not exists or is not exposed" -fi - -echo -sleep 1.5 -echo "🔨 Testing /metrics endpoint" -response=$(kubectl exec -n cortexflow $1 -- curl -s -o /dev/null -w "%{http_code}" http://localhost:9090/metrics) -if [ "$response" -eq 200 ]; then - echo "✅ Server is working" - echo " Checking /metrics endpoint" - kubectl exec -n cortexflow $1 -- curl -v http://localhost:9090/metrics -else - echo "❌ Error in http response ERROR: $response. Service does not exists or is not exposed" -fi - -echo -sleep 1.5 -echo "🔨 Testing /status endpoint" -response=$(kubectl exec -n cortexflow $1 -- curl -s -o /dev/null -w "%{http_code}" http://localhost:9090/status) -if [ "$response" -eq 200 ]; then - echo "✅ Server is working" - echo " Checking /status endpoint" - kubectl exec -n cortexflow $1 -- curl -v http://localhost:9090/status -else - echo "❌ Error in http response ERROR: $response. Service does not exists or is not exposed" -fi diff --git a/Scripts/test-proxy-ports.sh b/Scripts/test-proxy-ports.sh deleted file mode 100755 index 33d658d..0000000 --- a/Scripts/test-proxy-ports.sh +++ /dev/null @@ -1,18 +0,0 @@ -echo "🔨 Testing network connections" -kubectl exec -n cortexflow $1 -- netstat -tulnp | grep $2 - -sleep 1.5 - -echo -echo "🔨 testing if the process is in execution" -kubectl exec -n cortexflow $1 -- ps aux | grep cortexflow-proxy - -sleep 1.5 -echo -echo "🔨 testing using netcat" -kubectl exec -n cortexflow $1 -- nc -zv proxy-service.cortexflow.svc.cluster.local $2 - -sleep 1.5 -echo -echo "🔨 Checking if the proxy is listening in the 5053 port" -kubectl exec -n cortexflow $1 -- netstat -ulnp diff --git a/Scripts/test-sidecar-advanced-tcp.sh b/Scripts/test-sidecar-advanced-tcp.sh deleted file mode 100755 index ec3fce4..0000000 --- a/Scripts/test-sidecar-advanced-tcp.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/sh - -./install-debugging-tools.sh test-proxy proxy-sidecar -./install-debugging-tools.sh test-proxy2 proxy-sidecar -./install-debugging-tools.sh test-proxy3 proxy-sidecar -./install-debugging-tools.sh test-proxy4 proxy-sidecar - -# start the tcp listener -kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Starting TCP listener on port 5054..." - nohup sh -c "nc -l -p 5054" >/dev/null 2>&1 & -' - -kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Starting TCP listener on port 5054..." - nohup sh -c "nc -l -p 5054" >/dev/null 2>&1 & -' - - -test_proxy_to_proxy2() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -w1 test-proxy2 5054 - ' - done -} - -test_proxy2_to_proxy() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -w1 test-proxy 5054 - ' - done -} - -test_proxy3_to_proxy2() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - kubectl exec test-proxy3 -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -w1 test-proxy2 5054 - ' - done -} - -test_proxy4_to_proxy2() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - kubectl exec test-proxy4 -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -w1 test-proxy2 5054 - ' - done -} - -# execute the functions in background -test_proxy_to_proxy2 & -test_proxy2_to_proxy & -test_proxy3_to_proxy2 & -test_proxy4_to_proxy2 & - - -sleep 300 - -# stop the listeners -kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c 'pkill nc' -kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c 'pkill nc' diff --git a/Scripts/test-sidecar-advanced-udp.sh b/Scripts/test-sidecar-advanced-udp.sh deleted file mode 100755 index d9c52a8..0000000 --- a/Scripts/test-sidecar-advanced-udp.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/sh -./install-debugging-tools.sh test-proxy proxy-sidecar -./install-debugging-tools.sh test-proxy2 proxy-sidecar -./install-debugging-tools.sh test-proxy3 proxy-sidecar -./install-debugging-tools.sh test-proxy4 proxy-sidecar - -# start the udp listener -kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Starting UDP listener on port 5053..." - nohup nc -lu 5053 >/dev/null 2>&1 & -' - -kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Starting UDP listener on port 5053..." - nohup nc -lu 5053 >/dev/null 2>&1 & -' - - -test_proxy_to_proxy2() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - echo "Sending UDP packet from test-proxy to test-proxy2..." - kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -u -w1 test-proxy2 5053 - ' - done -} - -test_proxy2_to_proxy() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - echo "Sending UDP packet from test-proxy2 to test-proxy..." - kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -u -w1 test-proxy 5053 - ' - done -} - -test_proxy3_to_proxy2() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - echo "Sending UDP packet from test-proxy3 to test-proxy2..." - kubectl exec test-proxy3 -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -u -w1 test-proxy2 5053 - ' - done -} - -test_proxy4_to_proxy2() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - echo "Sending UDP packet from test-proxy4 to test-proxy2..." - kubectl exec test-proxy4 -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -u -w1 test-proxy2 5053 - ' - done -} - -# execute the functions in background -(test_proxy_to_proxy2 &) & -(test_proxy2_to_proxy &) & -(test_proxy3_to_proxy2 &) & -(test_proxy4_to_proxy2 &) & - - -sleep 300 - -# stop the listeners -kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c 'pkill nc || kill $(pgrep nc)' -kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c 'pkill nc || kill $(pgrep nc)' diff --git a/Scripts/test-sidecar-proxy.sh b/Scripts/test-sidecar-proxy.sh deleted file mode 100755 index fcce42d..0000000 --- a/Scripts/test-sidecar-proxy.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash - -echo "Testing Sidecar proxy injection " - -sleep 1 -echo "Checking pods" -kubectl get pods -o wide -n cortexflow -echo -echo "Checking if the sidecar proxy is present" -kubectl get pods -n cortexflow -o json | jq '.items[].spec.containers[].name' - -echo -sleep 1 -echo "Checking open ports in test-proxy" -kubectl get pods test-proxy -o jsonpath='{.spec.containers[*].ports}' -n cortexflow -echo -kubectl get pods test-proxy2 -o jsonpath='{.spec.containers[*].ports}' -n cortexflow - -echo -echo -echo "Installing debugging tools in test-proxy: (PROXY-SIDECAR container)" -sleep 3 -./install-debugging-tools.sh test-proxy proxy-sidecar -echo -echo -echo "Installing debugging tools in test-proxy2: (PROXY-SIDECAR container)" -sleep 3 -./install-debugging-tools.sh test-proxy2 proxy-sidecar - -echo -echo -echo "Checking network connections in test-proxy pod " -kubectl exec -it test-proxy -c proxy-sidecar -n cortexflow -- netstat -tulnp -echo -echo "Checking network connections in test-proxy2 pod" -kubectl exec -it test-proxy2 -c proxy-sidecar -n cortexflow -- netstat -tulnp - - -echo -sleep 2 -echo "TEST 1: Checking if test-proxy can communicate with test-proxy2" -kubectl exec -it test-proxy -c proxy-sidecar -n cortexflow -- nc -zv test-proxy2.cortexflow.svc.cluster.local 5054 -echo - -echo - -echo "TEST 2: Checking if test-proxy can communicate with test-proxy2 (TCP)" - -# 2. Send the message from test-proxy to test-proxy2 -kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Test: Incoming Message ⏳" - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -w3 test-proxy2 5054 && echo "✅ Test completed" -' - -echo -sleep 2 -echo -echo "TEST 2: Sending a message from test-proxy to test-proxy2 (UDP)" - -#Start the UDP listener on test-proxy2 (MUST be before sending the message) -kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Starting UDP listener on port 5053..." - nohup sh -c "nc -lu -p 5053 > /tmp/received_message.log" >/dev/null 2>&1 & - sleep 2 # Wait for the listener to start -' - -#2. Send the message from test-proxy to test-proxy2 -kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Test: Incoming Message ⏳" - echo "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJtZXNzYWdlIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}" | nc -u -w3 test-proxy2 5053 && echo "✅ Test completed" -' From 2c1250cb8caf593a353cacdc2b4b05363abccd3d Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Sun, 8 Feb 2026 21:10:02 +0100 Subject: [PATCH 30/46] [#158]: moved map manager from identity to common crate to reuse the function in metrics service --- core/common/Cargo.toml | 1 + core/common/src/map_handlers.rs | 54 ++++++++++++++++++++ core/src/components/identity/src/main.rs | 57 ++++++---------------- core/src/components/metrics/src/helpers.rs | 54 +++++++++----------- 4 files changed, 91 insertions(+), 75 deletions(-) diff --git a/core/common/Cargo.toml b/core/common/Cargo.toml index b8e840d..f604b65 100644 --- a/core/common/Cargo.toml +++ b/core/common/Cargo.toml @@ -29,5 +29,6 @@ bytemuck_derive = "1.10.2" map-handlers = [] program-handlers = [] network-structs = [] +monitoring-structs = [] buffer-reader = [] experimental = [] diff --git a/core/common/src/map_handlers.rs b/core/common/src/map_handlers.rs index 19d4e20..313f73e 100644 --- a/core/common/src/map_handlers.rs +++ b/core/common/src/map_handlers.rs @@ -154,3 +154,57 @@ pub fn load_perf_event_array_from_mapdata( })?; Ok(perf_event_array) } + +#[cfg(feature = "map-handlers")] +pub fn map_manager( + maps: BpfMapsData, +) -> Result< + std::collections::HashMap< + String, + ( + aya::maps::PerfEventArray, + Vec>, + ), + >, + Error, +> { + use aya::maps::PerfEventArray; + use aya::maps::{MapData, perf::PerfEventArrayBuffer}; + use tracing::debug; + + let mut map_manager = std::collections::HashMap::< + String, // this will store the bpf map name + (PerfEventArray, Vec>), // this will manage the BPF_MAP_TYPE_PERF_EVENT_ARRAY and its buffer + >::new(); + + // map_manager creates an hashmap that contains: + // MAP NAME as String (KEY) + // + // VALUES (tuple) + // a PERF_EVENT_ARRAY + // a vector of PERF_EVENT_ARRAY_BUFFER + // + // the map manager helps the event listener to specifically call a map by its pinned name + // e.g. veth_identity_map and returns the associated PERF_EVENT_ARRAY and PERF_EVENT_ARRAY_BUFFERS (1 per CPU) + // also the map manager helps to write a more complete debug context by linking map names with arrays and buffers. + // actually i cannot return the extact information using only the Aya library + + // create the PerfEventArrays and the buffers from the BpfMapsData Objects + for (map, name) in maps + .bpf_obj_map + .into_iter() + .zip(maps.bpf_obj_names.into_iter()) + // zip two iterators at the same time for map object and map names + { + debug!("Debugging map type:{:?} for map name {:?}", map, &name); + info!("Creating PerfEventArray for map name {:?}", &name); + + // save the map in a registry if is a PerfEventArray to access them by name + if let std::result::Result::Ok(perf_event_array) = PerfEventArray::try_from(map) { + map_manager.insert(name.clone(), (perf_event_array, Vec::new())); + } else { + warn!("Map {:?} is not a PerfEventArray, skipping load", &name); + } + } + Ok(map_manager) +} diff --git a/core/src/components/identity/src/main.rs b/core/src/components/identity/src/main.rs index 598b964..c70011e 100644 --- a/core/src/components/identity/src/main.rs +++ b/core/src/components/identity/src/main.rs @@ -14,10 +14,6 @@ mod service_discovery; use crate::helpers::{get_veth_channels, read_perf_buffer}; use aya::{ Ebpf, - maps::{ - MapData, - perf::{PerfEventArray, PerfEventArrayBuffer}, - }, programs::{SchedClassifier, TcAttachType, tc::SchedClassifierLinkId}, util::online_cpus, }; @@ -25,8 +21,9 @@ use aya::{ #[cfg(feature = "experimental")] use crate::helpers::scan_cgroup_cronjob; -use bytes::BytesMut; -use cortexbrain_common::map_handlers::{init_bpf_maps, map_pinner, populate_blocklist}; +use cortexbrain_common::map_handlers::{ + init_bpf_maps, map_manager, map_pinner, populate_blocklist, +}; use cortexbrain_common::program_handlers::load_program; use cortexbrain_common::{buffer_type::BufferType, map_handlers::BpfMapsData}; use std::{ @@ -36,11 +33,11 @@ use std::{ }; use anyhow::{Context, Ok}; +use cortexbrain_common::buffer_type::BufferSize; use cortexbrain_common::{constants, logger}; -use tokio::{fs, signal}; -use tracing::{debug, error, info, warn}; - use std::collections::HashMap; +use tokio::{fs, signal}; +use tracing::{error, info}; #[tokio::main] async fn main() -> Result<(), anyhow::Error> { @@ -203,34 +200,11 @@ async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> { //TODO: try to change from PerfEventArray to a RingBuffer data structure - let mut map_manager = - HashMap::, Vec>)>::new(); - - // create the PerfEventArrays and the buffers from the BpfMapsData Objects - for (map, name) in bpf_maps - .bpf_obj_map - .into_iter() - .zip(bpf_maps.bpf_obj_names.into_iter()) - // zip two iterators at the same time for map and mapnames - { - debug!("Debugging map type:{:?} for map name {:?}", map, &name); - info!("Creating PerfEventArray for map name {:?}", &name); - - // save the map in a registry if is a PerfEventArray to access them by name - if let std::result::Result::Ok(perf_event_array) = PerfEventArray::try_from(map) { - map_manager.insert(name.clone(), (perf_event_array, Vec::new())); - - // perf_event_arrays.push(perf_event_array); // this is step 1 - // let perf_event_array_buffer = Vec::new(); - // event_buffers.push(perf_event_array_buffer); //this is step 2 - } else { - warn!("Map {:?} is not a PerfEventArray, skipping load", &name); - } - } + let mut maps = map_manager(bpf_maps)?; // fill the input buffers with data from the PerfEventArrays for cpu_id in online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))? { - for (name, (perf_evt_array, perf_evt_array_buffer)) in map_manager.iter_mut() { + for (name, (perf_evt_array, perf_evt_array_buffer)) in maps.iter_mut() { let buf = perf_evt_array.open(cpu_id, None)?; info!( "Buffer created for map {:?} on cpu_id {:?}. Buffer size: {}", @@ -245,23 +219,20 @@ async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> { info!("Listening for events..."); // i need to use remove to move the values from the Map Manager to the the async tasks - let (perf_veth_array, perf_veth_buffers) = map_manager + let (perf_veth_array, perf_veth_buffers) = maps .remove("veth_identity_map") .expect("Cannot create perf_veth buffer"); - let (perf_net_events_array, perf_net_events_buffers) = map_manager + let (perf_net_events_array, perf_net_events_buffers) = maps .remove("events_map") .expect("Cannot create perf_net_events buffer"); - let (tcp_registry_array, tcp_registry_buffers) = map_manager + let (tcp_registry_array, tcp_registry_buffers) = maps .remove("TcpPacketRegistry") .expect("Cannot create tcp_registry buffer"); // init output buffers - let veth_buffers = vec![BytesMut::with_capacity(10 * 1024); online_cpus().iter().len()]; - let events_buffers = vec![BytesMut::with_capacity(1024); online_cpus().iter().len()]; - let tcp_buffers = vec![BytesMut::with_capacity(1024); online_cpus().iter().len()]; - - // init veth link ids - //let veth_link_ids = link_ids; + let veth_buffers = BufferSize::VethEvents.set_buffer(); + let events_buffers = BufferSize::ClassifierNetEvents.set_buffer(); + let tcp_buffers = BufferSize::TcpEvents.set_buffer(); // spawn async tasks let veth_events_displayer = tokio::spawn(async move { diff --git a/core/src/components/metrics/src/helpers.rs b/core/src/components/metrics/src/helpers.rs index a67b607..e0ab006 100644 --- a/core/src/components/metrics/src/helpers.rs +++ b/core/src/components/metrics/src/helpers.rs @@ -1,5 +1,5 @@ use aya::{ - maps::{Map, MapData, PerfEventArray, perf::PerfEventArrayBuffer}, + maps::{MapData, perf::PerfEventArrayBuffer}, util::online_cpus, }; @@ -10,10 +10,14 @@ use std::sync::{ }; use tokio::signal; -use tracing::{debug, error, info, warn}; +use tracing::{error, info}; -use crate::structs::NetworkMetrics; -use crate::structs::TimeStampMetrics; +use cortexbrain_common::map_handlers::map_manager; +use cortexbrain_common::{ + buffer_type::{BufferSize, BufferType}, + buffer_type::{NetworkMetrics, TimeStampMetrics}, + map_handlers::BpfMapsData, +}; pub async fn display_metrics_map( mut perf_buffers: Vec>, @@ -119,50 +123,36 @@ pub async fn display_time_stamp_events_map( info!("Timestamp event listener stopped"); } -pub async fn event_listener(bpf_maps: Vec) -> Result<(), anyhow::Error> { +pub async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> { info!("Getting CPU count..."); - let mut perf_event_arrays = Vec::new(); // contains a vector of PerfEventArrays - let mut event_buffers = Vec::new(); // contains a vector of buffers - - info!("Creating perf buffers..."); - for map in bpf_maps { - debug!("Debugging map type:{:?}", map); - if let std::result::Result::Ok(perf_event_array) = PerfEventArray::try_from(map) { - perf_event_arrays.push(perf_event_array); // this is step 1 - let perf_event_array_buffer = Vec::new(); - event_buffers.push(perf_event_array_buffer); //this is step 2 - } else { - warn!("Map is not a PerfEventArray, skipping load"); - } - } + let mut maps = map_manager(bpf_maps)?; let cpu_count = online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))?; - //info!("CPU count: {}", cpu_count); - for (perf_evt_array, perf_evt_array_buffer) in - perf_event_arrays.iter_mut().zip(event_buffers.iter_mut()) - { - for cpu_id in &cpu_count { - let single_buffer = perf_evt_array.open(*cpu_id, None)?; - perf_evt_array_buffer.push(single_buffer); + for cpu_id in cpu_count { + for (name, (perf_event_array, perf_event_buffer)) in maps.iter_mut() { + let buf = perf_event_array.open(cpu_id, None)?; + perf_event_buffer.push(buf); } } - //info!("Opening perf buffers for {} CPUs...", cpu_count); info!("Perf buffers created successfully"); - let mut event_buffers = event_buffers.into_iter(); - let time_stamp_events_perf_buffer = event_buffers.next().expect(""); - let net_perf_buffer = event_buffers.next().expect(""); + let (time_stamp_events_array, time_stamp_events_perf_buffer) = maps + .remove("time_stamp_events") + .expect("Cannot create time_stamp_events_buffer"); + let (net_perf_array, net_perf_buffer) = maps + .remove("net_metrics") + .expect("Cannot create net_perf_buffer"); // Create shared running flags let net_metrics_running = Arc::new(AtomicBool::new(true)); let time_stamp_events_running = Arc::new(AtomicBool::new(true)); // Create proper sized buffers - let net_metrics_buffers = vec![BytesMut::with_capacity(1024); cpu_count.len()]; - let time_stamp_events_buffers = vec![BytesMut::with_capacity(1024); cpu_count.len()]; + let net_metrics_buffers = BufferSize::NetworkMetricsEvents.set_buffer(); + let time_stamp_events_buffers = BufferSize::TimeMetricsEvents.set_buffer(); // Clone for the signal handler let net_metrics_running_signal = net_metrics_running.clone(); From 07e7892780651db260903fdf03297444258387ba Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Wed, 11 Feb 2026 21:39:29 +0100 Subject: [PATCH 31/46] [#158]: moved Monitoring structures to shared library --- core/common/src/buffer_type.rs | 286 +++++++++++++++++++- core/common/src/lib.rs | 7 +- core/src/components/identity/src/helpers.rs | 58 +--- core/src/components/metrics/src/structs.rs | 33 --- 4 files changed, 286 insertions(+), 98 deletions(-) delete mode 100644 core/src/components/metrics/src/structs.rs diff --git a/core/common/src/buffer_type.rs b/core/common/src/buffer_type.rs index 9fc7828..ad906ce 100644 --- a/core/common/src/buffer_type.rs +++ b/core/common/src/buffer_type.rs @@ -1,3 +1,4 @@ +use aya::{maps::perf::PerfEventArrayBuffer, util::online_cpus}; use bytemuck_derive::Zeroable; use bytes::BytesMut; use std::net::Ipv4Addr; @@ -54,19 +55,21 @@ unsafe impl aya::Pod for PacketLog {} #[cfg(feature = "network-structs")] #[repr(C, packed)] -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Zeroable)] pub struct VethLog { pub name: [u8; 16], // 16 bytes: veth interface name pub state: u64, // 8 bytes: state variable (unsigned long in kernel) - pub dev_addr: [u8; 6], // 32 bytes: device address + pub dev_addr: [u8; 6], // 6 bytes: device address pub event_type: u8, // 1 byte: 1 for veth creation, 2 for veth destruction pub netns: u32, // 4 bytes: network namespace inode number pub pid: u32, // 4 bytes: PID that triggered the event } +#[cfg(feature = "network-structs")] +unsafe impl aya::Pod for VethLog {} #[cfg(feature = "network-structs")] #[repr(C)] -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Zeroable)] pub struct TcpPacketRegistry { pub proto: u8, pub src_ip: u32, @@ -77,6 +80,47 @@ pub struct TcpPacketRegistry { pub command: [u8; 16], pub cgroup_id: u64, } +#[cfg(feature = "network-structs")] +unsafe impl aya::Pod for TcpPacketRegistry {} + +#[cfg(feature = "monitoring-structs")] +pub const TASK_COMM_LEN: usize = 16; // linux/sched.h +#[cfg(feature = "monitoring-structs")] +#[repr(C)] +#[derive(Clone, Copy, Zeroable)] +pub struct NetworkMetrics { + pub tgid: u32, + pub comm: [u8; TASK_COMM_LEN], + pub ts_us: u64, + pub sk_err: i32, // Offset 284 + pub sk_err_soft: i32, // Offset 600 + pub sk_backlog_len: i32, // Offset 196 + pub sk_write_memory_queued: i32, // Offset 376 + pub sk_receive_buffer_size: i32, // Offset 244 + pub sk_ack_backlog: u32, // Offset 604 + pub sk_drops: i32, // Offset 136 +} +#[cfg(feature = "monitoring-structs")] +unsafe impl aya::Pod for NetworkMetrics {} + +#[cfg(feature = "monitoring-structs")] +#[repr(C)] +#[derive(Clone, Copy, Zeroable)] +pub struct TimeStampMetrics { + pub delta_us: u64, + pub ts_us: u64, + pub tgid: u32, + pub comm: [u8; TASK_COMM_LEN], + pub lport: u16, + pub dport_be: u16, + pub af: u16, + pub saddr_v4: u32, + pub daddr_v4: u32, + pub saddr_v6: [u32; 4], + pub daddr_v6: [u32; 4], +} +#[cfg(feature = "monitoring-structs")] +unsafe impl aya::Pod for TimeStampMetrics {} // docs: // This function perform a byte swap from little-endian to big-endian @@ -95,15 +139,23 @@ pub fn reverse_be_addr(addr: u32) -> Ipv4Addr { // enum BuffersType #[cfg(feature = "buffer-reader")] pub enum BufferType { + #[cfg(feature = "network-structs")] PacketLog, + #[cfg(feature = "network-structs")] TcpPacketRegistry, + #[cfg(feature = "network-structs")] VethLog, + #[cfg(feature = "monitoring-structs")] + NetworkMetrics, + #[cfg(feature = "monitoring-structs")] + TimeStampMetrics, } // IDEA: this is an experimental implementation to centralize buffer reading logic // TODO: add variant for cortexflow API exporter #[cfg(feature = "buffer-reader")] impl BufferType { + #[cfg(feature = "network-structs")] pub async fn read_packet_log(buffers: &mut [BytesMut], tot_events: i32, offset: i32) { for i in offset..tot_events { let vec_bytes = &buffers[i as usize]; @@ -147,6 +199,7 @@ impl BufferType { } } } + #[cfg(feature = "network-structs")] pub async fn read_tcp_registry_log(buffers: &mut [BytesMut], tot_events: i32, offset: i32) { for i in offset..tot_events { let vec_bytes = &buffers[i as usize]; @@ -204,11 +257,8 @@ impl BufferType { } } } - pub async fn read_and_handle_veth_log( - buffers: &mut [BytesMut], - tot_events: i32, - offset: i32, - ) { + #[cfg(feature = "network-structs")] + pub async fn read_and_handle_veth_log(buffers: &mut [BytesMut], tot_events: i32, offset: i32) { for i in offset..tot_events { let vec_bytes = &buffers[i as usize]; if vec_bytes.len() < std::mem::size_of::() { @@ -289,4 +339,224 @@ impl BufferType { } } } + #[cfg(feature = "monitoring-structs")] + pub async fn read_network_metrics(buffers: &mut [BytesMut], tot_events: i32, offset: i32) { + for i in offset..tot_events { + let vec_bytes = &buffers[i as usize]; + if vec_bytes.len() < std::mem::size_of::() { + error!( + "Corrupted Network Metrics data. Raw data: {}. Readed {} bytes expected {} bytes", + vec_bytes + .iter() + .map(|b| format!("{:02x}", b)) + .collect::>() + .join(" "), + vec_bytes.len(), + std::mem::size_of::() + ); + continue; + } + if vec_bytes.len() >= std::mem::size_of::() { + let net_metrics: NetworkMetrics = + unsafe { std::ptr::read_unaligned(vec_bytes.as_ptr() as *const _) }; + let tgid = net_metrics.tgid; + let comm = String::from_utf8_lossy(&net_metrics.comm); + let ts_us = net_metrics.ts_us; + let sk_drop_count = net_metrics.sk_drops; + let sk_err = net_metrics.sk_err; + let sk_err_soft = net_metrics.sk_err_soft; + let sk_backlog_len = net_metrics.sk_backlog_len; + let sk_write_memory_queued = net_metrics.sk_write_memory_queued; + let sk_ack_backlog = net_metrics.sk_ack_backlog; + let sk_receive_buffer_size = net_metrics.sk_receive_buffer_size; + + info!( + "tgid: {}, comm: {}, ts_us: {}, sk_drops: {}, sk_err: {}, sk_err_soft: {}, sk_backlog_len: {}, sk_write_memory_queued: {}, sk_ack_backlog: {}, sk_receive_buffer_size: {}", + tgid, + comm, + ts_us, + sk_drop_count, + sk_err, + sk_err_soft, + sk_backlog_len, + sk_write_memory_queued, + sk_ack_backlog, + sk_receive_buffer_size + ); + } + } + } + #[cfg(feature = "monitoring-structs")] + pub async fn read_timestamp_metrics(buffers: &mut [BytesMut], tot_events: i32, offset: i32) { + for i in offset..tot_events { + let vec_bytes = &buffers[i as usize]; + if vec_bytes.len() < std::mem::size_of::() { + error!( + "Corrupted Network Metrics data. Raw data: {}. Readed {} bytes expected {} bytes", + vec_bytes + .iter() + .map(|b| format!("{:02x}", b)) + .collect::>() + .join(" "), + vec_bytes.len(), + std::mem::size_of::() + ); + continue; + } + if vec_bytes.len() >= std::mem::size_of::() { + let time_stamp_event: TimeStampMetrics = + unsafe { std::ptr::read_unaligned(vec_bytes.as_ptr() as *const _) }; + let delta_us = time_stamp_event.delta_us; + let ts_us = time_stamp_event.ts_us; + let tgid = time_stamp_event.tgid; + let comm = String::from_utf8_lossy(&time_stamp_event.comm); + let lport = time_stamp_event.lport; + let dport_be = time_stamp_event.dport_be; + let af = time_stamp_event.af; + info!( + "TimeStampEvent - delta_us: {}, ts_us: {}, tgid: {}, comm: {}, lport: {}, dport_be: {}, af: {}", + delta_us, ts_us, tgid, comm, lport, dport_be, af + ); + } + } + } +} + +// docs: read buffer function: +// template function that take a mut perf_event_array_buffer of type T and a mutable buffer of Vec +#[cfg(feature = "buffer-reader")] +pub async fn read_perf_buffer>( + mut array_buffers: Vec>, + mut buffers: Vec, + buffer_type: BufferType, +) { + // loop over the buffers + loop { + for buf in array_buffers.iter_mut() { + match buf.read_events(&mut buffers) { + Ok(events) => { + // triggered if some events are lost + if events.lost > 0 { + tracing::debug!("Lost events: {} ", events.lost); + } + // triggered if some events are readed + if events.read > 0 { + tracing::debug!("Readed events: {}", events.read); + let offset = 0; + let tot_events = events.read as i32; + + //read the events in the buffer + match buffer_type { + #[cfg(feature = "network-structs")] + BufferType::PacketLog => { + BufferType::read_packet_log(&mut buffers, tot_events, offset).await + } + #[cfg(feature = "network-structs")] + BufferType::TcpPacketRegistry => { + BufferType::read_tcp_registry_log(&mut buffers, tot_events, offset) + .await + } + #[cfg(feature = "network-structs")] + BufferType::VethLog => { + BufferType::read_and_handle_veth_log( + &mut buffers, + tot_events, + offset, + ) + .await + } + #[cfg(feature = "monitoring-structs")] + BufferType::NetworkMetrics => { + BufferType::read_network_metrics(&mut buffers, tot_events, offset) + .await + } + #[cfg(feature = "monitoring-structs")] + BufferType::TimeStampMetrics => { + BufferType::read_timestamp_metrics(&mut buffers, tot_events, offset) + .await + } + } + } + } + Err(e) => { + error!("Cannot read events from buffer. Reason: {} ", e); + } + } + } + tokio::time::sleep(std::time::Duration::from_millis(100)).await; // small sleep + } +} + +#[cfg(feature = "buffer-reader")] +pub enum BufferSize { + #[cfg(feature = "network-structs")] + ClassifierNetEvents, + #[cfg(feature = "network-structs")] + VethEvents, + #[cfg(feature = "network-structs")] + TcpEvents, + #[cfg(feature = "monitoring-structs")] + NetworkMetricsEvents, + #[cfg(feature = "monitoring-structs")] + TimeMetricsEvents, +} +#[cfg(feature = "buffer-reader")] +impl BufferSize { + pub fn get_size(&self) -> usize { + match self { + #[cfg(feature = "network-structs")] + BufferSize::ClassifierNetEvents => std::mem::size_of::(), + #[cfg(feature = "network-structs")] + BufferSize::VethEvents => std::mem::size_of::(), + #[cfg(feature = "network-structs")] + BufferSize::TcpEvents => std::mem::size_of::(), + #[cfg(feature = "monitoring-structs")] + BufferSize::NetworkMetricsEvents => std::mem::size_of::(), + #[cfg(feature = "monitoring-structs")] + BufferSize::TimeMetricsEvents => std::mem::size_of::(), + } + } + pub fn set_buffer(&self) -> Vec { + // iter returns and iterator of cpu ids, + // we need only the total number of cpus to set the buffer size so we use .len() to get + // the count of total cpus and then we allocate a buffer for each cpu with a capacity + // based on the structure size * a factor to have a bigger buffer to avoid overflows and lost events + + // Old buffers where 1024 bytes long. Now we set different buffer size based on + // the frequence of the events. + // ClassifierNetEvents are triggered by the TC classifier program, events has high frequency + // VethEvents are triggered by the creation and deletion of veth interfaces, events has small frequency compared to classifier events + // TcpEvents are triggered by TCP events and connections. Events has similar frequency to ClassifierNetEvents. + + let tot_cpu = online_cpus().iter().len(); // total number of cpus + + // TODO: finish to do all the calculations for the buffer sizes + match self { + #[cfg(feature = "network-structs")] + BufferSize::ClassifierNetEvents => { + let capacity = self.get_size() * 200; + return vec![BytesMut::with_capacity(capacity); tot_cpu]; + } + #[cfg(feature = "network-structs")] + BufferSize::VethEvents => { + let capacity = self.get_size() * 100; // Allocates 4Kb of memory for the buffers + return vec![BytesMut::with_capacity(capacity); tot_cpu]; + } + #[cfg(feature = "network-structs")] + BufferSize::TcpEvents => { + let capacity = self.get_size() * 200; + return vec![BytesMut::with_capacity(capacity); tot_cpu]; + } + #[cfg(feature = "monitoring-structs")] + BufferSize::NetworkMetricsEvents => { + let capacity = self.get_size() * 1024; + return vec![BytesMut::with_capacity(capacity); tot_cpu]; + } + #[cfg(feature = "monitoring-structs")] + BufferSize::TimeMetricsEvents => { + let capacity = self.get_size() * 1024; + return vec![BytesMut::with_capacity(capacity); tot_cpu]; + } + } + } } diff --git a/core/common/src/lib.rs b/core/common/src/lib.rs index d88c1db..d7e48b0 100644 --- a/core/common/src/lib.rs +++ b/core/common/src/lib.rs @@ -1,5 +1,8 @@ -#[cfg(feature = "buffer-reader")] -#[cfg(feature = "network-structs")] +#[cfg(any( + feature = "buffer-reader", + feature = "network-structs", + feature = "monitoring-structs" +))] pub mod buffer_type; pub mod constants; pub mod formatters; diff --git a/core/src/components/identity/src/helpers.rs b/core/src/components/identity/src/helpers.rs index bd76a29..50414bf 100644 --- a/core/src/components/identity/src/helpers.rs +++ b/core/src/components/identity/src/helpers.rs @@ -1,14 +1,13 @@ -use aya::maps::perf::PerfEventArrayBuffer; -use cortexbrain_common::buffer_type::BufferType; use nix::net::if_::if_nameindex; use std::result::Result::Ok; -use tracing::{error, info}; +use tracing::info; // docs: // This function checks if the given interface name is in the list of ignored interfaces // Takes a interface name (iface) as &str and returns true if the interface should be ignored // Typically we want to ignore eth0,docker0,tunl0,lo interfaces because they are not relevant for the internal monitoring // +#[inline(always)] pub fn ignore_iface(iface: &str) -> bool { let ignored_interfaces = ["eth0", "docker0", "tunl0", "lo"]; ignored_interfaces.contains(&iface) @@ -18,6 +17,7 @@ pub fn ignore_iface(iface: &str) -> bool { // This function retrieves the list of veth interfaces on the system, filtering out ignored interfaces with // the ignore_iface function. // +#[inline(always)] pub fn get_veth_channels() -> Vec { //filter interfaces and save the output in the let mut interfaces: Vec = Vec::new(); @@ -36,58 +36,6 @@ pub fn get_veth_channels() -> Vec { interfaces } -// docs: read buffer function: -// template function that take a mut perf_event_array_buffer of type T and a mutable buffer of Vec - -pub async fn read_perf_buffer>( - mut array_buffers: Vec>, - mut buffers: Vec, - buffer_type: BufferType, -) { - // loop over the buffers - loop { - for buf in array_buffers.iter_mut() { - match buf.read_events(&mut buffers) { - Ok(events) => { - // triggered if some events are lost - if events.lost > 0 { - tracing::debug!("Lost events: {} ", events.lost); - } - // triggered if some events are readed - if events.read > 0 { - tracing::debug!("Readed events: {}", events.read); - let offset = 0; - let tot_events = events.read as i32; - - //read the events in the buffer - match buffer_type { - BufferType::PacketLog => { - BufferType::read_packet_log(&mut buffers, tot_events, offset).await - } - BufferType::TcpPacketRegistry => { - BufferType::read_tcp_registry_log(&mut buffers, tot_events, offset) - .await - } - BufferType::VethLog => { - BufferType::read_and_handle_veth_log( - &mut buffers, - tot_events, - offset, - ) - .await - } - } - } - } - Err(e) => { - error!("Cannot read events from buffer. Reason: {} ", e); - } - } - } - tokio::time::sleep(std::time::Duration::from_millis(100)).await; // small sleep - } -} - #[cfg(test)] mod tests { use cortexbrain_common::buffer_type::VethLog; diff --git a/core/src/components/metrics/src/structs.rs b/core/src/components/metrics/src/structs.rs deleted file mode 100644 index dc63ace..0000000 --- a/core/src/components/metrics/src/structs.rs +++ /dev/null @@ -1,33 +0,0 @@ - -pub const TASK_COMM_LEN: usize = 16; // linux/sched.h - -#[repr(C, packed)] -#[derive(Clone, Copy)] -pub struct NetworkMetrics { - pub tgid: u32, - pub comm: [u8; TASK_COMM_LEN], - pub ts_us: u64, - pub sk_err: i32, // Offset 284 - pub sk_err_soft: i32, // Offset 600 - pub sk_backlog_len: i32, // Offset 196 - pub sk_write_memory_queued: i32, // Offset 376 - pub sk_receive_buffer_size: i32, // Offset 244 - pub sk_ack_backlog: u32, // Offset 604 - pub sk_drops: i32, // Offset 136 -} - -#[repr(C)] -#[derive(Clone, Copy)] -pub struct TimeStampMetrics { - pub delta_us: u64, - pub ts_us: u64, - pub tgid: u32, - pub comm: [u8; TASK_COMM_LEN], - pub lport: u16, - pub dport_be: u16, - pub af: u16, - pub saddr_v4: u32, - pub daddr_v4: u32, - pub saddr_v6: [u32; 4], - pub daddr_v6: [u32; 4], -} \ No newline at end of file From 5a8dbe75c15d136f2456240ef7df791ff733851c Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Wed, 11 Feb 2026 21:42:06 +0100 Subject: [PATCH 32/46] [#175]: added otlp provider to metrics service. Simplified map handling and added read_perf_buffer function --- core/common/Cargo.toml | 1 + core/src/components/metrics/Cargo.toml | 5 +- core/src/components/metrics/src/helpers.rs | 148 ++------------------- core/src/components/metrics/src/main.rs | 8 +- 4 files changed, 22 insertions(+), 140 deletions(-) diff --git a/core/common/Cargo.toml b/core/common/Cargo.toml index f604b65..ee50e2b 100644 --- a/core/common/Cargo.toml +++ b/core/common/Cargo.toml @@ -24,6 +24,7 @@ opentelemetry-otlp = { version = "0.31.0", features = ["logs", "grpc-tonic"] } bytemuck = "1.25.0" bytes = "1.11.0" bytemuck_derive = "1.10.2" +tokio = "1.49.0" [features] map-handlers = [] diff --git a/core/src/components/metrics/Cargo.toml b/core/src/components/metrics/Cargo.toml index 0e88d8c..c8dcb5b 100644 --- a/core/src/components/metrics/Cargo.toml +++ b/core/src/components/metrics/Cargo.toml @@ -20,8 +20,11 @@ tracing = "0.1.41" tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } libc = "0.2.172" bytemuck = "1.23.0" -cortexbrain-common = { path = "../../../common", features = [ +cortexbrain-common = { path = "../../../common/", features = [ "map-handlers", "program-handlers", + "buffer-reader", + "monitoring-structs", + "network-structs" ] } nix = { version = "0.30.1", features = ["net"] } diff --git a/core/src/components/metrics/src/helpers.rs b/core/src/components/metrics/src/helpers.rs index e0ab006..0968113 100644 --- a/core/src/components/metrics/src/helpers.rs +++ b/core/src/components/metrics/src/helpers.rs @@ -1,127 +1,11 @@ -use aya::{ - maps::{MapData, perf::PerfEventArrayBuffer}, - util::online_cpus, -}; - -use bytes::BytesMut; -use std::sync::{ - Arc, - atomic::{AtomicBool, Ordering}, -}; -use tokio::signal; - -use tracing::{error, info}; - +use aya::util::online_cpus; use cortexbrain_common::map_handlers::map_manager; use cortexbrain_common::{ - buffer_type::{BufferSize, BufferType}, - buffer_type::{NetworkMetrics, TimeStampMetrics}, + buffer_type::{BufferSize, BufferType, read_perf_buffer}, map_handlers::BpfMapsData, }; - -pub async fn display_metrics_map( - mut perf_buffers: Vec>, - running: Arc, // Changed to Arc - mut buffers: Vec, -) { - info!("Starting metrics event listener..."); - while running.load(Ordering::SeqCst) { - for buf in perf_buffers.iter_mut() { - match buf.read_events(&mut buffers) { - std::result::Result::Ok(events) => { - if events.read > 0 { - info!("Read {} metric events", events.read); - } - for i in 0..events.read { - let data = &buffers[i]; - if data.len() >= std::mem::size_of::() { - let net_metrics: NetworkMetrics = - unsafe { std::ptr::read_unaligned(data.as_ptr() as *const _) }; - let tgid = net_metrics.tgid; - let comm = String::from_utf8_lossy(&net_metrics.comm); - let ts_us = net_metrics.ts_us; - let sk_drop_count = net_metrics.sk_drops; - let sk_err = net_metrics.sk_err; - let sk_err_soft = net_metrics.sk_err_soft; - let sk_backlog_len = net_metrics.sk_backlog_len; - let sk_write_memory_queued = net_metrics.sk_write_memory_queued; - let sk_ack_backlog = net_metrics.sk_ack_backlog; - let sk_receive_buffer_size = net_metrics.sk_receive_buffer_size; - info!( - "tgid: {}, comm: {}, ts_us: {}, sk_drops: {}, sk_err: {}, sk_err_soft: {}, sk_backlog_len: {}, sk_write_memory_queued: {}, sk_ack_backlog: {}, sk_receive_buffer_size: {}", - tgid, - comm, - ts_us, - sk_drop_count, - sk_err, - sk_err_soft, - sk_backlog_len, - sk_write_memory_queued, - sk_ack_backlog, - sk_receive_buffer_size - ); - } else { - info!( - "Received data too small: {} bytes, expected: {}", - data.len(), - std::mem::size_of::() - ); - } - } - } - Err(e) => { - error!("Error reading events: {:?}", e); - } - } - } - tokio::time::sleep(std::time::Duration::from_millis(100)).await; - } - info!("Metrics event listener stopped"); -} - -pub async fn display_time_stamp_events_map( - mut perf_buffers: Vec>, - running: Arc, // Changed to Arc - mut buffers: Vec, -) { - info!("Starting timestamp event listener..."); - while running.load(Ordering::SeqCst) { - for buf in perf_buffers.iter_mut() { - match buf.read_events(&mut buffers) { - std::result::Result::Ok(events) => { - if events.read > 0 { - info!("Read {} timestamp events", events.read); - } - for i in 0..events.read { - let data = &buffers[i]; - if data.len() >= std::mem::size_of::() { - let time_stamp_event: TimeStampMetrics = - unsafe { std::ptr::read_unaligned(data.as_ptr() as *const _) }; - let delta_us = time_stamp_event.delta_us; - let ts_us = time_stamp_event.ts_us; - let tgid = time_stamp_event.tgid; - let comm = String::from_utf8_lossy(&time_stamp_event.comm); - let lport = time_stamp_event.lport; - let dport_be = time_stamp_event.dport_be; - let af = time_stamp_event.af; - info!( - "TimeStampEvent - delta_us: {}, ts_us: {}, tgid: {}, comm: {}, lport: {}, dport_be: {}, af: {}", - delta_us, ts_us, tgid, comm, lport, dport_be, af - ); - } else { - info!("Received timestamp data too small: {} bytes", data.len()); - } - } - } - Err(e) => { - error!("Error reading timestamp events: {:?}", e); - } - } - } - tokio::time::sleep(std::time::Duration::from_millis(100)).await; - } - info!("Timestamp event listener stopped"); -} +use tokio::signal; +use tracing::{error, info}; pub async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> { info!("Getting CPU count..."); @@ -146,30 +30,27 @@ pub async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> .remove("net_metrics") .expect("Cannot create net_perf_buffer"); - // Create shared running flags - let net_metrics_running = Arc::new(AtomicBool::new(true)); - let time_stamp_events_running = Arc::new(AtomicBool::new(true)); - // Create proper sized buffers let net_metrics_buffers = BufferSize::NetworkMetricsEvents.set_buffer(); let time_stamp_events_buffers = BufferSize::TimeMetricsEvents.set_buffer(); - // Clone for the signal handler - let net_metrics_running_signal = net_metrics_running.clone(); - let time_stamp_events_running_signal = time_stamp_events_running.clone(); - info!("Starting event listener tasks..."); let metrics_map_displayer = tokio::spawn(async move { - display_metrics_map(net_perf_buffer, net_metrics_running, net_metrics_buffers).await; + read_perf_buffer( + net_perf_buffer, + net_metrics_buffers, + BufferType::NetworkMetrics, + ) + .await; }); let time_stamp_events_displayer = tokio::spawn(async move { - display_time_stamp_events_map( + read_perf_buffer( time_stamp_events_perf_buffer, - time_stamp_events_running, time_stamp_events_buffers, + BufferType::TimeStampMetrics, ) - .await + .await; }); info!("Event listeners started, entering main loop..."); @@ -189,9 +70,6 @@ pub async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> _ = signal::ctrl_c() => { info!("Ctrl-C received, shutting down..."); - // Stop the event loops - net_metrics_running_signal.store(false, std::sync::atomic::Ordering::SeqCst); - time_stamp_events_running_signal.store(false, std::sync::atomic::Ordering::SeqCst); } } diff --git a/core/src/components/metrics/src/main.rs b/core/src/components/metrics/src/main.rs index e8677fb..e6c9069 100644 --- a/core/src/components/metrics/src/main.rs +++ b/core/src/components/metrics/src/main.rs @@ -1,6 +1,6 @@ use anyhow::{Context, Ok}; use aya::Ebpf; -use cortexbrain_common::{constants, logger}; +use cortexbrain_common::constants; use std::{ env, fs, path::Path, @@ -11,15 +11,14 @@ use tracing::{error, info}; mod helpers; use crate::helpers::event_listener; +use cortexbrain_common::logger::otlp_logger_init; use cortexbrain_common::map_handlers::{init_bpf_maps, map_pinner}; use cortexbrain_common::program_handlers::load_program; -mod structs; - #[tokio::main] async fn main() -> Result<(), anyhow::Error> { //init tracing subscriber - logger::init_default_logger(); + let otlp_provider = otlp_logger_init("metrics-service".to_string()); info!("Starting metrics service..."); info!("fetching data"); @@ -78,6 +77,7 @@ async fn main() -> Result<(), anyhow::Error> { } Err(e) => { error!("Error initializing BPF maps: {:?}", e); + let _ = otlp_provider.shutdown(); return Err(e); } } From 74342ec79a9549590ecc4783f3b4ca951d78b2c9 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Wed, 11 Feb 2026 21:56:34 +0100 Subject: [PATCH 33/46] [#158]: fixed imports from the common crate --- core/src/components/identity/src/main.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/components/identity/src/main.rs b/core/src/components/identity/src/main.rs index c70011e..d42564a 100644 --- a/core/src/components/identity/src/main.rs +++ b/core/src/components/identity/src/main.rs @@ -11,7 +11,7 @@ mod helpers; mod service_discovery; -use crate::helpers::{get_veth_channels, read_perf_buffer}; +use crate::helpers::get_veth_channels; use aya::{ Ebpf, programs::{SchedClassifier, TcAttachType, tc::SchedClassifierLinkId}, @@ -21,6 +21,7 @@ use aya::{ #[cfg(feature = "experimental")] use crate::helpers::scan_cgroup_cronjob; +use cortexbrain_common::buffer_type::read_perf_buffer; use cortexbrain_common::map_handlers::{ init_bpf_maps, map_manager, map_pinner, populate_blocklist, }; From 794df9eebcf5932351d701ceb6676839da5edb2b Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Wed, 11 Feb 2026 21:57:01 +0100 Subject: [PATCH 34/46] added TODOs in conntracker kernel module --- core/src/components/conntracker/src/main.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/src/components/conntracker/src/main.rs b/core/src/components/conntracker/src/main.rs index e723e4b..8438838 100644 --- a/core/src/components/conntracker/src/main.rs +++ b/core/src/components/conntracker/src/main.rs @@ -32,6 +32,10 @@ use crate::tc::try_identity_classifier; use crate::tcp_analyzer::try_tcp_analyzer; use crate::veth_tracer::try_veth_tracer; +// TODO: add function to track +// 1. kprobe:tcp_enter_memory_pressure +// 2. kprobe:tcp_create_openreq_child (https://elixir.bootlin.com/linux/v6.18.6/source/net/ipv4/tcp_ipv4.c#L1776) [function: *tcp_v4_syn_recv_sock] + // docs: // // virtual ethernet (veth) interface tracer: From cc4703b0414009df1533a494fb05f013f12c7705 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Wed, 11 Feb 2026 22:39:06 +0100 Subject: [PATCH 35/46] [#175]: added repr(C,packed) for monitoring structures. Fixed imports. Added error handling in event listener --- core/common/src/buffer_type.rs | 4 ++-- core/src/components/identity/src/main.rs | 23 +++++++++++-------- core/src/components/metrics/src/helpers.rs | 14 ++++++++++- core/src/components/metrics/src/main.rs | 10 ++++---- .../metrics_tracer/src/data_structures.rs | 6 ++--- 5 files changed, 38 insertions(+), 19 deletions(-) diff --git a/core/common/src/buffer_type.rs b/core/common/src/buffer_type.rs index ad906ce..ac0d600 100644 --- a/core/common/src/buffer_type.rs +++ b/core/common/src/buffer_type.rs @@ -86,7 +86,7 @@ unsafe impl aya::Pod for TcpPacketRegistry {} #[cfg(feature = "monitoring-structs")] pub const TASK_COMM_LEN: usize = 16; // linux/sched.h #[cfg(feature = "monitoring-structs")] -#[repr(C)] +#[repr(C, packed)] #[derive(Clone, Copy, Zeroable)] pub struct NetworkMetrics { pub tgid: u32, @@ -104,7 +104,7 @@ pub struct NetworkMetrics { unsafe impl aya::Pod for NetworkMetrics {} #[cfg(feature = "monitoring-structs")] -#[repr(C)] +#[repr(C, packed)] #[derive(Clone, Copy, Zeroable)] pub struct TimeStampMetrics { pub delta_us: u64, diff --git a/core/src/components/identity/src/main.rs b/core/src/components/identity/src/main.rs index d42564a..4efa3c9 100644 --- a/core/src/components/identity/src/main.rs +++ b/core/src/components/identity/src/main.rs @@ -21,21 +21,21 @@ use aya::{ #[cfg(feature = "experimental")] use crate::helpers::scan_cgroup_cronjob; -use cortexbrain_common::buffer_type::read_perf_buffer; -use cortexbrain_common::map_handlers::{ - init_bpf_maps, map_manager, map_pinner, populate_blocklist, +use cortexbrain_common::{ + buffer_type::{BufferSize, BufferType, read_perf_buffer}, + constants, logger, + map_handlers::BpfMapsData, + map_handlers::{init_bpf_maps, map_manager, map_pinner, populate_blocklist}, + program_handlers::load_program, }; -use cortexbrain_common::program_handlers::load_program; -use cortexbrain_common::{buffer_type::BufferType, map_handlers::BpfMapsData}; use std::{ convert::TryInto, path::Path, sync::{Arc, Mutex}, }; -use anyhow::{Context, Ok}; -use cortexbrain_common::buffer_type::BufferSize; -use cortexbrain_common::{constants, logger}; +use anyhow::{Context, Ok, anyhow}; + use std::collections::HashMap; use tokio::{fs, signal}; use tracing::{error, info}; @@ -206,7 +206,12 @@ async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> { // fill the input buffers with data from the PerfEventArrays for cpu_id in online_cpus().map_err(|e| anyhow::anyhow!("Error {:?}", e))? { for (name, (perf_evt_array, perf_evt_array_buffer)) in maps.iter_mut() { - let buf = perf_evt_array.open(cpu_id, None)?; + let buf = perf_evt_array.open(cpu_id, None).map_err(|e| { + anyhow!( + "Cannot create perf_event_array buffer from perf_event_array. Reason: {}", + e + ) + })?; info!( "Buffer created for map {:?} on cpu_id {:?}. Buffer size: {}", name, diff --git a/core/src/components/metrics/src/helpers.rs b/core/src/components/metrics/src/helpers.rs index 0968113..843f45d 100644 --- a/core/src/components/metrics/src/helpers.rs +++ b/core/src/components/metrics/src/helpers.rs @@ -1,3 +1,4 @@ +use anyhow::anyhow; use aya::util::online_cpus; use cortexbrain_common::map_handlers::map_manager; use cortexbrain_common::{ @@ -16,7 +17,18 @@ pub async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> for cpu_id in cpu_count { for (name, (perf_event_array, perf_event_buffer)) in maps.iter_mut() { - let buf = perf_event_array.open(cpu_id, None)?; + let buf = perf_event_array.open(cpu_id, None).map_err(|e| { + anyhow!( + "Cannot create perf_event_array buffer from perf_event_array. Reason: {}", + e + ) + })?; + info!( + "Buffer created for map {:?} on cpu_id {:?}. Buffer size: {}", + name, + cpu_id, + std::mem::size_of_val(&buf) + ); perf_event_buffer.push(buf); } } diff --git a/core/src/components/metrics/src/main.rs b/core/src/components/metrics/src/main.rs index e6c9069..e5558eb 100644 --- a/core/src/components/metrics/src/main.rs +++ b/core/src/components/metrics/src/main.rs @@ -1,6 +1,5 @@ use anyhow::{Context, Ok}; use aya::Ebpf; -use cortexbrain_common::constants; use std::{ env, fs, path::Path, @@ -11,9 +10,12 @@ use tracing::{error, info}; mod helpers; use crate::helpers::event_listener; -use cortexbrain_common::logger::otlp_logger_init; -use cortexbrain_common::map_handlers::{init_bpf_maps, map_pinner}; -use cortexbrain_common::program_handlers::load_program; +use cortexbrain_common::{ + constants, + logger::otlp_logger_init, + map_handlers::{init_bpf_maps, map_pinner}, + program_handlers::load_program, +}; #[tokio::main] async fn main() -> Result<(), anyhow::Error> { diff --git a/core/src/components/metrics_tracer/src/data_structures.rs b/core/src/components/metrics_tracer/src/data_structures.rs index f6d7afe..e9866a8 100644 --- a/core/src/components/metrics_tracer/src/data_structures.rs +++ b/core/src/components/metrics_tracer/src/data_structures.rs @@ -2,7 +2,7 @@ use aya_ebpf::{macros::map, maps::{LruPerCpuHashMap, HashMap, PerfEventArray}}; pub const TASK_COMM_LEN: usize = 16; - +#[repr(C,packed)] pub struct NetworkMetrics { pub tgid: u32, pub comm: [u8; TASK_COMM_LEN], @@ -16,7 +16,7 @@ pub struct NetworkMetrics { pub sk_drops: i32, // Offset 136 } -#[repr(C)] +#[repr(C,packed)] #[derive(Copy, Clone)] pub struct TimeStampStartInfo { pub comm: [u8; TASK_COMM_LEN], @@ -25,7 +25,7 @@ pub struct TimeStampStartInfo { } // Event we send to userspace when latency is computed -#[repr(C)] +#[repr(C,packed)] #[derive(Copy, Clone)] pub struct TimeStampEvent { pub delta_us: u64, From dfa9d9c7a899d3a2c8a4e513417640c4d4ee2993 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Fri, 13 Feb 2026 22:11:40 +0100 Subject: [PATCH 36/46] [#158]: added control to skip load of blocklist if the addresses vector is empty. Added comments and annotations --- core/common/src/map_handlers.rs | 8 ++++++-- core/common/src/program_handlers.rs | 24 +++++++++++++++--------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/core/common/src/map_handlers.rs b/core/common/src/map_handlers.rs index 313f73e..b246b70 100644 --- a/core/common/src/map_handlers.rs +++ b/core/common/src/map_handlers.rs @@ -121,8 +121,11 @@ pub async fn populate_blocklist() -> Result<(), Error> { .filter(|s| !s.is_empty()) .collect(); //String parsing from "x y" to ["x","y"] - info!("Inserting addresses: {:?}", addresses); - for item in addresses { + if addresses.is_empty() { + warn!("No addresses found in the blocklist. Skipping load"); + } + for item in &addresses { + info!("Inserting addresses: {:?}", &item); let addr = Ipv4Addr::from_str(&item)?.octets(); let _ = blocklist_map.insert(addr, addr, 0); } @@ -138,6 +141,7 @@ pub async fn populate_blocklist() -> Result<(), Error> { } #[cfg(feature = "map-handlers")] +// TODO: modify this to accept also HashMap types pub fn load_perf_event_array_from_mapdata( path: &'static str, ) -> Result, Error> { diff --git a/core/common/src/program_handlers.rs b/core/common/src/program_handlers.rs index 42cd3ba..347be51 100644 --- a/core/common/src/program_handlers.rs +++ b/core/common/src/program_handlers.rs @@ -13,32 +13,38 @@ pub fn load_program( .lock() .map_err(|e| anyhow::anyhow!("Cannot get value from lock. Reason: {}", e))?; - // Load and attach the eBPF programs + // Load and attach the eBPF program let program: &mut KProbe = bpf_new .program_mut(program_name) .ok_or_else(|| anyhow::anyhow!("Program {} not found", program_name))? .try_into() .map_err(|e| anyhow::anyhow!("Failed to convert program: {:?}", e))?; + // STEP 1: load program + program .load() .map_err(|e| anyhow::anyhow!("Cannot load program: {}. Error: {}", &program_name, e))?; + // STEP 2: Attach the loaded program to kernel symbol match program.attach(kernel_symbol, 0) { - Ok(_) => info!("{} program attached successfully", kernel_symbol), + Ok(_) => info!( + "{} program attached successfully to kernel symbol {}", + &program_name, &kernel_symbol + ), Err(e) => { - error!("Error attaching {} program {:?}", kernel_symbol, e); + error!( + "Error attaching {} program to kernel symbol {}. Reason: {:?}", + &program_name, &kernel_symbol, e + ); return Err(anyhow::anyhow!( - "Failed to attach {}: {:?}", - kernel_symbol, + "Failed to attach program {} to kernel symbol {}. Reason {:?}", + &program_name, + &kernel_symbol, e )); } }; - info!( - "eBPF program {} loaded and attached successfully", - program_name - ); Ok(()) } From a5d87dbab9aba5368a866836a91015232b31b5fd Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Fri, 13 Feb 2026 22:14:25 +0100 Subject: [PATCH 37/46] [#158]: added shared hashmap to store tracked veth (TRACKED_VETH). The shared map is used to store the veth names and the status (attached or not) during the startup --- .../conntracker/src/data_structures.rs | 10 ++- core/src/components/identity/src/main.rs | 69 ++++++++++++++++--- 2 files changed, 66 insertions(+), 13 deletions(-) diff --git a/core/src/components/conntracker/src/data_structures.rs b/core/src/components/conntracker/src/data_structures.rs index f4c5047..c55cd3f 100644 --- a/core/src/components/conntracker/src/data_structures.rs +++ b/core/src/components/conntracker/src/data_structures.rs @@ -47,7 +47,7 @@ pub struct ConnArray { // pid: kernel process ID // -#[repr(C,packed)] +#[repr(C, packed)] #[derive(Clone, Copy)] pub struct VethLog { pub name: [u8; 16], // 16 bytes: veth interface name @@ -94,9 +94,13 @@ pub static mut CONNTRACKER: LruPerCpuHashMap = pub static mut VETH_EVENTS: PerfEventArray = PerfEventArray::new(0); #[map(name = "Blocklist", pinning = "by_name")] -pub static mut BLOCKLIST: HashMap<[u8; 4], [u8; 4]> = - HashMap::<[u8; 4], [u8; 4]>::with_max_entries(1024, 0); +pub static mut BLOCKLIST: HashMap<[u8; 4], [u8; 4]> = HashMap::with_max_entries(1024, 0); //here i need to pass an address like this: [135,171,168,192] #[map(name = "TcpPacketRegistry", pinning = "by_name")] pub static mut PACKET_REGISTRY: PerfEventArray = PerfEventArray::new(0); + +#[map(name = "tracked_veth", pinning = "by_name")] +// This map takes a registry of tracked veth interfaces +// The maximum number of characters is 16 of type u8 +pub static mut TRACKED_VETH: HashMap<[u8; 16], [u8; 8]> = HashMap::with_max_entries(1024, 0); diff --git a/core/src/components/identity/src/main.rs b/core/src/components/identity/src/main.rs index 4efa3c9..8d13e22 100644 --- a/core/src/components/identity/src/main.rs +++ b/core/src/components/identity/src/main.rs @@ -14,7 +14,8 @@ mod service_discovery; use crate::helpers::get_veth_channels; use aya::{ Ebpf, - programs::{SchedClassifier, TcAttachType, tc::SchedClassifierLinkId}, + maps::{Map, MapData}, + programs::{SchedClassifier, TcAttachType}, util::online_cpus, }; @@ -36,7 +37,7 @@ use std::{ use anyhow::{Context, Ok, anyhow}; -use std::collections::HashMap; +//use std::collections::HashMap; use tokio::{fs, signal}; use tracing::{error, info}; @@ -49,7 +50,7 @@ async fn main() -> Result<(), anyhow::Error> { info!("fetching data"); // To Store link_ids they can be used to detach tc - let link_ids = Arc::new(Mutex::new(HashMap::::new())); + //let mut link_ids = HashMap::::new(); //init conntracker data path let bpf_path = @@ -67,6 +68,7 @@ async fn main() -> Result<(), anyhow::Error> { "veth_identity_map".to_string(), "TcpPacketRegistry".to_string(), "Blocklist".to_string(), + "tracked_veth".to_string(), ]; match init_bpf_maps(bpf.clone(), map_data) { std::result::Result::Ok(bpf_maps) => { @@ -90,8 +92,8 @@ async fn main() -> Result<(), anyhow::Error> { } { - init_tc_classifier(bpf.clone(), interfaces, link_ids.clone()).await.context( - "An error occured during the execution of attach_bpf_program function" + init_tc_classifier(bpf.clone(), interfaces).await.context( + "An error occured during the execution of attach_bpf_program function", )?; } { @@ -120,10 +122,10 @@ async fn main() -> Result<(), anyhow::Error> { } //attach the tc classifier program to a vector of interfaces +// TODO: consider to create a load schedule classifier in the common functions async fn init_tc_classifier( bpf: Arc>, ifaces: Vec, - link_ids: Arc>>, ) -> Result<(), anyhow::Error> { //this funtion initialize the tc classifier program info!("Loading programs"); @@ -138,10 +140,33 @@ async fn init_tc_classifier( .try_into() .context("Failed to init SchedClassifier program")?; + // load classifier program + program .load() .context("Failed to load identity_classifier program")?; + // attach program only to desired interfaces. We can skip the dock0,tunl0,lo and eth0 interface + // we also save the interfaces to a BPF_HASH_MAP to easily monitor the interfaces using the agent + + // decleare link_ids HashMap which is a shared hashmap between kernel and userspace + // Link_ids hashmap has type of HashMap<[u8; 16], [u8; 8]>. The key is the program name and the value is the state + + // at this point the pinning is already successfull so we can invoque the maps from the pin + + let link_ids_mapdata = MapData::from_pin("/sys/fs/bpf/maps/tracked_veth") + .map_err(|e| anyhow!("Cannot return link_ids_mapdata. Reason: {}", e))?; + + let link_ids_map = Map::HashMap(link_ids_mapdata); + + let mut link_ids: aya::maps::HashMap = + aya::maps::HashMap::try_from(link_ids_map).map_err(|e| { + anyhow!( + "Cannot create link_ids HashMap from link_ids_map. Reason:{}", + e + ) + })?; + for interface in ifaces { match program.attach(&interface, TcAttachType::Ingress) { std::result::Result::Ok(link_id) => { @@ -149,10 +174,34 @@ async fn init_tc_classifier( "Program 'identity_classifier' attached to interface {}", interface ); - let mut map = link_ids - .lock() - .map_err(|e| anyhow::anyhow!("Cannot get value from lock. Reason: {}", e))?; - map.insert(interface.clone(), link_id); + let interface_bytes = interface.as_bytes(); + + let mut if_bytes = [0u8; 16]; + + // to set the len compare the interface_bytes.len() with the if_bytes.len() [16] and take the minimum + // if we have interface_bytes.len() < than 16 we set the len + let len = interface_bytes.len().min(if_bytes.len()); + + // now we can copy the bytes from the slice into the if_bytes variable + if_bytes[..len].copy_from_slice(&interface_bytes[..len]); + + // we compute the same process for the state_bytes + let mut state_bytes = [0u8; 8]; + let state = b"attached"; // prints "attached" as [u8;8] sequence of bytes + let state_len = state.len().min(state_bytes.len()); + state_bytes[..state_len].copy_from_slice(&state[..state_len]); + + match link_ids.insert(if_bytes, state_bytes, 0) { + std::result::Result::Ok(_) => { + info!("Veth interface {} added into map", &interface); + } + Err(e) => { + error!( + "Cannot add Veth interface {} into map. Reason: {}", + &interface, e + ); + } + } } Err(e) => error!( "Error attaching program to interface {}: {:?}", From be2158d41326418c3a0624e0679e573f0c12fffa Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Sat, 14 Feb 2026 13:53:34 +0100 Subject: [PATCH 38/46] [#182]: Added GetTrackedVethFromHashMap grpc endpoint to see the tracked veths (pt.2) --- cli/src/monitoring.rs | 33 +++++++------- core/api/Cargo.toml | 3 +- core/api/protos/agent.proto | 13 +++++- core/api/src/agent.rs | 90 ++++++++++++++++++++++++++++++++++++- core/api/src/api.rs | 53 +++++++++++++++++++--- core/api/src/requests.rs | 10 +++++ 6 files changed, 174 insertions(+), 28 deletions(-) diff --git a/cli/src/monitoring.rs b/cli/src/monitoring.rs index 72a94b8..eefae1c 100644 --- a/cli/src/monitoring.rs +++ b/cli/src/monitoring.rs @@ -10,7 +10,7 @@ use tonic_reflection::pb::v1::server_reflection_response::MessageResponse; use agent_api::client::{connect_to_client, connect_to_server_reflection}; use agent_api::requests::{ get_all_features, send_active_connection_request, send_dropped_packets_request, - send_latency_metrics_request, send_tracked_veth_request, + send_latency_metrics_request, send_tracked_veth_request, send_veth_tracked_hashmap_req, }; use crate::errors::CliError; @@ -304,25 +304,24 @@ pub async fn monitor_tracked_veth() -> Result<(), CliError> { "Connecting to cortexflow Client".white() ); match connect_to_client().await { - Ok(client) => match send_tracked_veth_request(client).await { + Ok(client) => match send_veth_tracked_hashmap_req(client).await { Ok(response) => { let veth_response = response.into_inner(); - if veth_response.tot_monitored_veth == 0 { - println!("{} {} ", "=====>".blue().bold(), "No tracked veth found"); - Ok(()) - } else { - println!( - "{} {} {} {} ", - "=====>".blue().bold(), - "Found:", - &veth_response.tot_monitored_veth, - "tracked veth" - ); - for veth in veth_response.veth_names.iter() { - println!("{} {}", "=====>".blue().bold(), &veth); - } - Ok(()) + // if veth_response.tot_monitored_veth == 0 { + // println!("{} {} ", "=====>".blue().bold(), "No tracked veth found"); + // Ok(()) + // } else { + // println!( + // "{} {} {} {} ", + // "=====>".blue().bold(), + // "Found:", + // &veth_response.tot_monitored_veth, + // "tracked veth" + // ); + for veth in veth_response.veths.iter() { + println!("{} {:?}", "=====>".blue().bold(), &veth); } + Ok(()) } Err(e) => { return Err(CliError::AgentError( diff --git a/core/api/Cargo.toml b/core/api/Cargo.toml index a422fd7..0070430 100644 --- a/core/api/Cargo.toml +++ b/core/api/Cargo.toml @@ -32,7 +32,8 @@ aya = "0.13.1" cortexbrain-common = { path = "../common", features = [ "map-handlers", "network-structs", - "buffer-reader" + "buffer-reader", + "monitoring-structs" ] } tonic-reflection = "0.14.0" tonic-build = "0.14.0" diff --git a/core/api/protos/agent.proto b/core/api/protos/agent.proto index 9bfc6e4..e2b1500 100644 --- a/core/api/protos/agent.proto +++ b/core/api/protos/agent.proto @@ -84,7 +84,13 @@ message VethEvent{ uint32 pid = 6; // Process ID } -//declare agent api +message VethHashMapResponse{ // returns tracked veth from the tracked_veth hashmap + string status = 1; + map veths = 2; +} + +// Agent Service + service Agent{ // active connections endpoint rpc ActiveConnections(RequestActiveConnections) returns (ActiveConnectionResponse); @@ -102,10 +108,15 @@ service Agent{ // dropped packets endpoint rpc GetDroppedPacketsMetrics(google.protobuf.Empty) returns (DroppedPacketsResponse); + // TODO: can i combine this 2 endpoints? // active veth info endpoint rpc GetTrackedVeth(google.protobuf.Empty) returns (VethResponse); + // get tracked veth from blocklist + rpc GetTrackedVethFromHashMap(google.protobuf.Empty) returns (VethHashMapResponse); } +// Blocklist + message AddIpToBlocklistRequest{ optional string ip = 1 ; } diff --git a/core/api/src/agent.rs b/core/api/src/agent.rs index cb93ddd..259c1ab 100644 --- a/core/api/src/agent.rs +++ b/core/api/src/agent.rs @@ -151,6 +151,17 @@ pub struct VethEvent { #[prost(uint32, tag = "6")] pub pid: u32, } +/// returns tracked veth from the tracked_veth hashmap +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct VethHashMapResponse { + #[prost(string, tag = "1")] + pub status: ::prost::alloc::string::String, + #[prost(map = "string, string", tag = "2")] + pub veths: ::std::collections::HashMap< + ::prost::alloc::string::String, + ::prost::alloc::string::String, + >, +} #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] pub struct AddIpToBlocklistRequest { #[prost(string, optional, tag = "1")] @@ -192,7 +203,6 @@ pub mod agent_client { )] use tonic::codegen::*; use tonic::codegen::http::Uri; - /// declare agent api #[derive(Debug, Clone)] pub struct AgentClient { inner: tonic::client::Grpc, @@ -444,6 +454,31 @@ pub mod agent_client { .insert(GrpcMethod::new("agent.Agent", "GetTrackedVeth")); self.inner.unary(req, path, codec).await } + /// get tracked veth from blocklist + pub async fn get_tracked_veth_from_hash_map( + &mut self, + request: impl tonic::IntoRequest<()>, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + > { + self.inner + .ready() + .await + .map_err(|e| { + tonic::Status::unknown( + format!("Service was not ready: {}", e.into()), + ) + })?; + let codec = tonic_prost::ProstCodec::default(); + let path = http::uri::PathAndQuery::from_static( + "/agent.Agent/GetTrackedVethFromHashMap", + ); + let mut req = request.into_request(); + req.extensions_mut() + .insert(GrpcMethod::new("agent.Agent", "GetTrackedVethFromHashMap")); + self.inner.unary(req, path, codec).await + } } } /// Generated server implementations. @@ -511,8 +546,15 @@ pub mod agent_server { &self, request: tonic::Request<()>, ) -> std::result::Result, tonic::Status>; + /// get tracked veth from blocklist + async fn get_tracked_veth_from_hash_map( + &self, + request: tonic::Request<()>, + ) -> std::result::Result< + tonic::Response, + tonic::Status, + >; } - /// declare agent api #[derive(Debug)] pub struct AgentServer { inner: Arc, @@ -885,6 +927,50 @@ pub mod agent_server { }; Box::pin(fut) } + "/agent.Agent/GetTrackedVethFromHashMap" => { + #[allow(non_camel_case_types)] + struct GetTrackedVethFromHashMapSvc(pub Arc); + impl tonic::server::UnaryService<()> + for GetTrackedVethFromHashMapSvc { + type Response = super::VethHashMapResponse; + type Future = BoxFuture< + tonic::Response, + tonic::Status, + >; + fn call(&mut self, request: tonic::Request<()>) -> Self::Future { + let inner = Arc::clone(&self.0); + let fut = async move { + ::get_tracked_veth_from_hash_map( + &inner, + request, + ) + .await + }; + Box::pin(fut) + } + } + let accept_compression_encodings = self.accept_compression_encodings; + let send_compression_encodings = self.send_compression_encodings; + let max_decoding_message_size = self.max_decoding_message_size; + let max_encoding_message_size = self.max_encoding_message_size; + let inner = self.inner.clone(); + let fut = async move { + let method = GetTrackedVethFromHashMapSvc(inner); + let codec = tonic_prost::ProstCodec::default(); + let mut grpc = tonic::server::Grpc::new(codec) + .apply_compression_config( + accept_compression_encodings, + send_compression_encodings, + ) + .apply_max_message_size_config( + max_decoding_message_size, + max_encoding_message_size, + ); + let res = grpc.unary(method, req).await; + Ok(res) + }; + Box::pin(fut) + } _ => { Box::pin(async move { let mut response = http::Response::new( diff --git a/core/api/src/api.rs b/core/api/src/api.rs index 79b9df3..405f805 100644 --- a/core/api/src/api.rs +++ b/core/api/src/api.rs @@ -1,8 +1,8 @@ use anyhow::Context; +use anyhow::anyhow; use chrono::Local; use cortexbrain_common::formatters::{format_ipv4, format_ipv6}; use cortexbrain_common::map_handlers::load_perf_event_array_from_mapdata; -use prost::bytes::BytesMut; use std::str::FromStr; use std::sync::Mutex; use tonic::{Request, Response, Status}; @@ -28,7 +28,8 @@ use cortexbrain_common::buffer_type::VethLog; // * contains agent api configuration use crate::agent::{ ActiveConnectionResponse, AddIpToBlocklistRequest, BlocklistResponse, RequestActiveConnections, - RmIpFromBlocklistRequest, RmIpFromBlocklistResponse, VethResponse, agent_server::Agent, + RmIpFromBlocklistRequest, RmIpFromBlocklistResponse, VethHashMapResponse, VethResponse, + agent_server::Agent, }; use crate::constants::PIN_BLOCKLIST_MAP_PATH; @@ -38,6 +39,9 @@ use cortexbrain_common::buffer_type::IpProtocols; use std::net::Ipv4Addr; use tracing::warn; +use cortexbrain_common::buffer_type::BufferSize; +use cortexbrain_common::map_handlers::map_manager; + pub struct AgentApi { //* event_rx is an istance of a mpsc receiver. //* is used to receive the data from the transmitter (tx) @@ -162,6 +166,9 @@ impl Default for AgentApi { tracked_veth_tx: veth_tx.clone(), }; + // init map manager + //let map_manager = map_manager(maps)? + // For network metrics //spawn an event readers @@ -177,7 +184,7 @@ impl Default for AgentApi { .open(cpu_id, None) .expect("Error during the creation of net_events_buf structure"); - let buffers = vec![BytesMut::with_capacity(4096); 8]; + let buffers = BufferSize::ClassifierNetEvents.set_buffer(); net_events_buffer.push((buf, buffers)); } @@ -262,7 +269,7 @@ impl Default for AgentApi { .open(cpu_id, None) .expect("Error during the creation of net_metrics_buf structure"); - let buffers = vec![BytesMut::with_capacity(4096); 8]; + let buffers = BufferSize::NetworkMetricsEvents.set_buffer(); net_metrics_buffer.push((buf, buffers)); } @@ -343,7 +350,7 @@ impl Default for AgentApi { .open(cpu_id, None) .expect("Error during the creation of time stamp events buf structure"); - let buffers = vec![BytesMut::with_capacity(4096); 8]; + let buffers = BufferSize::TimeMetricsEvents.set_buffer(); ts_events_buffer.push((buf, buffers)); } @@ -421,7 +428,7 @@ impl Default for AgentApi { .open(cpu_id, None) .expect("Error during the creation of time stamp events buf structure"); - let buffers = vec![BytesMut::with_capacity(4096); 8]; + let buffers = BufferSize::VethEvents.set_buffer(); veth_events_buffer.push((buf, buffers)); } @@ -560,7 +567,10 @@ impl Agent for AgentApi { //convert ip from string to [u8;4] type and insert into the bpf map let u8_4_ip = Ipv4Addr::from_str(&ip).unwrap().octets(); //TODO: convert datetime in a kernel compatible format - blocklist_map.insert(u8_4_ip, u8_4_ip, 0); + blocklist_map + .insert(u8_4_ip, u8_4_ip, 0) + .map_err(|e| anyhow!("Cannot insert address in the blocklist. Reason: {}", e)) + .unwrap(); info!("CURRENT BLOCKLIST: {:?}", blocklist_map); } let path = std::env::var(PIN_BLOCKLIST_MAP_PATH) @@ -774,4 +784,33 @@ impl Agent for AgentApi { Ok(Response::new(response)) } + + async fn get_tracked_veth_from_hash_map( + &self, + request: Request<()>, + ) -> Result, Status> { + info!("Returning veth hashmap"); + //open blocklist map + let mapdata = MapData::from_pin("/sys/fs/bpf/maps/tracked_veth") + .expect("cannot open tracked_veth Mapdata"); + let tracked_veth_mapdata = Map::HashMap(mapdata); //load mapdata + + let tracked_veth_map: ayaHashMap = + ayaHashMap::try_from(tracked_veth_mapdata).unwrap(); + + //convert the maps with a buffer to match the protobuffer types + + let mut converted_tracked_veth_map: HashMap = HashMap::new(); + for item in tracked_veth_map.iter() { + let (k, v) = item.unwrap(); + // convert keys and values from [u8;4] to String + let key = String::from_utf8(k.to_vec()).unwrap(); + let value = String::from_utf8(v.to_vec()).unwrap(); + converted_tracked_veth_map.insert(key, value); + } + Ok(Response::new(VethHashMapResponse { + status: "success".to_string(), + veths: converted_tracked_veth_map, + })) + } } diff --git a/core/api/src/requests.rs b/core/api/src/requests.rs index 06a4030..7c9f447 100644 --- a/core/api/src/requests.rs +++ b/core/api/src/requests.rs @@ -14,6 +14,7 @@ use crate::agent::LatencyMetricsResponse; use crate::agent::RequestActiveConnections; use crate::agent::RmIpFromBlocklistRequest; use crate::agent::RmIpFromBlocklistResponse; +use crate::agent::VethHashMapResponse; use crate::agent::VethResponse; use crate::agent::agent_client::AgentClient; @@ -100,3 +101,12 @@ pub async fn send_tracked_veth_request( let response = client.get_tracked_veth(request).await?; Ok(response) } + +#[cfg(feature = "client")] +pub async fn send_veth_tracked_hashmap_req( + mut client: AgentClient, +) -> Result, Error> { + let request = Request::new(()); + let response = client.get_tracked_veth_from_hash_map(request).await?; + Ok(response) +} From 3e589e46d5b69111e52dbe83bd43c5a5b689b4b8 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Mon, 2 Mar 2026 17:58:20 +0100 Subject: [PATCH 39/46] [#158]: removed struct.rs Added fill_buffers in buffer_type.rs. Simplified buffers filling and buffer initialization --- core/api/src/agent.rs | 2 + core/api/src/api.rs | 234 +++++++++------------------------ core/api/src/lib.rs | 1 - core/api/src/main.rs | 1 - core/api/src/structs.rs | 48 ------- core/common/src/buffer_type.rs | 22 ++++ 6 files changed, 89 insertions(+), 219 deletions(-) delete mode 100644 core/api/src/structs.rs diff --git a/core/api/src/agent.rs b/core/api/src/agent.rs index 259c1ab..8d004b9 100644 --- a/core/api/src/agent.rs +++ b/core/api/src/agent.rs @@ -432,6 +432,7 @@ pub mod agent_client { .insert(GrpcMethod::new("agent.Agent", "GetDroppedPacketsMetrics")); self.inner.unary(req, path, codec).await } + /// TODO: can i combine this 2 endpoints? /// active veth info endpoint pub async fn get_tracked_veth( &mut self, @@ -541,6 +542,7 @@ pub mod agent_server { tonic::Response, tonic::Status, >; + /// TODO: can i combine this 2 endpoints? /// active veth info endpoint async fn get_tracked_veth( &self, diff --git a/core/api/src/api.rs b/core/api/src/api.rs index 405f805..ba25101 100644 --- a/core/api/src/api.rs +++ b/core/api/src/api.rs @@ -1,6 +1,11 @@ use anyhow::Context; use anyhow::anyhow; +use aya::maps::perf::PerfEventArrayBuffer; use chrono::Local; +use cortexbrain_common::buffer_type::IpProtocols; +use cortexbrain_common::buffer_type::NetworkMetrics; +use cortexbrain_common::buffer_type::PacketLog; +use cortexbrain_common::buffer_type::TimeStampMetrics; use cortexbrain_common::formatters::{format_ipv4, format_ipv6}; use cortexbrain_common::map_handlers::load_perf_event_array_from_mapdata; use std::str::FromStr; @@ -8,7 +13,7 @@ use std::sync::Mutex; use tonic::{Request, Response, Status}; use tracing::info; -use aya::{maps::MapData, util::online_cpus}; +use aya::maps::MapData; use std::result::Result::Ok; use tonic::async_trait; @@ -22,7 +27,6 @@ use crate::agent::{ LatencyMetricsResponse, VethEvent, }; -use crate::structs::{NetworkMetrics, PacketLog, TimeStampMetrics}; use cortexbrain_common::buffer_type::VethLog; // * contains agent api configuration @@ -35,97 +39,23 @@ use crate::constants::PIN_BLOCKLIST_MAP_PATH; use crate::helpers::comm_to_string; use aya::maps::Map; -use cortexbrain_common::buffer_type::IpProtocols; use std::net::Ipv4Addr; use tracing::warn; use cortexbrain_common::buffer_type::BufferSize; -use cortexbrain_common::map_handlers::map_manager; +use cortexbrain_common::buffer_type::fill_buffers; pub struct AgentApi { //* event_rx is an istance of a mpsc receiver. //* is used to receive the data from the transmitter (tx) active_connection_event_rx: Mutex, Status>>>, - active_connection_event_tx: mpsc::Sender, Status>>, + pub(crate) active_connection_event_tx: mpsc::Sender, Status>>, latency_metrics_rx: Mutex, Status>>>, - latency_metrics_tx: mpsc::Sender, Status>>, + pub(crate) latency_metrics_tx: mpsc::Sender, Status>>, dropped_packet_metrics_rx: Mutex, Status>>>, - dropped_packet_metrics_tx: mpsc::Sender, Status>>, + pub(crate) dropped_packet_metrics_tx: mpsc::Sender, Status>>, tracked_veth_rx: Mutex, Status>>>, - tracked_veth_tx: mpsc::Sender, Status>>, -} - -//* Event sender trait. Takes an event from a map and send that to the mpsc channel -//* using the send_map function -#[async_trait] -pub trait EventSender: Send + Sync + 'static { - async fn send_active_connection_event(&self, event: Vec); - async fn send_active_connection_event_map( - &self, - map: Vec, - tx: mpsc::Sender, Status>>, - ) { - let status = Status::new(tonic::Code::Ok, "success"); - let event = Ok(map); - - let _ = tx.send(event).await; - } - - async fn send_latency_metrics_event(&self, event: Vec); - async fn send_latency_metrics_event_map( - &self, - map: Vec, - tx: mpsc::Sender, Status>>, - ) { - let status = Status::new(tonic::Code::Ok, "success"); - let event = Ok(map); - let _ = tx.send(event).await; - } - - async fn send_dropped_packet_metrics_event(&self, event: Vec); - async fn send_dropped_packet_metrics_event_map( - &self, - map: Vec, - tx: mpsc::Sender, Status>>, - ) { - let status = Status::new(tonic::Code::Ok, "success"); - let event = Ok(map); - let _ = tx.send(event).await; - } - - async fn send_tracked_veth_event(&self, event: Vec); - async fn send_tracked_veth_event_map( - &self, - map: Vec, - tx: mpsc::Sender, Status>>, - ) { - let status = Status::new(tonic::Code::Ok, "success"); - let event = Ok(map); - let _ = tx.send(event).await; - } -} - -// send event function. takes an HashMap and send that using mpsc event_tx -#[async_trait] -impl EventSender for AgentApi { - async fn send_active_connection_event(&self, event: Vec) { - self.send_active_connection_event_map(event, self.active_connection_event_tx.clone()) - .await; - } - - async fn send_latency_metrics_event(&self, event: Vec) { - self.send_latency_metrics_event_map(event, self.latency_metrics_tx.clone()) - .await; - } - - async fn send_dropped_packet_metrics_event(&self, event: Vec) { - self.send_dropped_packet_metrics_event_map(event, self.dropped_packet_metrics_tx.clone()) - .await; - } - async fn send_tracked_veth_event(&self, event: Vec) { - self.send_tracked_veth_event_map(event, self.tracked_veth_tx.clone()) - .await; - } + pub(crate) tracked_veth_tx: mpsc::Sender, Status>>, } //initialize a default trait for AgentApi. Loads a name and a bpf istance. @@ -137,13 +67,13 @@ impl Default for AgentApi { // // TODO: in the future will be better to not use .unwrap() - let mut active_connection_events_array = + let active_connection_events_array = load_perf_event_array_from_mapdata("/sys/fs/bpf/maps/events_map").unwrap(); - let mut network_metrics_events_array = + let network_metrics_events_array = load_perf_event_array_from_mapdata("/sys/fs/bpf/trace_maps/net_metrics").unwrap(); - let mut time_stamp_events_array = + let time_stamp_events_array = load_perf_event_array_from_mapdata("/sys/fs/bpf/trace_maps/time_stamp_events").unwrap(); - let mut tracked_veth_events_array = + let tracked_veth_events_array = load_perf_event_array_from_mapdata("/sys/fs/bpf/maps/veth_identity_map").unwrap(); // @@ -155,6 +85,7 @@ impl Default for AgentApi { let (drop_tx, drop_rx) = mpsc::channel(2048); let (veth_tx, tracked_veth_rx) = mpsc::channel(1024); + // init the API to send the events from the agent to the CLI let api = AgentApi { active_connection_event_rx: conn_rx.into(), active_connection_event_tx: conn_tx.clone(), @@ -169,35 +100,42 @@ impl Default for AgentApi { // init map manager //let map_manager = map_manager(maps)? + // init the buffers + let mut net_events_buffers = BufferSize::TcpEvents.set_buffer(); + let mut net_metrics_buffers = BufferSize::NetworkMetricsEvents.set_buffer(); + let mut ts_metrics_buffers = BufferSize::TimeMetricsEvents.set_buffer(); + let mut veth_metrics_buffers = BufferSize::VethEvents.set_buffer(); + + // init the Vec of Buffers + + let mut net_events_vec_buffer = Vec::>::new(); + let mut net_metrics_vec_buffer = Vec::>::new(); + let mut ts_events_vec_buffer = Vec::>::new(); + let mut veth_events_vec_buffer = Vec::>::new(); + + // fill the Vec of Buffers + + net_events_vec_buffer = fill_buffers(net_events_vec_buffer, active_connection_events_array); + net_metrics_vec_buffer = fill_buffers(net_metrics_vec_buffer, network_metrics_events_array); + + ts_events_vec_buffer = fill_buffers(ts_events_vec_buffer, time_stamp_events_array); + + veth_events_vec_buffer = fill_buffers(veth_events_vec_buffer, tracked_veth_events_array); + // For network metrics //spawn an event readers task::spawn(async move { - let mut net_events_buffer = Vec::new(); - //scan the cpus to read the data - - for cpu_id in online_cpus() - .map_err(|e| anyhow::anyhow!("Error {:?}", e)) - .unwrap() - { - let buf = active_connection_events_array - .open(cpu_id, None) - .expect("Error during the creation of net_events_buf structure"); - - let buffers = BufferSize::ClassifierNetEvents.set_buffer(); - net_events_buffer.push((buf, buffers)); - } - info!("Starting event listener"); //send the data through a mpsc channel loop { - for (buf, buffers) in net_events_buffer.iter_mut() { - match buf.read_events(buffers) { + for buf in net_events_vec_buffer.iter_mut() { + match buf.read_events(&mut net_events_buffers) { Ok(events) => { //read the events, this function is similar to the one used in identity/helpers.rs/display_events if events.read > 0 { for i in 0..events.read { - let data = &buffers[i]; + let data = &net_events_buffers[i]; if data.len() >= std::mem::size_of::() { let pl: PacketLog = unsafe { std::ptr::read(data.as_ptr() as *const _) }; @@ -258,32 +196,17 @@ impl Default for AgentApi { }); task::spawn(async move { - let mut net_metrics_buffer = Vec::new(); - - //scan the cpus to read the data - for cpu_id in online_cpus() - .map_err(|e| anyhow::anyhow!("Error {:?}", e)) - .unwrap() - { - let buf = network_metrics_events_array - .open(cpu_id, None) - .expect("Error during the creation of net_metrics_buf structure"); - - let buffers = BufferSize::NetworkMetricsEvents.set_buffer(); - net_metrics_buffer.push((buf, buffers)); - } - info!("Starting network metrics listener"); //send the data through a mpsc channel loop { - for (buf, buffers) in net_metrics_buffer.iter_mut() { - match buf.read_events(buffers) { + for buf in net_metrics_vec_buffer.iter_mut() { + match buf.read_events(&mut net_metrics_buffers) { Ok(events) => { //read the events, this function is similar to the one used in identity/helpers.rs/display_events if events.read > 0 { for i in 0..events.read { - let data = &buffers[i]; + let data = &net_metrics_buffers[i]; if data.len() >= std::mem::size_of::() { let nm: NetworkMetrics = unsafe { std::ptr::read(data.as_ptr() as *const _) }; @@ -340,34 +263,22 @@ impl Default for AgentApi { }); task::spawn(async move { - let mut ts_events_buffer = Vec::new(); - //scan the cpus to read the data - for cpu_id in online_cpus() - .map_err(|e| anyhow::anyhow!("Error {:?}", e)) - .unwrap() - { - let buf = time_stamp_events_array - .open(cpu_id, None) - .expect("Error during the creation of time stamp events buf structure"); - - let buffers = BufferSize::TimeMetricsEvents.set_buffer(); - ts_events_buffer.push((buf, buffers)); - } - info!("Starting time stamp events listener"); //send the data through a mpsc channel loop { - for (buf, buffers) in ts_events_buffer.iter_mut() { - match buf.read_events(buffers) { + for buf in ts_events_vec_buffer.iter_mut() { + match buf.read_events(&mut ts_metrics_buffers) { Ok(events) => { //read the events, this function is similar to the one used in identity/helpers.rs/display_events if events.read > 0 { for i in 0..events.read { - let data = &buffers[i]; + let data = &ts_metrics_buffers[i]; if data.len() >= std::mem::size_of::() { let tsm: TimeStampMetrics = unsafe { std::ptr::read(data.as_ptr() as *const _) }; + let saddr_v6 = tsm.saddr_v6; + let daddr_v6 = tsm.daddr_v6; let latency_metric = LatencyMetric { delta_us: tsm.delta_us, timestamp_us: tsm.ts_us, @@ -378,8 +289,8 @@ impl Default for AgentApi { address_family: tsm.af as u32, src_address_v4: format_ipv4(tsm.saddr_v4), dst_address_v4: format_ipv4(tsm.daddr_v4), - src_address_v6: format_ipv6(&tsm.saddr_v6), - dst_address_v6: format_ipv6(&tsm.daddr_v6), + src_address_v6: format_ipv6(&saddr_v6), + dst_address_v6: format_ipv6(&daddr_v6), }; info!( "Latency Metric - tgid: {}, process_name: {}, delta_us: {}, timestamp_us: {}, local_port: {}, remote_port: {}, address_family: {}, src_address_v4: {}, dst_address_v4: {}, src_address_v6: {}, dst_address_v6: {}", @@ -416,34 +327,19 @@ impl Default for AgentApi { } }); - // TODO: this part needs a better implementation task::spawn(async move { - let mut veth_events_buffer = Vec::new(); - //scan the cpus to read the data - for cpu_id in online_cpus() - .map_err(|e| anyhow::anyhow!("Error {:?}", e)) - .unwrap() - { - let buf = tracked_veth_events_array - .open(cpu_id, None) - .expect("Error during the creation of time stamp events buf structure"); - - let buffers = BufferSize::VethEvents.set_buffer(); - veth_events_buffer.push((buf, buffers)); - } - info!("Starting time stamp events listener"); //send the data through a mpsc channel loop { - for (buf, buffers) in veth_events_buffer.iter_mut() { - match buf.read_events(buffers) { + for buf in veth_events_vec_buffer.iter_mut() { + match buf.read_events(&mut veth_metrics_buffers) { Ok(events) => { //read the events, this function is similar to the one used in identity/helpers.rs/display_events if events.read > 0 { for i in 0..events.read { info!("Found veth events {}", events.read); - let data = &buffers[i]; + let data = &veth_metrics_buffers[i]; if data.len() >= std::mem::size_of::() { let veth: VethLog = unsafe { std::ptr::read(data.as_ptr() as *const _) }; @@ -515,7 +411,7 @@ impl Agent for AgentApi { request: Request, ) -> Result, Status> { //read request - let req = request.into_inner(); + let _req = request.into_inner(); //create the hashmap to process events from the mpsc channel queue let mut aggregated_events: Vec = Vec::new(); @@ -562,7 +458,7 @@ impl Agent for AgentApi { } else { // add ip to the blocklist // log blocklist event - let datetime = Local::now().to_string(); + let _datetime = Local::now().to_string(); let ip = req.ip.unwrap(); //convert ip from string to [u8;4] type and insert into the bpf map let u8_4_ip = Ipv4Addr::from_str(&ip).unwrap().octets(); @@ -573,14 +469,14 @@ impl Agent for AgentApi { .unwrap(); info!("CURRENT BLOCKLIST: {:?}", blocklist_map); } - let path = std::env::var(PIN_BLOCKLIST_MAP_PATH) + let _path = std::env::var(PIN_BLOCKLIST_MAP_PATH) .context("Blocklist map path not found!") .unwrap(); //convert the maps with a buffer to match the protobuffer types let mut converted_blocklist_map: HashMap = HashMap::new(); for item in blocklist_map.iter() { - let (k, v) = item.unwrap(); + let (k, _v) = item.unwrap(); // convert keys and values from [u8;4] to String let key = Ipv4Addr::from(k).to_string(); let value = Ipv4Addr::from(k).to_string(); @@ -596,7 +492,7 @@ impl Agent for AgentApi { async fn check_blocklist( &self, - request: Request<()>, + _request: Request<()>, ) -> Result, Status> { info!("Returning blocklist hashmap"); //open blocklist map @@ -611,7 +507,7 @@ impl Agent for AgentApi { let mut converted_blocklist_map: HashMap = HashMap::new(); for item in blocklist_map.iter() { - let (k, v) = item.unwrap(); + let (k, _v) = item.unwrap(); // convert keys and values from [u8;4] to String let key = Ipv4Addr::from(k).to_string(); let value = Ipv4Addr::from(k).to_string(); @@ -638,7 +534,7 @@ impl Agent for AgentApi { //remove the address let ip_to_remove = req.ip; let u8_4_ip_to_remove = Ipv4Addr::from_str(&ip_to_remove).unwrap().octets(); - blocklist_map.remove(&u8_4_ip_to_remove); + let _ = blocklist_map.remove(&u8_4_ip_to_remove); //convert the maps with a buffer to match the protobuffer types let mut converted_blocklist_map: HashMap = HashMap::new(); @@ -661,7 +557,7 @@ impl Agent for AgentApi { request: Request<()>, ) -> Result, Status> { // Extract the request parameters - let req = request.into_inner(); + let _req = request.into_inner(); info!("Getting latency metrics"); // Here you would typically query your data source for the latency metrics @@ -724,7 +620,7 @@ impl Agent for AgentApi { request: Request<()>, ) -> Result, Status> { // Extract the request parameters - let req = request.into_inner(); + let _req = request.into_inner(); info!("Getting dropped packets metrics"); let mut aggregated_dropped_packet_metrics: Vec = Vec::new(); @@ -759,7 +655,7 @@ impl Agent for AgentApi { &self, request: Request<()>, ) -> Result, Status> { - let req = request.into_inner(); + let _req = request.into_inner(); info!("Getting tracked veth metrics"); let mut tracked_veth = Vec::::new(); let mut tot_veth = 0 as i32; @@ -787,7 +683,7 @@ impl Agent for AgentApi { async fn get_tracked_veth_from_hash_map( &self, - request: Request<()>, + _request: Request<()>, ) -> Result, Status> { info!("Returning veth hashmap"); //open blocklist map diff --git a/core/api/src/lib.rs b/core/api/src/lib.rs index cf2c0c9..e093920 100644 --- a/core/api/src/lib.rs +++ b/core/api/src/lib.rs @@ -2,7 +2,6 @@ pub mod api; pub mod agent; pub mod client; pub mod requests; -pub mod structs; pub mod constants; pub mod helpers; pub mod batcher; diff --git a/core/api/src/main.rs b/core/api/src/main.rs index 30fe550..87478f5 100644 --- a/core/api/src/main.rs +++ b/core/api/src/main.rs @@ -6,7 +6,6 @@ mod agent; mod api; mod constants; mod helpers; -mod structs; mod agent_proto { use tonic::include_file_descriptor_set; diff --git a/core/api/src/structs.rs b/core/api/src/structs.rs deleted file mode 100644 index 97a4017..0000000 --- a/core/api/src/structs.rs +++ /dev/null @@ -1,48 +0,0 @@ -use bytemuck_derive::Zeroable; -use crate::constants::TASK_COMM_LEN; - - -#[repr(C)] -#[derive(Clone, Copy, Zeroable)] -pub struct PacketLog { - pub proto: u8, - pub src_ip: u32, - pub src_port: u16, - pub dst_ip: u32, - pub dst_port: u16, - pub pid: u32, -} -unsafe impl aya::Pod for PacketLog {} - -#[repr(C, packed)] -#[derive(Clone, Copy, Zeroable)] -pub struct NetworkMetrics { - pub tgid: u32, - pub comm: [u8; TASK_COMM_LEN], - pub ts_us: u64, - pub sk_err: i32, - pub sk_err_soft: i32, - pub sk_backlog_len: i32, - pub sk_write_memory_queued: i32, - pub sk_receive_buffer_size: i32, - pub sk_ack_backlog: u32, - pub sk_drops: i32, -} -unsafe impl aya::Pod for NetworkMetrics {} - -#[repr(C)] -#[derive(Clone, Copy, Zeroable)] -pub struct TimeStampMetrics { - pub delta_us: u64, - pub ts_us: u64, - pub tgid: u32, - pub comm: [u8; TASK_COMM_LEN], - pub lport: u16, - pub dport_be: u16, - pub af: u16, - pub saddr_v4: u32, - pub daddr_v4: u32, - pub saddr_v6: [u32; 4], - pub daddr_v6: [u32; 4], -} -unsafe impl aya::Pod for TimeStampMetrics {} diff --git a/core/common/src/buffer_type.rs b/core/common/src/buffer_type.rs index ac0d600..f962698 100644 --- a/core/common/src/buffer_type.rs +++ b/core/common/src/buffer_type.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "buffer-reader")] +use aya::maps::{MapData, PerfEventArray}; use aya::{maps::perf::PerfEventArrayBuffer, util::online_cpus}; use bytemuck_derive::Zeroable; use bytes::BytesMut; @@ -560,3 +562,23 @@ impl BufferSize { } } } + +#[cfg(feature = "buffer-reader")] +pub fn fill_buffers( + //buf: PerfEventArrayBuffer, + mut vec_of_buffers: Vec>, + //buffers: Vec, + mut events_array: PerfEventArray, +) -> Vec> { + for cpu_id in online_cpus() + .map_err(|e| anyhow::anyhow!("Error {:?}", e)) + .unwrap() + { + let buf = events_array + .open(cpu_id, None) + .expect("Error during the creation of net_events_buf structure"); + + vec_of_buffers.push(buf); + } + vec_of_buffers +} From 487212593d801744e20320788476f771f5564fb5 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Mon, 2 Mar 2026 18:02:40 +0100 Subject: [PATCH 40/46] [#158]: moved Event Sender trait in the batcher.rs module --- core/api/src/batcher.rs | 89 +++++++++++++++++++++++++++++++++++------ 1 file changed, 77 insertions(+), 12 deletions(-) diff --git a/core/api/src/batcher.rs b/core/api/src/batcher.rs index 6e984d5..12d9278 100644 --- a/core/api/src/batcher.rs +++ b/core/api/src/batcher.rs @@ -1,22 +1,87 @@ // This module is experimental and may be subject to major changes. -use crate::agent::{ConnectionEvent, DroppedPacketMetric, LatencyMetric}; +// Do not use any of these functions +// FIXME: this module will be deprecated in the next version probably -pub enum MetricsBatcher { - LatencyMetrics, - DroppedPacketsMetrics, -} -pub enum EventBatcher {} -impl MetricsBatcher { - pub async fn send_batched_metrics() { - todo!(); +use tokio::sync::mpsc; +use tonic::{Status, async_trait}; + +use crate::{ + agent::{ConnectionEvent, DroppedPacketMetric, LatencyMetric, VethEvent}, + api::AgentApi, +}; + +// Event sender trait. Takes an event from a map and send that to the mpsc channel +// using the send_map function +#[async_trait] +pub trait EventSender: Send + Sync + 'static { + async fn send_active_connection_event(&self, event: Vec); + async fn send_active_connection_event_map( + &self, + map: Vec, + tx: mpsc::Sender, Status>>, + ) { + let status = Status::new(tonic::Code::Ok, "success"); + let event = Ok(map); + + let _ = tx.send(event).await; + } + + async fn send_latency_metrics_event(&self, event: Vec); + async fn send_latency_metrics_event_map( + &self, + map: Vec, + tx: mpsc::Sender, Status>>, + ) { + let status = Status::new(tonic::Code::Ok, "success"); + let event = Ok(map); + let _ = tx.send(event).await; + } + + async fn send_dropped_packet_metrics_event(&self, event: Vec); + async fn send_dropped_packet_metrics_event_map( + &self, + map: Vec, + tx: mpsc::Sender, Status>>, + ) { + let status = Status::new(tonic::Code::Ok, "success"); + let event = Ok(map); + let _ = tx.send(event).await; + } + + async fn send_tracked_veth_event(&self, event: Vec); + async fn send_tracked_veth_event_map( + &self, + map: Vec, + tx: mpsc::Sender, Status>>, + ) { + let status = Status::new(tonic::Code::Ok, "success"); + let event = Ok(map); + let _ = tx.send(event).await; } } -impl EventBatcher { - pub async fn send_batched_logs() { - todo!(); +// send event function. takes an HashMap and send that using mpsc event_tx +#[async_trait] +impl EventSender for AgentApi { + async fn send_active_connection_event(&self, event: Vec) { + self.send_active_connection_event_map(event, self.active_connection_event_tx.clone()) + .await; + } + + async fn send_latency_metrics_event(&self, event: Vec) { + self.send_latency_metrics_event_map(event, self.latency_metrics_tx.clone()) + .await; + } + + async fn send_dropped_packet_metrics_event(&self, event: Vec) { + self.send_dropped_packet_metrics_event_map(event, self.dropped_packet_metrics_tx.clone()) + .await; + } + async fn send_tracked_veth_event(&self, event: Vec) { + self.send_tracked_veth_event_map(event, self.tracked_veth_tx.clone()) + .await; } } From c80791255c383ec755e504b5a3cf3955d016c0ec Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Mon, 2 Mar 2026 18:26:28 +0100 Subject: [PATCH 41/46] [deprecated]: removed deprecated Scripts folder --- Scripts/check-cortexflow-components.sh | 21 -------- Scripts/check-dev-requisites.sh | 41 --------------- Scripts/install-debugging-tools.sh | 45 ---------------- Scripts/test-connections.sh | 49 ------------------ Scripts/test-proxy-endpoints.sh | 45 ---------------- Scripts/test-proxy-ports.sh | 18 ------- Scripts/test-sidecar-advanced-tcp.sh | 67 ------------------------ Scripts/test-sidecar-advanced-udp.sh | 70 ------------------------- Scripts/test-sidecar-proxy.sh | 71 -------------------------- 9 files changed, 427 deletions(-) delete mode 100755 Scripts/check-cortexflow-components.sh delete mode 100755 Scripts/check-dev-requisites.sh delete mode 100755 Scripts/install-debugging-tools.sh delete mode 100755 Scripts/test-connections.sh delete mode 100755 Scripts/test-proxy-endpoints.sh delete mode 100755 Scripts/test-proxy-ports.sh delete mode 100755 Scripts/test-sidecar-advanced-tcp.sh delete mode 100755 Scripts/test-sidecar-advanced-udp.sh delete mode 100755 Scripts/test-sidecar-proxy.sh diff --git a/Scripts/check-cortexflow-components.sh b/Scripts/check-cortexflow-components.sh deleted file mode 100755 index 01232cb..0000000 --- a/Scripts/check-cortexflow-components.sh +++ /dev/null @@ -1,21 +0,0 @@ -echo "Welcome to CortexFlow tools" -echo "Checking CortexFlow components" - -echo "Checking if CortexFlow namespace exists..." -if kubectl get namespace cortexflow >/dev/null 2>&1; then - echo "✅ Namespace 'cortexflow' exists." - - sleep 1 - echo "Checking pods..." - kubectl get pods -n cortexflow - - echo - - sleep 1 - echo "Checking services..." - kubectl get svc -n cortexflow - echo -else - echo "❌ Namespace 'cortexflow' does not exist." - exit 1 -fi diff --git a/Scripts/check-dev-requisites.sh b/Scripts/check-dev-requisites.sh deleted file mode 100755 index c775754..0000000 --- a/Scripts/check-dev-requisites.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -echo "Welcome to the CortexFlow tools" -echo "Checking pre-requisites for developers" -echo - -echo "Checking Docker installation..." -if which docker >/dev/null 2>&1; then - echo "✅ Docker is installed." -else - echo "❌ Docker is NOT installed." -fi -sleep 1 - -echo -echo "Checking Minikube installation..." -if which minikube >/dev/null 2>&1; then - echo "✅ Minikube is installed." -else - echo "❌ Minikube is NOT installed." -fi -sleep 1 - -echo - -echo "Checking Node.js installation..." -if which node >/dev/null 2>&1; then - echo "✅ Node.js is installed." -else - echo "Node.js is NOT installed." -fi -sleep 1 - -echo - -echo "Checking npm installation..." -if which npm >/dev/null 2>&1; then - echo "✅ npm is installed." -else - echo "❌ npm is NOT installed." -fi diff --git a/Scripts/install-debugging-tools.sh b/Scripts/install-debugging-tools.sh deleted file mode 100755 index 9e3ed01..0000000 --- a/Scripts/install-debugging-tools.sh +++ /dev/null @@ -1,45 +0,0 @@ -if ! kubectl exec -n cortexflow $1 -c $2 -- which netstat >/dev/null 2>&1; then - echo "🔨 installing netstat" - kubectl exec -n cortexflow $1 -c $2 -- apt update - kubectl exec -n cortexflow $1 -c $2 -- apt install -y net-tools -else - echo "✅ Netstat is installed." -fi - -sleep 1.5 - -if ! kubectl exec -n cortexflow $1 -c $2 -- which nc >/dev/null 2>&1; then - echo "🔨 installing netcat" - kubectl exec -n cortexflow $1 -c $2 -- apt install -y netcat-traditional -else - echo "✅ Netcat is installed." -fi - -sleep 1.5 - -if ! kubectl exec -n cortexflow $1 -c $2 -- which curl >/dev/null 2>&1; then - echo "🔨 installing curl" - kubectl exec -n cortexflow $1 -c $2 -- apt install -y curl -else - echo "✅ Curl is installed." -fi - -sleep 1.5 - -if ! kubectl exec -n cortexflow $1 -c $2 -- which nslookup >/dev/null 2>&1; then - echo "🔨 installing dnsutils" - kubectl exec -n cortexflow $1 -c $2 -- apt install -y dnsutils -else - echo "✅ Nslookup is installed." -fi - -sleep 1.5 - -if ! kubectl exec -n cortexflow $1 -c $2 -- which tcpdump >/dev/null 2>&1; then - echo "🔨 installing tcpdump" - kubectl exec -n cortexflow $1 -c $2 -- apt install -y tcpdump -else - echo "✅ tcpdump is installed." -fi - -sleep 1.5 diff --git a/Scripts/test-connections.sh b/Scripts/test-connections.sh deleted file mode 100755 index 95dcc94..0000000 --- a/Scripts/test-connections.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -proxy_pod_name=$(kubectl get pods -n cortexflow --no-headers -o custom-columns=":metadata.name" | grep cortexflow-proxy) -proxy_ip=$(kubectl get -o template service/proxy-service -n cortexflow --template='{{.spec.clusterIP}}') -proxy_udp_port=5053 -proxy_tcp_port=5054 -proxy_metrics_port=9090 -proxy_container=$(kubectl get pod $proxy_pod_name -n cortexflow -o jsonpath='{.spec.containers[*].name}') - -echo "🧑🏻‍🔬 Checking cortexflow proxy inside the proxy pod: $proxy_pod_name" - -sleep 1.5 -echo "🔨 checking env variables" -kubectl exec -n cortexflow $proxy_pod_name -- env - -sleep 1.5 - -./install-debugging-tools.sh $proxy_pod_name $proxy_container -echo -./test-proxy-ports.sh $proxy_pod_name $proxy_metrics_port -echo -sleep 1.5 -echo "🔨 Sending a test package with netcat from proxy pod -> proxy pod" -kubectl exec -n cortexflow $proxy_pod_name -- sh -c echo b"Hi CortexFlow" | nc -u -w5 -v 127.0.0.1 $proxy_udp_port - -echo -sleep 1.5 -echo "🔨 Testing the DNS resolution manually with nslookup" -kubectl exec -n cortexflow $proxy_pod_name -- nslookup proxy-service.cortexflow.svc.cluster.local - -sleep 1.5 -echo -./test-proxy-endpoints.sh $proxy_pod_name -echo -echo -echo "🧑🏻‍🔬 Testing outside the proxy pod using a test pod" -echo "🔨 Testing using a temporary test pod and nslookup" -kubectl run -it --rm --image=busybox test-pod --restart=Never -n cortexflow -- nslookup proxy-service.cortexflow.svc.cluster.local - -echo -sleep 1.5 -echo "🔨 Sending a test message using netcat and a temporary test pod" -kubectl run -it --rm --image=busybox test-pod --restart=Never -n cortexflow -- sh -c "echo -n Hi CortexFlow | nc -u -w 3 -v $proxy_ip $proxy_udp_port" - -echo -sleep 1.5 -echo "🔨 Testing the tcp port" -echo "🔨 Sending a test message using netcat and a temporary test pod " -kubectl run -it --rm --image=busybox test-pod --restart=Never -n cortexflow -- sh -c "echo -n Hi TCP | nc -w 3 -v $proxy_ip $proxy_tcp_port" diff --git a/Scripts/test-proxy-endpoints.sh b/Scripts/test-proxy-endpoints.sh deleted file mode 100755 index c89e52e..0000000 --- a/Scripts/test-proxy-endpoints.sh +++ /dev/null @@ -1,45 +0,0 @@ -echo "🔨 Testing curl command" -response=$(kubectl exec -n cortexflow $1 -- curl -s -o /dev/null -w "%{http_code}" http://localhost:9090/) -if [ "$response" -eq 200 ]; then - echo "✅ Server is working" - echo " Checking / endpoint" - kubectl exec -n cortexflow $1 -- curl -v http://localhost:9090/ -else - echo "❌ Error in http response ERROR: $response. Service does not exists or is not exposed" -fi - -echo -sleep 1.5 -echo "🔨 Testing /health endpoint" -response=$(kubectl exec -n cortexflow $1 -- curl -s -o /dev/null -w "%{http_code}" http://localhost:9090/health) -if [ "$response" -eq 200 ]; then - echo "✅ Server is working" - echo " Checking /health endpoint" - kubectl exec -n cortexflow $1 -- curl -v http://localhost:9090/health -else - echo "❌ Error in http response ERROR: $response. Service does not exists or is not exposed" -fi - -echo -sleep 1.5 -echo "🔨 Testing /metrics endpoint" -response=$(kubectl exec -n cortexflow $1 -- curl -s -o /dev/null -w "%{http_code}" http://localhost:9090/metrics) -if [ "$response" -eq 200 ]; then - echo "✅ Server is working" - echo " Checking /metrics endpoint" - kubectl exec -n cortexflow $1 -- curl -v http://localhost:9090/metrics -else - echo "❌ Error in http response ERROR: $response. Service does not exists or is not exposed" -fi - -echo -sleep 1.5 -echo "🔨 Testing /status endpoint" -response=$(kubectl exec -n cortexflow $1 -- curl -s -o /dev/null -w "%{http_code}" http://localhost:9090/status) -if [ "$response" -eq 200 ]; then - echo "✅ Server is working" - echo " Checking /status endpoint" - kubectl exec -n cortexflow $1 -- curl -v http://localhost:9090/status -else - echo "❌ Error in http response ERROR: $response. Service does not exists or is not exposed" -fi diff --git a/Scripts/test-proxy-ports.sh b/Scripts/test-proxy-ports.sh deleted file mode 100755 index 33d658d..0000000 --- a/Scripts/test-proxy-ports.sh +++ /dev/null @@ -1,18 +0,0 @@ -echo "🔨 Testing network connections" -kubectl exec -n cortexflow $1 -- netstat -tulnp | grep $2 - -sleep 1.5 - -echo -echo "🔨 testing if the process is in execution" -kubectl exec -n cortexflow $1 -- ps aux | grep cortexflow-proxy - -sleep 1.5 -echo -echo "🔨 testing using netcat" -kubectl exec -n cortexflow $1 -- nc -zv proxy-service.cortexflow.svc.cluster.local $2 - -sleep 1.5 -echo -echo "🔨 Checking if the proxy is listening in the 5053 port" -kubectl exec -n cortexflow $1 -- netstat -ulnp diff --git a/Scripts/test-sidecar-advanced-tcp.sh b/Scripts/test-sidecar-advanced-tcp.sh deleted file mode 100755 index ec3fce4..0000000 --- a/Scripts/test-sidecar-advanced-tcp.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/sh - -./install-debugging-tools.sh test-proxy proxy-sidecar -./install-debugging-tools.sh test-proxy2 proxy-sidecar -./install-debugging-tools.sh test-proxy3 proxy-sidecar -./install-debugging-tools.sh test-proxy4 proxy-sidecar - -# start the tcp listener -kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Starting TCP listener on port 5054..." - nohup sh -c "nc -l -p 5054" >/dev/null 2>&1 & -' - -kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Starting TCP listener on port 5054..." - nohup sh -c "nc -l -p 5054" >/dev/null 2>&1 & -' - - -test_proxy_to_proxy2() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -w1 test-proxy2 5054 - ' - done -} - -test_proxy2_to_proxy() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -w1 test-proxy 5054 - ' - done -} - -test_proxy3_to_proxy2() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - kubectl exec test-proxy3 -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -w1 test-proxy2 5054 - ' - done -} - -test_proxy4_to_proxy2() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - kubectl exec test-proxy4 -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -w1 test-proxy2 5054 - ' - done -} - -# execute the functions in background -test_proxy_to_proxy2 & -test_proxy2_to_proxy & -test_proxy3_to_proxy2 & -test_proxy4_to_proxy2 & - - -sleep 300 - -# stop the listeners -kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c 'pkill nc' -kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c 'pkill nc' diff --git a/Scripts/test-sidecar-advanced-udp.sh b/Scripts/test-sidecar-advanced-udp.sh deleted file mode 100755 index d9c52a8..0000000 --- a/Scripts/test-sidecar-advanced-udp.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/sh -./install-debugging-tools.sh test-proxy proxy-sidecar -./install-debugging-tools.sh test-proxy2 proxy-sidecar -./install-debugging-tools.sh test-proxy3 proxy-sidecar -./install-debugging-tools.sh test-proxy4 proxy-sidecar - -# start the udp listener -kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Starting UDP listener on port 5053..." - nohup nc -lu 5053 >/dev/null 2>&1 & -' - -kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Starting UDP listener on port 5053..." - nohup nc -lu 5053 >/dev/null 2>&1 & -' - - -test_proxy_to_proxy2() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - echo "Sending UDP packet from test-proxy to test-proxy2..." - kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -u -w1 test-proxy2 5053 - ' - done -} - -test_proxy2_to_proxy() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - echo "Sending UDP packet from test-proxy2 to test-proxy..." - kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -u -w1 test-proxy 5053 - ' - done -} - -test_proxy3_to_proxy2() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - echo "Sending UDP packet from test-proxy3 to test-proxy2..." - kubectl exec test-proxy3 -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -u -w1 test-proxy2 5053 - ' - done -} - -test_proxy4_to_proxy2() { - for i in $(seq 1 300); do - sleep $((RANDOM % 5 + 1)) - echo "Sending UDP packet from test-proxy4 to test-proxy2..." - kubectl exec test-proxy4 -c proxy-sidecar -n cortexflow -- sh -c ' - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -u -w1 test-proxy2 5053 - ' - done -} - -# execute the functions in background -(test_proxy_to_proxy2 &) & -(test_proxy2_to_proxy &) & -(test_proxy3_to_proxy2 &) & -(test_proxy4_to_proxy2 &) & - - -sleep 300 - -# stop the listeners -kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c 'pkill nc || kill $(pgrep nc)' -kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c 'pkill nc || kill $(pgrep nc)' diff --git a/Scripts/test-sidecar-proxy.sh b/Scripts/test-sidecar-proxy.sh deleted file mode 100755 index fcce42d..0000000 --- a/Scripts/test-sidecar-proxy.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash - -echo "Testing Sidecar proxy injection " - -sleep 1 -echo "Checking pods" -kubectl get pods -o wide -n cortexflow -echo -echo "Checking if the sidecar proxy is present" -kubectl get pods -n cortexflow -o json | jq '.items[].spec.containers[].name' - -echo -sleep 1 -echo "Checking open ports in test-proxy" -kubectl get pods test-proxy -o jsonpath='{.spec.containers[*].ports}' -n cortexflow -echo -kubectl get pods test-proxy2 -o jsonpath='{.spec.containers[*].ports}' -n cortexflow - -echo -echo -echo "Installing debugging tools in test-proxy: (PROXY-SIDECAR container)" -sleep 3 -./install-debugging-tools.sh test-proxy proxy-sidecar -echo -echo -echo "Installing debugging tools in test-proxy2: (PROXY-SIDECAR container)" -sleep 3 -./install-debugging-tools.sh test-proxy2 proxy-sidecar - -echo -echo -echo "Checking network connections in test-proxy pod " -kubectl exec -it test-proxy -c proxy-sidecar -n cortexflow -- netstat -tulnp -echo -echo "Checking network connections in test-proxy2 pod" -kubectl exec -it test-proxy2 -c proxy-sidecar -n cortexflow -- netstat -tulnp - - -echo -sleep 2 -echo "TEST 1: Checking if test-proxy can communicate with test-proxy2" -kubectl exec -it test-proxy -c proxy-sidecar -n cortexflow -- nc -zv test-proxy2.cortexflow.svc.cluster.local 5054 -echo - -echo - -echo "TEST 2: Checking if test-proxy can communicate with test-proxy2 (TCP)" - -# 2. Send the message from test-proxy to test-proxy2 -kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Test: Incoming Message ⏳" - printf "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJwYXlsb2FkIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}\n" | nc -w3 test-proxy2 5054 && echo "✅ Test completed" -' - -echo -sleep 2 -echo -echo "TEST 2: Sending a message from test-proxy to test-proxy2 (UDP)" - -#Start the UDP listener on test-proxy2 (MUST be before sending the message) -kubectl exec test-proxy2 -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Starting UDP listener on port 5053..." - nohup sh -c "nc -lu -p 5053 > /tmp/received_message.log" >/dev/null 2>&1 & - sleep 2 # Wait for the listener to start -' - -#2. Send the message from test-proxy to test-proxy2 -kubectl exec test-proxy -c proxy-sidecar -n cortexflow -- sh -c ' - echo "Test: Incoming Message ⏳" - echo "{\"service\":\"test-proxy2.cortexflow\",\"direction\":\"Incoming\",\"payload\":\"eyJtZXNzYWdlIjogIkhlbGxvIGZyb20gcHJveHktc2lkZWNhciJ9\"}" | nc -u -w3 test-proxy2 5053 && echo "✅ Test completed" -' From aa9f4383dbc925763debfb7882082ccbaa15c945 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Mon, 1 Jun 2026 19:11:21 +0200 Subject: [PATCH 42/46] [#175]: added modules to initialize the Metrics exporter using opentelemetry sdk. added srtruct Metrics to group all the metrics in one place. added auxiliary functions record_network_metrics and record_timestamp_metrics. added exporter setting in buffer_type/read_network_metrics and buffer_type/read_timestamp_metrics --- core/common/src/buffer_type.rs | 83 +++++++++++- core/common/src/lib.rs | 4 +- core/common/src/otel_metrics.rs | 133 +++++++++++++++++++ core/src/components/metrics/src/otel_init.rs | 120 +++++++++++++++++ 4 files changed, 333 insertions(+), 7 deletions(-) create mode 100644 core/common/src/otel_metrics.rs create mode 100644 core/src/components/metrics/src/otel_init.rs diff --git a/core/common/src/buffer_type.rs b/core/common/src/buffer_type.rs index f962698..45d82c8 100644 --- a/core/common/src/buffer_type.rs +++ b/core/common/src/buffer_type.rs @@ -1,9 +1,14 @@ +#[cfg(feature = "monitoring-structs")] +use crate::otel_metrics::Metrics; #[cfg(feature = "buffer-reader")] use aya::maps::{MapData, PerfEventArray}; use aya::{maps::perf::PerfEventArrayBuffer, util::online_cpus}; use bytemuck_derive::Zeroable; use bytes::BytesMut; use std::net::Ipv4Addr; +#[cfg(feature = "buffer-reader")] +#[cfg(feature = "monitoring-structs")] +use std::sync::Arc; use tracing::{error, info, warn}; // @@ -342,7 +347,39 @@ impl BufferType { } } #[cfg(feature = "monitoring-structs")] - pub async fn read_network_metrics(buffers: &mut [BytesMut], tot_events: i32, offset: i32) { + /// Continuously read [`NetworkMetrics`] events and record OpenTelemetry + /// observations. + /// + /// This helper mirrors the core behaviour of + /// [`cortexbrain_common::buffer_type::read_perf_buffer`] but adds the OTel + /// instrumentation layer. + /// + /// # Loop + /// + /// 1. For every CPU buffer call `read_events`. + /// 2. Parse each raw [`BytesMut`] into [`NetworkMetrics`] using an + /// unaligned read (the struct is `#[repr(C, packed)]` and `Pod`). + /// 3. Call [`Metrics::record_network_metrics`]. + /// 4. Retain the legacy `tracing::info!` log for human-readable local output. + /// 5. Sleep 100 ms between polls. + /// + /// # Safety + /// + /// `std::ptr::read_unaligned` is safe here because the eBPF program writes + /// exactly the `NetworkMetrics` layout into the ring buffer and the struct + /// implements [`aya::Pod`]. + /// Continuously read [`TimeStampMetrics`] events and record OpenTelemetry + /// observations. + /// + /// Counterpart to [`read_network_buffer`] for the `time_stamp_events` map. + + pub async fn read_network_metrics( + buffers: &mut [BytesMut], + tot_events: i32, + offset: i32, + exporter: &str, + metrics: Arc, + ) { for i in offset..tot_events { let vec_bytes = &buffers[i as usize]; if vec_bytes.len() < std::mem::size_of::() { @@ -361,6 +398,11 @@ impl BufferType { if vec_bytes.len() >= std::mem::size_of::() { let net_metrics: NetworkMetrics = unsafe { std::ptr::read_unaligned(vec_bytes.as_ptr() as *const _) }; + + match exporter { + "otlp" => metrics.record_network_metrics(&net_metrics), + _ => continue, // skip + } let tgid = net_metrics.tgid; let comm = String::from_utf8_lossy(&net_metrics.comm); let ts_us = net_metrics.ts_us; @@ -389,7 +431,13 @@ impl BufferType { } } #[cfg(feature = "monitoring-structs")] - pub async fn read_timestamp_metrics(buffers: &mut [BytesMut], tot_events: i32, offset: i32) { + pub async fn read_timestamp_metrics( + buffers: &mut [BytesMut], + tot_events: i32, + offset: i32, + exporter: &str, + metrics: Arc, + ) { for i in offset..tot_events { let vec_bytes = &buffers[i as usize]; if vec_bytes.len() < std::mem::size_of::() { @@ -408,6 +456,12 @@ impl BufferType { if vec_bytes.len() >= std::mem::size_of::() { let time_stamp_event: TimeStampMetrics = unsafe { std::ptr::read_unaligned(vec_bytes.as_ptr() as *const _) }; + + match exporter { + "otlp" => metrics.record_timestamp_metrics(&time_stamp_event), + _ => continue, + } + let delta_us = time_stamp_event.delta_us; let ts_us = time_stamp_event.ts_us; let tgid = time_stamp_event.tgid; @@ -431,6 +485,7 @@ pub async fn read_perf_buffer>( mut array_buffers: Vec>, mut buffers: Vec, buffer_type: BufferType, + #[cfg(feature = "monitoring-structs")] metrics: Option>, ) { // loop over the buffers loop { @@ -469,13 +524,29 @@ pub async fn read_perf_buffer>( } #[cfg(feature = "monitoring-structs")] BufferType::NetworkMetrics => { - BufferType::read_network_metrics(&mut buffers, tot_events, offset) - .await + BufferType::read_network_metrics( + &mut buffers, + tot_events, + offset, + "otlp", + metrics + .clone() + .expect("Metrics required for NetworkMetrics"), + ) + .await } #[cfg(feature = "monitoring-structs")] BufferType::TimeStampMetrics => { - BufferType::read_timestamp_metrics(&mut buffers, tot_events, offset) - .await + BufferType::read_timestamp_metrics( + &mut buffers, + tot_events, + offset, + "otlp", + metrics + .clone() + .expect("Metric required for TimeStampMetrics"), + ) + .await } } } diff --git a/core/common/src/lib.rs b/core/common/src/lib.rs index d7e48b0..15c4ad7 100644 --- a/core/common/src/lib.rs +++ b/core/common/src/lib.rs @@ -1,7 +1,7 @@ #[cfg(any( feature = "buffer-reader", feature = "network-structs", - feature = "monitoring-structs" + feature = "monitoring-structs", ))] pub mod buffer_type; pub mod constants; @@ -9,5 +9,7 @@ pub mod formatters; pub mod logger; #[cfg(feature = "map-handlers")] pub mod map_handlers; +#[cfg(feature = "monitoring-structs")] +pub mod otel_metrics; #[cfg(feature = "program-handlers")] pub mod program_handlers; diff --git a/core/common/src/otel_metrics.rs b/core/common/src/otel_metrics.rs new file mode 100644 index 0000000..ae8c9db --- /dev/null +++ b/core/common/src/otel_metrics.rs @@ -0,0 +1,133 @@ +//! OpenTelemetry metric instruments for eBPF perf-buffer events. +//! +//! This module centralises every [`Meter`]-backed instrument that the +//! `metrics` crate uses to observe raw eBPF events. It provides a single +//! [`Metrics`] handle that is cheap to [`Arc`]-clone and safe to use from +//! multiple asynchronous tasks concurrently. +//! +//! - An [`Arc`] is moved into each Tokio +//! task that reads a perf buffer. All instrument operations are lock-free. +//! - Every observation is tagged with `tgid` and `comm` +//! extracted from the eBPF struct, allowing downstream collectors to group +//! telemetry by process. + +use crate::buffer_type::{NetworkMetrics, TimeStampMetrics}; +use opentelemetry::KeyValue; +use opentelemetry::metrics::{Counter, Gauge, Histogram, Meter}; +pub struct Metrics { + /// Total number of eBPF events processed across all perf buffers. + pub events_total: Counter, + + /// Total number of network-related events produced by the `net_metrics` + /// eBPF map. + pub packets_total: Counter, + + /// Observed socket drop count (`sk_drops`) from the kernel sock struct. + pub sk_drops: Gauge, + + /// Observed socket error count (`sk_err`) from the kernel sock struct. + pub sk_err: Gauge, + + /// Histogram of `delta_us` values supplied by the `time_stamp_events` + /// perf buffer. + pub delta_us: Histogram, + + /// Histogram of `ts_us` values seen in both `net_metrics` and + /// `time_stamp_events`. + pub ts_us: Histogram, +} + +impl Metrics { + /// Initialise all instruments backed by the supplied [`Meter`]. + pub fn new(meter: &Meter) -> Self { + // total events + let events_total = meter + .u64_counter("cortexbrain_events_total") + .with_description("Total number of eBPF events processed") + .build(); + + // total packets + let packets_total = meter + .u64_counter("cortexbrain_packets_total") + .with_description("Total number of network events processed") + .build(); + + // socket drops + let sk_drops = meter + .i64_gauge("cortexbrain_sk_drops") + .with_description("Socket drop count per event") + .build(); + + // socket errors + let sk_err = meter + .i64_gauge("cortexbrain_sk_err") + .with_description("Socket error count per event") + .build(); + + // delta microseconds + let delta_us = meter + .u64_histogram("cortexbrain_delta_us") + .with_description("Distribution of delta_us values from timestamp events") + .build(); + + // timestamp microseconds grouped + let ts_us = meter + .u64_histogram("cortexbrain_ts_us") + .with_description("Distribution of timestamp values from eBPF events") + .build(); + + Self { + events_total, + packets_total, + sk_drops, + sk_err, + delta_us, + ts_us, + } + } + + /// Record a single [`NetworkMetrics`] event. + /// + /// Increments `events_total` and `packets_total`, records `sk_drops` and + /// `sk_err` as gauges, and observes `ts_us` in the timestamp histogram. + /// + /// Every observation carries: + /// + /// -`tgid` – task group ID. + /// - `comm` – command name (null-terminated bytes converted to a UTF-8 + /// string and trimmed). + pub fn record_network_metrics(&self, m: &NetworkMetrics) { + let comm = String::from_utf8_lossy(&m.comm); + let comm_trimmed = comm.trim_end_matches('\0').to_string(); + let attrs = &[ + KeyValue::new("tgid", m.tgid as i64), + KeyValue::new("comm", comm_trimmed), + ]; + + self.events_total.add(1, attrs); + self.packets_total.add(1, attrs); + self.sk_drops.record(m.sk_drops as i64, attrs); + self.sk_err.record(m.sk_err as i64, attrs); + self.ts_us.record(m.ts_us, attrs); + } + + /// Record a single [`TimeStampMetrics`] event. + /// + /// Increments `events_total`, and records `delta_us` and `ts_us` in their + /// respective histograms. + /// + /// Every observation carries `tgid` and `comm` (see + /// [`record_network_metrics`]). + pub fn record_timestamp_metrics(&self, m: &TimeStampMetrics) { + let comm = String::from_utf8_lossy(&m.comm); + let comm_trimmed = comm.trim_end_matches('\0').to_string(); + let attrs = &[ + KeyValue::new("tgid", m.tgid as i64), + KeyValue::new("comm", comm_trimmed), + ]; + + self.events_total.add(1, attrs); + self.delta_us.record(m.delta_us, attrs); + self.ts_us.record(m.ts_us, attrs); + } +} diff --git a/core/src/components/metrics/src/otel_init.rs b/core/src/components/metrics/src/otel_init.rs new file mode 100644 index 0000000..e472c7e --- /dev/null +++ b/core/src/components/metrics/src/otel_init.rs @@ -0,0 +1,120 @@ +//! docs +//! This module configures and bootstraps the OpenTelemetry SDK (OTel SDK) +//! within the `metrics` binary. Its goal is to expose a [`Meter`] --- the +//! primary entry-point for creating counters, gauges and histograms --- +//! backed by an **OTLP/gRPC** metric exporter. +//! +//! # Relationship to the rest of the crate +//! +//! `otel_init::init_opentelemetry()` is invoked **once** in [`main`], before +//! any eBPF program is loaded. The returned [`Meter`] is then passed through +//! the call chain into [`event_listener`](crate::helpers::event_listener) +//! where it is used by the async tasks that read eBPF perf-buffers. See +//! [`crate::helpers`] for the consumption side. +//! +//! When the application exits (either because `Ctrl-C` was received or because +//! an error bubbled up), [`shutdown_opentelemetry`] is called. This flushes +//! every remaining aggregated metric to the OTLP collector before the process +//! terminates. +//! + +use opentelemetry::global; +use opentelemetry::metrics::{Meter, MeterProvider}; +use opentelemetry_otlp::{MetricExporter, WithExportConfig}; +use opentelemetry_sdk::metrics::{PeriodicReader, SdkMeterProvider}; +use std::env; +use std::sync::OnceLock; +use std::time::Duration; + +/// Environment variable that holds the OTLP collector endpoint. +/// +/// Expected format: `"http://collector:4317"` (gRPC transport). +/// +pub const OTEL_EXPORTER_OTLP_ENDPOINT: &str = "OTEL_EXPORTER_OTLP_ENDPOINT"; + +/// Default OTLP endpoint used when [`OTEL_EXPORTER_OTLP_ENDPOINT`] is not +/// present in the environment. +/// +/// Points to a locally-running OpenTelemetry Collector on the standard +/// **gRPC** port `4317`. Note that OTLP over HTTP typically uses `4318` --- +/// make sure your Collector is actually listening for **gRPC** traffic on the +/// port you configure. +pub const DEFAULT_OTLP_ENDPOINT: &str = "http://localhost:4317"; + +/// Singleton that owns the concrete `SdkMeterProvider` instance. +/// OnceLock guarantees single initialisation, we avoid accidentally creating two providers (and +/// two background export tasks) if `init_opentelemetry()` were ever called +/// twice. +/// +/// # Thread safety +/// +/// `OnceLock` is `Sync`, so the static can be read safely from any thread +/// or Tokio task once populated. +static METER_PROVIDER: OnceLock = OnceLock::new(); +/// docs: +/// Initialise the OpenTelemetry SDK, wire up the OTLP/gRPC exporter, and +/// return a [`Meter`] ready for instrumenting the `metrics` crate. +/// +/// 1. Read the endpoint from [`OTEL_EXPORTER_OTLP_ENDPOINT`] with the +/// hard-coded default [`DEFAULT_OTLP_ENDPOINT`]. +/// 2. Build a `MetricExporter` using the Tonic / gRPC transport: +/// - with_tonic()` enables the Tonic-based gRPC client. +/// - `with_endpoint()` sets the target Collector URL. +/// - `with_timeout(Duration::from_secs(10))` caps each export RPC to 10 +/// seconds; if the Collector is unreachable the RPC aborts instead of +/// hanging indefinitely. +/// 3. Wrap the exporter in a `PeriodicReader`. The reader collects +/// aggregated metrics from every instrument every 5 seconds and hands +/// them to the exporter. This is the "push" model --- metrics leave the +/// process automatically without an external scraper. +/// 4. Construct an `SdkMeterProvider` and register it as the global +/// meter provider (`global::set_meter_provider`). The global handle is +/// needed for instrumenting code spawned in other Tokio tasks (see +/// [`helpers::event_listener`](crate::helpers::event_listener)). +/// 5. Keep a clone of the concrete provider in `METER_PROVIDER` so that +/// [`shutdown_opentelemetry`] can later call `SdkMeterProvider::shutdown()`. +/// 6. Create a `Meter named `"cortexbrain-metrics"` and return it. +/// +/// Potential causes of errors: +/// +/// * An invalid endpoint URL (malformed string). +/// * Network-level failure during exporter construction. +/// * The provider already having been initialised +/// +pub fn init_opentelemetry() -> Result { + let endpoint = + env::var(OTEL_EXPORTER_OTLP_ENDPOINT).unwrap_or_else(|_| DEFAULT_OTLP_ENDPOINT.to_string()); + + let exporter = MetricExporter::builder() + .with_tonic() + .with_endpoint(endpoint) + .with_timeout(Duration::from_secs(10)) + .build()?; + + let reader = PeriodicReader::builder(exporter) + .with_interval(Duration::from_secs(5)) + .build(); + + let provider = SdkMeterProvider::builder().with_reader(reader).build(); + + // Make the provider globally discoverable. This clone is cheap because + // SdkMeterProvider is an Arc-backed handle. + global::set_meter_provider(provider.clone()); + + // Stash the concrete handle so shutdown_opentelemetry can flush. + METER_PROVIDER + .set(provider.clone()) + .map_err(|_| anyhow::anyhow!("OpenTelemetry meter provider already initialised"))?; + + let meter = provider.meter("cortexbrain-metrics"); + Ok(meter) +} +/// docs: +/// Flush every buffered metric to the OTLP collector and shut down the SDK. +pub fn shutdown_opentelemetry() { + if let Some(provider) = METER_PROVIDER.get() + && let Err(e) = provider.shutdown() + { + tracing::error!("Failed to shut down OpenTelemetry meter provider: {:?}", e); + } +} From 149abf6dd1fcd88f99dc9a56e6d3ce1a441b03da Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Mon, 1 Jun 2026 19:11:55 +0200 Subject: [PATCH 43/46] (chore): updated dependencies --- cli/Cargo.lock | 91 ++++++++++++------------ core/Cargo.lock | 98 ++++++++++++++------------ core/common/Cargo.toml | 11 +-- core/src/components/metrics/Cargo.toml | 3 + 4 files changed, 106 insertions(+), 97 deletions(-) diff --git a/cli/Cargo.lock b/cli/Cargo.lock index 0fea51d..7c843ec 100644 --- a/cli/Cargo.lock +++ b/cli/Cargo.lock @@ -361,8 +361,10 @@ dependencies = [ "opentelemetry", "opentelemetry-appender-tracing", "opentelemetry-otlp", + "opentelemetry-semantic-conventions", "opentelemetry-stdout", "opentelemetry_sdk", + "tokio", "tracing", "tracing-subscriber", ] @@ -992,16 +994,6 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" -[[package]] -name = "iri-string" -version = "0.7.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" -dependencies = [ - "memchr", - "serde", -] - [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -1266,9 +1258,9 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "opentelemetry" -version = "0.31.0" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b84bcd6ae87133e903af7ef497404dda70c60d0ea14895fc8a5e6722754fc2a0" +checksum = "b0142c63252a9e054e68a4c61a5778f7b14f576274d593f8ce883d191a099682" dependencies = [ "futures-core", "futures-sink", @@ -1280,9 +1272,9 @@ dependencies = [ [[package]] name = "opentelemetry-appender-tracing" -version = "0.31.1" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef6a1ac5ca3accf562b8c306fa8483c85f4390f768185ab775f242f7fe8fdcc2" +checksum = "2c0080f0dc1d7c786f467cd85a4e395fcab11ee852004f39a29a18ab7c25d837" dependencies = [ "opentelemetry", "tracing", @@ -1292,9 +1284,9 @@ dependencies = [ [[package]] name = "opentelemetry-http" -version = "0.31.0" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7a6d09a73194e6b66df7c8f1b680f156d916a1a942abf2de06823dd02b7855d" +checksum = "5683015d09e2df236ef005b17f6f196f0d5f6313c4fa43a7b6a53b52776e4331" dependencies = [ "async-trait", "bytes", @@ -1305,9 +1297,9 @@ dependencies = [ [[package]] name = "opentelemetry-otlp" -version = "0.31.0" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2366db2dca4d2ad033cad11e6ee42844fd727007af5ad04a1730f4cb8163bf" +checksum = "9966929966d17620d7c316c643ba62631826e10021409357772d5eea84f62c35" dependencies = [ "http", "opentelemetry", @@ -1319,14 +1311,14 @@ dependencies = [ "thiserror 2.0.16", "tokio", "tonic", - "tracing", + "tonic-types", ] [[package]] name = "opentelemetry-proto" -version = "0.31.0" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7175df06de5eaee9909d4805a3d07e28bb752c34cab57fa9cff549da596b30f" +checksum = "56d658ba1faf63f7b9c492cfbe6e0ec365440a16132d3270c1065f7b33f1b638" dependencies = [ "opentelemetry", "opentelemetry_sdk", @@ -1335,11 +1327,17 @@ dependencies = [ "tonic-prost", ] +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ca2f98a0437b427b4b08f19f1caa3c44db885a202bc12cfea13d6c702243d68" + [[package]] name = "opentelemetry-stdout" -version = "0.31.0" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc8887887e169414f637b18751487cce4e095be787d23fad13c454e2fb1b3811" +checksum = "a1b1c6a247d79091f0062a5f4bd058589525cf987a8d4c169440d9c1be72f0ad" dependencies = [ "chrono", "opentelemetry", @@ -1348,15 +1346,16 @@ dependencies = [ [[package]] name = "opentelemetry_sdk" -version = "0.31.0" +version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e14ae4f5991976fd48df6d843de219ca6d31b01daaab2dad5af2badeded372bd" +checksum = "9b59f80e1ac4d5ff7a2db8fb6c80badb7f0f3f858211fba08dd9aaec750894f9" dependencies = [ "futures-channel", "futures-executor", "futures-util", "opentelemetry", "percent-encoding", + "portable-atomic", "rand", "thiserror 2.0.16", "tokio", @@ -1502,6 +1501,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + [[package]] name = "potential_utf" version = "0.1.4" @@ -1708,9 +1713,9 @@ checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" [[package]] name = "reqwest" -version = "0.12.24" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" +checksum = "219c5811de6525e5416c7d5d53bb656d3afdbc6c5af816e0802bcfa42dbdc1c3" dependencies = [ "base64", "bytes", @@ -1726,9 +1731,6 @@ dependencies = [ "log", "percent-encoding", "pin-project-lite", - "serde", - "serde_json", - "serde_urlencoded", "sync_wrapper", "tokio", "tower", @@ -1926,18 +1928,6 @@ dependencies = [ "serde_core", ] -[[package]] -name = "serde_urlencoded" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" -dependencies = [ - "form_urlencoded", - "itoa", - "ryu", - "serde", -] - [[package]] name = "serde_yaml" version = "0.9.34+deprecated" @@ -2273,6 +2263,17 @@ dependencies = [ "tonic-prost", ] +[[package]] +name = "tonic-types" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a875a902255423d34c1f20838ab374126db8eb41625b7947a1d54113b0b7399" +dependencies = [ + "prost", + "prost-types", + "tonic", +] + [[package]] name = "tower" version = "0.5.2" @@ -2294,9 +2295,9 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.6" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +checksum = "4cfcf7e2740e6fc6d4d688b4ef00650406bb94adf4731e43c096c3a19fe40840" dependencies = [ "base64", "bitflags", @@ -2304,13 +2305,13 @@ dependencies = [ "futures-util", "http", "http-body", - "iri-string", "mime", "pin-project-lite", "tower", "tower-layer", "tower-service", "tracing", + "url", ] [[package]] diff --git a/core/Cargo.lock b/core/Cargo.lock index 745a66d..6ae4f98 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -415,8 +415,10 @@ dependencies = [ "opentelemetry", "opentelemetry-appender-tracing", "opentelemetry-otlp", + "opentelemetry-semantic-conventions", "opentelemetry-stdout", "opentelemetry_sdk", + "tokio", "tracing", "tracing-subscriber", ] @@ -1034,16 +1036,6 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" -[[package]] -name = "iri-string" -version = "0.7.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" -dependencies = [ - "memchr", - "serde", -] - [[package]] name = "itertools" version = "0.14.0" @@ -1239,6 +1231,9 @@ dependencies = [ "cortexbrain-common", "libc", "nix", + "opentelemetry", + "opentelemetry-otlp", + "opentelemetry_sdk", "tokio", "tracing", "tracing-subscriber", @@ -1355,9 +1350,9 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "opentelemetry" -version = "0.31.0" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b84bcd6ae87133e903af7ef497404dda70c60d0ea14895fc8a5e6722754fc2a0" +checksum = "b0142c63252a9e054e68a4c61a5778f7b14f576274d593f8ce883d191a099682" dependencies = [ "futures-core", "futures-sink", @@ -1369,9 +1364,9 @@ dependencies = [ [[package]] name = "opentelemetry-appender-tracing" -version = "0.31.1" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef6a1ac5ca3accf562b8c306fa8483c85f4390f768185ab775f242f7fe8fdcc2" +checksum = "2c0080f0dc1d7c786f467cd85a4e395fcab11ee852004f39a29a18ab7c25d837" dependencies = [ "opentelemetry", "tracing", @@ -1381,9 +1376,9 @@ dependencies = [ [[package]] name = "opentelemetry-http" -version = "0.31.0" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7a6d09a73194e6b66df7c8f1b680f156d916a1a942abf2de06823dd02b7855d" +checksum = "5683015d09e2df236ef005b17f6f196f0d5f6313c4fa43a7b6a53b52776e4331" dependencies = [ "async-trait", "bytes", @@ -1394,9 +1389,9 @@ dependencies = [ [[package]] name = "opentelemetry-otlp" -version = "0.31.0" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2366db2dca4d2ad033cad11e6ee42844fd727007af5ad04a1730f4cb8163bf" +checksum = "9966929966d17620d7c316c643ba62631826e10021409357772d5eea84f62c35" dependencies = [ "http", "opentelemetry", @@ -1408,14 +1403,14 @@ dependencies = [ "thiserror 2.0.17", "tokio", "tonic", - "tracing", + "tonic-types", ] [[package]] name = "opentelemetry-proto" -version = "0.31.0" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7175df06de5eaee9909d4805a3d07e28bb752c34cab57fa9cff549da596b30f" +checksum = "56d658ba1faf63f7b9c492cfbe6e0ec365440a16132d3270c1065f7b33f1b638" dependencies = [ "opentelemetry", "opentelemetry_sdk", @@ -1424,11 +1419,17 @@ dependencies = [ "tonic-prost", ] +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ca2f98a0437b427b4b08f19f1caa3c44db885a202bc12cfea13d6c702243d68" + [[package]] name = "opentelemetry-stdout" -version = "0.31.0" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc8887887e169414f637b18751487cce4e095be787d23fad13c454e2fb1b3811" +checksum = "a1b1c6a247d79091f0062a5f4bd058589525cf987a8d4c169440d9c1be72f0ad" dependencies = [ "chrono", "opentelemetry", @@ -1437,15 +1438,16 @@ dependencies = [ [[package]] name = "opentelemetry_sdk" -version = "0.31.0" +version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e14ae4f5991976fd48df6d843de219ca6d31b01daaab2dad5af2badeded372bd" +checksum = "9b59f80e1ac4d5ff7a2db8fb6c80badb7f0f3f858211fba08dd9aaec750894f9" dependencies = [ "futures-channel", "futures-executor", "futures-util", "opentelemetry", "percent-encoding", + "portable-atomic", "rand", "thiserror 2.0.17", "tokio", @@ -1585,6 +1587,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + [[package]] name = "potential_utf" version = "0.1.4" @@ -1792,9 +1800,9 @@ checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "reqwest" -version = "0.12.24" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" +checksum = "219c5811de6525e5416c7d5d53bb656d3afdbc6c5af816e0802bcfa42dbdc1c3" dependencies = [ "base64", "bytes", @@ -1810,9 +1818,6 @@ dependencies = [ "log", "percent-encoding", "pin-project-lite", - "serde", - "serde_json", - "serde_urlencoded", "sync_wrapper", "tokio", "tower", @@ -2020,18 +2025,6 @@ dependencies = [ "serde_core", ] -[[package]] -name = "serde_urlencoded" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" -dependencies = [ - "form_urlencoded", - "itoa", - "ryu", - "serde", -] - [[package]] name = "serde_yaml" version = "0.9.34+deprecated" @@ -2219,9 +2212,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.48.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" dependencies = [ "bytes", "libc", @@ -2361,6 +2354,17 @@ dependencies = [ "tonic-prost", ] +[[package]] +name = "tonic-types" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a875a902255423d34c1f20838ab374126db8eb41625b7947a1d54113b0b7399" +dependencies = [ + "prost", + "prost-types", + "tonic", +] + [[package]] name = "tower" version = "0.5.2" @@ -2382,9 +2386,9 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.6" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +checksum = "4cfcf7e2740e6fc6d4d688b4ef00650406bb94adf4731e43c096c3a19fe40840" dependencies = [ "base64", "bitflags", @@ -2392,13 +2396,13 @@ dependencies = [ "futures-util", "http", "http-body", - "iri-string", "mime", "pin-project-lite", "tower", "tower-layer", "tower-service", "tracing", + "url", ] [[package]] diff --git a/core/common/Cargo.toml b/core/common/Cargo.toml index ee50e2b..e1c39c5 100644 --- a/core/common/Cargo.toml +++ b/core/common/Cargo.toml @@ -16,15 +16,16 @@ anyhow = "1.0" kube = { version = "2.0.1", features = ["client"] } k8s-openapi = { version = "0.26.0", features = ["v1_34"] } aya = "0.13.1" -opentelemetry = "0.31.0" -opentelemetry_sdk = { version = "0.31.0", features = ["logs", "rt-tokio"] } -opentelemetry-stdout = { version = "0.31.0", features = ["logs"] } -opentelemetry-appender-tracing = "0.31.1" -opentelemetry-otlp = { version = "0.31.0", features = ["logs", "grpc-tonic"] } +opentelemetry = "0.32.0" +opentelemetry_sdk = { version = "0.32.0", features = ["logs", "rt-tokio"] } +opentelemetry-stdout = { version = "0.32.0", features = ["logs"] } +opentelemetry-appender-tracing = "0.32.0" +opentelemetry-otlp = { version = "0.32.0", features = ["logs", "grpc-tonic"] } bytemuck = "1.25.0" bytes = "1.11.0" bytemuck_derive = "1.10.2" tokio = "1.49.0" +opentelemetry-semantic-conventions = "0.32.0" [features] map-handlers = [] diff --git a/core/src/components/metrics/Cargo.toml b/core/src/components/metrics/Cargo.toml index c8dcb5b..1c7d420 100644 --- a/core/src/components/metrics/Cargo.toml +++ b/core/src/components/metrics/Cargo.toml @@ -28,3 +28,6 @@ cortexbrain-common = { path = "../../../common/", features = [ "network-structs" ] } nix = { version = "0.30.1", features = ["net"] } +opentelemetry = "0.32.0" +opentelemetry_sdk = "0.32.0" +opentelemetry-otlp = { version = "0.32.0", features = ["grpc-tonic"] } From 7ee399e84a1ae1707274c59a01fec1082cda1f8e Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Mon, 1 Jun 2026 19:16:18 +0200 Subject: [PATCH 44/46] [#175]: implemented the opentelemetry metrics export using the function in the common crate --- core/src/components/metrics/src/helpers.rs | 94 +++++++++++++++------- core/src/components/metrics/src/main.rs | 58 ++++++++----- core/src/components/metrics/src/mod.rs | 5 +- 3 files changed, 104 insertions(+), 53 deletions(-) diff --git a/core/src/components/metrics/src/helpers.rs b/core/src/components/metrics/src/helpers.rs index 843f45d..804e930 100644 --- a/core/src/components/metrics/src/helpers.rs +++ b/core/src/components/metrics/src/helpers.rs @@ -1,14 +1,34 @@ use anyhow::anyhow; use aya::util::online_cpus; use cortexbrain_common::map_handlers::map_manager; -use cortexbrain_common::{ - buffer_type::{BufferSize, BufferType, read_perf_buffer}, - map_handlers::BpfMapsData, -}; +use cortexbrain_common::{buffer_type::BufferSize, map_handlers::BpfMapsData}; +use opentelemetry::metrics::Meter; +use std::sync::Arc; use tokio::signal; use tracing::{error, info}; -pub async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> { +use cortexbrain_common::buffer_type::{BufferType, read_perf_buffer}; +use cortexbrain_common::otel_metrics::Metrics; + +/// Listen for eBPF perf-buffer events and record OpenTelemetry metrics. +/// +/// This function bridges the eBPF perf-buffer layer with the OpenTelemetry +/// metrics pipeline. It opens per-CPU buffers for the two maps of interest +/// (`net_metrics` and `time_stamp_events`), spawns asynchronous consumers, +/// and parks until a `Ctrl-C` signal is received or one of the consumers +/// terminates. +/// +/// # Arguments +/// +/// -`bpf_maps` – handles for the pinned BPF maps produced by +/// [`cortexbrain_common::map_handlers::map_pinner`]. +/// - `meter` – an initialised OpenTelemetry [`Meter`]. +/// +/// # Errors +/// +/// Returns `Err` if the map manager or CPU enumeration fails. +/// +pub async fn event_listener(bpf_maps: BpfMapsData, meter: Meter) -> Result<(), anyhow::Error> { info!("Getting CPU count..."); let mut maps = map_manager(bpf_maps)?; @@ -35,48 +55,63 @@ pub async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> info!("Perf buffers created successfully"); - let (time_stamp_events_array, time_stamp_events_perf_buffer) = maps + let (_time_stamp_events_array, time_stamp_events_perf_buffer) = maps .remove("time_stamp_events") .expect("Cannot create time_stamp_events_buffer"); - let (net_perf_array, net_perf_buffer) = maps + let (_net_perf_array, net_perf_buffer) = maps .remove("net_metrics") .expect("Cannot create net_perf_buffer"); - // Create proper sized buffers + // Allocate byte-buffers sized for each structure type let net_metrics_buffers = BufferSize::NetworkMetricsEvents.set_buffer(); let time_stamp_events_buffers = BufferSize::TimeMetricsEvents.set_buffer(); + let metrics = Arc::new(Metrics::new(&meter)); + info!("Starting event listener tasks..."); - let metrics_map_displayer = tokio::spawn(async move { - read_perf_buffer( - net_perf_buffer, - net_metrics_buffers, - BufferType::NetworkMetrics, - ) - .await; - }); - - let time_stamp_events_displayer = tokio::spawn(async move { - read_perf_buffer( - time_stamp_events_perf_buffer, - time_stamp_events_buffers, - BufferType::TimeStampMetrics, - ) - .await; - }); + + let net_metrics_handle = { + let metrics = Arc::clone(&metrics); + let mut array_buffers = net_perf_buffer; + let mut buffers = net_metrics_buffers; + tokio::spawn(async move { + read_perf_buffer( + array_buffers, + buffers, + BufferType::NetworkMetrics, + Some(metrics), + ) + .await; + }) + }; + + let time_stamp_handle = { + let metrics = Arc::clone(&metrics); + let mut array_buffers = time_stamp_events_perf_buffer; + let mut buffers = time_stamp_events_buffers; + tokio::spawn(async move { + read_perf_buffer( + array_buffers, + buffers, + BufferType::TimeStampMetrics, + Some(metrics), + ) + .await; + }) + }; info!("Event listeners started, entering main loop..."); tokio::select! { - result = metrics_map_displayer => { + result = net_metrics_handle => { if let Err(e) = result { - error!("Metrics map displayer task failed: {:?}", e); + error!("Network metrics task failed: {:?}", e); } } - result = time_stamp_events_displayer => { + result = time_stamp_handle => { if let Err(e) = result { - error!("Time stamp events displayer task failed: {:?}", e); + error!("Timestamp events task failed: {:?}", e); } } @@ -85,6 +120,5 @@ pub async fn event_listener(bpf_maps: BpfMapsData) -> Result<(), anyhow::Error> } } - // return success Ok(()) } diff --git a/core/src/components/metrics/src/main.rs b/core/src/components/metrics/src/main.rs index e5558eb..0211be6 100644 --- a/core/src/components/metrics/src/main.rs +++ b/core/src/components/metrics/src/main.rs @@ -1,4 +1,15 @@ -use anyhow::{Context, Ok}; +//! CortexBrain metrics service – eBPF-based telemetry with OpenTelemetry export. +//! +//! This binary is the node-level metrics agent for CortexBrain. It: +//! +//! 1. Initialises an OpenTelemetry metrics pipeline (OTLP / gRPC). +//! 2. Loads a compiled eBPF object and pins its maps to the BPF filesystem. +//! 3. Attaches a set of kernel kprobe programs. +//! 4. Starts asynchronous consumers that read per-CPU perf buffers and +//! emit OpenTelemetry instruments for every event. +//! 5. Blocks until `Ctrl-C` is received, then shuts down cleanly. + +use anyhow::Context; use aya::Ebpf; use std::{ env, fs, @@ -6,9 +17,10 @@ use std::{ sync::{Arc, Mutex}, }; use tracing::{error, info}; - mod helpers; +mod otel_init; use crate::helpers::event_listener; +use crate::otel_init::{init_opentelemetry, shutdown_opentelemetry}; use cortexbrain_common::{ constants, @@ -19,12 +31,14 @@ use cortexbrain_common::{ #[tokio::main] async fn main() -> Result<(), anyhow::Error> { - //init tracing subscriber - let otlp_provider = otlp_logger_init("metrics-service".to_string()); + let _otlp_log_provider = otlp_logger_init("metrics-service".to_string()); info!("Starting metrics service..."); info!("fetching data"); + let meter = + init_opentelemetry().context("Failed to initialise OpenTelemetry metrics pipeline")?; + let bpf_path = env::var(constants::BPF_PATH).context("BPF_PATH environment variable required")?; let data = fs::read(Path::new(&bpf_path)).context("Failed to load file from path")?; @@ -35,30 +49,33 @@ async fn main() -> Result<(), anyhow::Error> { info!("Running Ebpf logger"); info!("loading programs"); - let bpf_map_save_path = std::env::var(constants::PIN_MAP_PATH) - .context("PIN_MAP_PATH environment variable required")?; + + let bpf_map_save_path = + env::var(constants::PIN_MAP_PATH).context("PIN_MAP_PATH environment variable required")?; let map_data = vec!["time_stamp_events".to_string(), "net_metrics".to_string()]; match init_bpf_maps(bpf.clone(), map_data) { - std::result::Result::Ok(bpf_maps) => { + Ok(bpf_maps) => { info!("BPF maps loaded successfully"); let pin_path = std::path::PathBuf::from(&bpf_map_save_path); info!("About to call map_pinner with path: {:?}", pin_path); + match map_pinner(bpf_maps, &pin_path) { - std::result::Result::Ok(maps) => { + Ok(maps) => { info!("BPF maps pinned successfully to {}", bpf_map_save_path); { load_program(bpf.clone(), "metrics_tracer", "tcp_identify_packet_loss") .context( - "An error occured during the execution of load_program function", + "An error occurred during the execution of load_program function", )?; - load_program(tcp_bpf,"tcp_v4_connect","tcp_v4_connect") - .context("An error occured during the execution of load_and_attach_tcp_programs function")?; - load_program(tcp_v6_bpf,"tcp_v6_connect","tcp_v6_connect") - .context("An error occured during the execution of load_and_attach_tcp_programs function")?; + load_program(tcp_bpf, "tcp_v4_connect", "tcp_v4_connect") + .context("An error occurred during the execution of load_and_attach_tcp_programs function")?; + + load_program(tcp_v6_bpf, "tcp_v6_connect", "tcp_v6_connect") + .context("An error occurred during the execution of load_and_attach_tcp_programs function")?; load_program( tcp_rev_bpf, @@ -66,23 +83,24 @@ async fn main() -> Result<(), anyhow::Error> { "tcp_rcv_state_process", ) .context( - "An error occured during the execution of load_program function", + "An error occurred during the execution of load_program function", )?; } - event_listener(maps).await?; + + // Hand off to the async event consumer + event_listener(maps, meter).await } Err(e) => { error!("Error pinning BPF maps: {:?}", e); - return Err(e); + shutdown_opentelemetry(); + Err(e) } } } Err(e) => { error!("Error initializing BPF maps: {:?}", e); - let _ = otlp_provider.shutdown(); - return Err(e); + shutdown_opentelemetry(); + Err(e) } } - - Ok(()) } diff --git a/core/src/components/metrics/src/mod.rs b/core/src/components/metrics/src/mod.rs index 8414b63..c5e2806 100644 --- a/core/src/components/metrics/src/mod.rs +++ b/core/src/components/metrics/src/mod.rs @@ -1,3 +1,2 @@ -mod structs; -mod enums; -mod helpers; \ No newline at end of file +mod helpers; +mod otel_init; From b433a13e3e1d7badc2bdb6885fe862752b083232 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Mon, 1 Jun 2026 19:17:52 +0200 Subject: [PATCH 45/46] [#175]: added metrics exporter in the otel-collector-config ConfigMap. Added updated image in the metrics.yaml with the new implementations --- core/src/testing/metrics.yaml | 2 +- core/src/testing/otel_agent.yaml | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/core/src/testing/metrics.yaml b/core/src/testing/metrics.yaml index 262b28f..a106c4e 100644 --- a/core/src/testing/metrics.yaml +++ b/core/src/testing/metrics.yaml @@ -19,7 +19,7 @@ spec: hostNetwork: true containers: - name: metrics - image: lorenzotettamanti/cortexflow-metrics:0.1.2-test12 + image: lorenzotettamanti/cortexflow-metrics:otel-test-1 command: ["/bin/bash", "-c"] args: - | diff --git a/core/src/testing/otel_agent.yaml b/core/src/testing/otel_agent.yaml index 71b7e08..c5165ac 100644 --- a/core/src/testing/otel_agent.yaml +++ b/core/src/testing/otel_agent.yaml @@ -33,6 +33,9 @@ data: logs: receivers: [otlp] exporters: [otlp, logging] + metrics: + receivers: [otlp] + exporters: [otlp, logging] --- apiVersion: apps/v1 @@ -132,6 +135,10 @@ data: receivers: [otlp] processors: [memory_limiter] exporters: [logging] + metrics: + receivers: [otlp] + processors: [memory_limiter] + exporters: [logging] --- apiVersion: v1 From 836f0b4ff2766628d887317d56b5329364e4c135 Mon Sep 17 00:00:00 2001 From: LorenzoTettamanti Date: Fri, 5 Jun 2026 21:53:08 +0200 Subject: [PATCH 46/46] (fix): updated metrics.yaml manifest --- core/src/testing/metrics.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/testing/metrics.yaml b/core/src/testing/metrics.yaml index a106c4e..8a6c7d8 100644 --- a/core/src/testing/metrics.yaml +++ b/core/src/testing/metrics.yaml @@ -19,7 +19,7 @@ spec: hostNetwork: true containers: - name: metrics - image: lorenzotettamanti/cortexflow-metrics:otel-test-1 + image: lorenzotettamanti/cortexflow-metrics:otel-test-2 command: ["/bin/bash", "-c"] args: - |