From 654e8e436321196e56cd0b830f5d09a0fe1347a8 Mon Sep 17 00:00:00 2001 From: Piotr Konopka Date: Wed, 8 Apr 2026 17:21:18 +0200 Subject: [PATCH 1/6] Minor documentation inconsistency fixes Out of curiosity, I asked an LLM agent to review the documentation against the code and point out inconsistencies. That's what it came with. --- README.md | 2 +- core/integration/README.md | 2 +- docs/building.md | 2 +- docs/running.md | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 4889d30cb..eb4d30be3 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ There are two ways of interacting with AliECS: ### I want to ensure AliECS can **run and control my process** -* **My software is based on FairMQ and/or O² DPL (Data Processing Later)** +* **My software is based on FairMQ and/or O² DPL (Data Processing Layer)** AliECS natively supports FairMQ (and DPL) devices. Head to [ControlWorkflows](https://github.com/AliceO2Group/ControlWorkflows) for instructions on how to configure your software to be controlled by AliECS. diff --git a/core/integration/README.md b/core/integration/README.md index eda781e82..41996b2b9 100644 --- a/core/integration/README.md +++ b/core/integration/README.md @@ -177,7 +177,7 @@ DD scheduler plugin informs the Data Distribution software about the pool of FLP See [Legacy events: Kafka plugin](/docs/kafka.md#legacy-events-kafka-plugin) -# LHC plugin +## LHC This plugin listens to Kafka messages coming from the LHC DIP Client and pushes any relevant internal notifications to the AliECS core. Its main purpose is to provide basic information about ongoing LHC activity (e.g. fill information) to affected parties and allow AliECS to react upon them (e.g. by automatically stopping a physics run when stable beams are over). 
diff --git a/docs/building.md b/docs/building.md index 0a4243ec9..a692b7f91 100644 --- a/docs/building.md +++ b/docs/building.md @@ -80,7 +80,7 @@ Running `make` will take a while as all dependencies are gathered, built and ins $ make all ``` -You should find several executables including `o2control-core`, `o2control-executor` and `coconut` in `bin`. +You should find several executables including `o2-aliecs-core`, `o2-aliecs-executor` and `coconut` in `bin`. For subsequent builds (after the first one), plain `make` (instead of `make all`) is sufficient. See the [Makefile reference](makefile_reference.md) for more information. diff --git a/docs/running.md b/docs/running.md index 53d80afab..d3fd958fa 100644 --- a/docs/running.md +++ b/docs/running.md @@ -10,7 +10,7 @@ The recommended way to set up a Mesos cluster is by performing a complete deploy The AliECS core on the head node should be stopped (`systemctl stop o2-aliecs-core`) and your own AliECS core should be made to point to the head node. Typically, it can be done by replacing the AliECS core binary on the head node with your own and restarting the `o2-aliecs-core` systemd service. 
-The following example flags assume a remote head node `centosvmtest`, the use of the default `settings.yaml` file, very verbose output, verbose workflow dumps on every workflow deployment, and the executor having been copied (`scp`) to `/opt/o2control-executor` on all controlled nodes: +The following example flags assume a remote head node `centosvmtest`, the use of the default `settings.yaml` file, very verbose output, verbose workflow dumps on every workflow deployment, and the executor having been copied (`scp`) to `/opt/o2-aliecs-executor` on all controlled nodes: ```bash --coreConfigurationUri @@ -22,7 +22,7 @@ http://centosvmtest:5050/api/v1/scheduler --verbose --veryVerbose --executor -/opt/o2control-executor +/opt/o2-aliecs-executor --dumpWorkflows ```
From 37a4f2ec17a4c9fa978e63985979f11be1b63fbc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Tich=C3=A1k?= <53997499+justonedev1@users.noreply.github.com>
Date: Mon, 13 Apr 2026 10:01:39 +0200
Subject: [PATCH 2/6] [peanut] peanut now supports direct, fmq batched and single step modes (#803)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* [peanut] peanut now supports direct, fmq batched and single step modes in TUI and CLI modes

written using Claude Opus 4.6

* better --help and handling of streams

* better error handling

---------

Co-authored-by: Michal Tichák
---
 cmd/peanut/main.go                     |  87 +++-
 executor/executorcmd/nopb/occclient.go |  36 +-
 occ/peanut/README.md                   | 237 +++++++++--
 occ/peanut/cli.go                      | 434 +++++++++++++++++++++
 occ/peanut/flatten/flatten.go          |   3 +-
 occ/peanut/peanut.go                   | 520 ++++++++++++++++++-------
 6 files changed, 1157 insertions(+), 160 deletions(-)
 create mode 100644 occ/peanut/cli.go

diff --git a/cmd/peanut/main.go b/cmd/peanut/main.go
index 59ce9efc2..95ca39f02 100644
--- a/cmd/peanut/main.go
+++ b/cmd/peanut/main.go
@@ -25,17 +25,94 @@
 package main

 import (
+	"flag"
+	"fmt"
 	"os"
-	"strings"

 	"github.com/AliceO2Group/Control/occ/peanut"
 )

 func main() {
-	cmdArg := os.Args[1:]
-	cmdString := strings.Trim(strings.Join(cmdArg, " "), " ")
+	fs := flag.NewFlagSet("peanut", flag.ExitOnError)
+	addr := fs.String("addr", "", "OCC gRPC address (host:port); if empty, OCC_CONTROL_PORT env var is used in direct mode")
+	mode := fs.String("mode", "direct", "control mode: direct (default), fmq, or fmq-step")
+	// -timeout and -config are only consumed by peanut.RunCLI, which re-parses os.Args itself.
+	// They must still be registered here, or this ExitOnError flag set aborts on them.
+	fs.Duration("timeout", 0, "CLI mode only: unary call timeout")
+	fs.String("config", "", "CLI mode only: path to YAML/JSON file with arguments to push")
+	fs.Usage = func() {
+		fmt.Fprint(os.Stderr, `peanut — process execution and control utility for OCC / FairMQ processes

-	if err := peanut.Run(cmdString); err != nil {
-		panic(err)
+TUI mode (interactive, launched when no command is given):
+  peanut                                 direct mode via OCC_CONTROL_PORT env var
+  peanut -addr host:port                 direct mode (OCC protobuf, one button per transition)
+  peanut -addr host:port -mode fmq       fmq batched mode (drives full FairMQ sequence per transition)
+  peanut -addr host:port -mode fmq-step  fmq single-step mode (one button per raw FairMQ event)
+
+CLI mode (non-interactive, launched when a command is given):
+  peanut [flags] <command> [args]
+
+TUI Flags:
+  -addr string  gRPC address (host:port); if empty, uses OCC_CONTROL_PORT env var in direct mode
+  -mode string  direct (default), fmq, or fmq-step
+
+CLI Flags:
+  -addr string       gRPC address (default "localhost:47100")
+  -mode string       fmq (default) or direct
+  -timeout duration  unary call timeout (default 30s)
+  -config string     path to YAML/JSON file with arguments to push (inline key=val args take precedence)
+
+CLI Commands:
+  get-state
+        Print the current FSM state.
+
+  transition <from> <to> [key=val ...]
+        High-level OCC transition. In fmq mode drives the full multi-step FairMQ sequence:
+          STANDBY→CONFIGURED  runs INIT DEVICE, COMPLETE INIT, BIND, CONNECT, INIT TASK
+          CONFIGURED→RUNNING  runs RUN
+          RUNNING→CONFIGURED  runs STOP
+          CONFIGURED→STANDBY  runs RESET TASK, RESET DEVICE
+        In direct mode sends a single OCC protobuf Transition RPC.
+        key=val pairs are forwarded as ConfigEntry arguments.
+
+  direct-step <state> <event> [key=val ...]
+        Low-level: send a single raw OCC protobuf Transition RPC regardless of -mode.
+        Events: CONFIGURE, RESET, START, STOP, RECOVER, EXIT
+
+  fmq-step <state> <event> [key=val ...]
+        Low-level: send a single raw FairMQ gRPC Transition call regardless of -mode.
+        FairMQ state/event names that contain spaces must be quoted.
+
+  state-stream
+        Subscribe to StateStream; print updates until interrupted (ctrl-c to stop).
+
+  event-stream
+        Subscribe to EventStream; print events until interrupted (ctrl-c to stop).
+
+Examples:
+  peanut -addr localhost:47100 get-state
+  peanut -addr localhost:47100 transition STANDBY CONFIGURED chans.x.0.address=ipc://@foo
+  peanut -addr localhost:47100 -config args.yaml transition STANDBY CONFIGURED
+  peanut -addr localhost:47100 fmq-step IDLE "INIT DEVICE" chans.x.0.address=ipc://@foo
+  peanut -addr localhost:47100 direct-step STANDBY CONFIGURE key=val
+  peanut -addr localhost:47100 state-stream
+  peanut -addr localhost:47100 -mode direct transition STANDBY CONFIGURED
+`)
+	}
+	_ = fs.Parse(os.Args[1:])
+
+	if fs.NArg() > 0 {
+		// CLI mode — pass all original args so RunCLI can re-parse its own flags
+		if err := peanut.RunCLI(os.Args[1:]); err != nil {
+			fmt.Fprintf(os.Stderr, "peanut: %v\n", err)
+			os.Exit(1)
+		}
+		return
+	}
+
+	// TUI mode
+	if err := peanut.Run(peanut.Options{Addr: *addr, Mode: *mode}); err != nil {
+		fmt.Fprintf(os.Stderr, "peanut: %v\n", err)
+		os.Exit(1)
 	}
 }
diff --git a/executor/executorcmd/nopb/occclient.go b/executor/executorcmd/nopb/occclient.go
index 5efe87502..2929aa9db 100644
--- a/executor/executorcmd/nopb/occclient.go
+++ b/executor/executorcmd/nopb/occclient.go
@@ -61,8 +61,42 @@ func (c *occClient) EventStream(ctx context.Context, in *pb.EventStreamRequest,
 	return x, nil
 }

+type occStateStreamClient struct {
+	grpc.ClientStream
+}
+
+func (x *occStateStreamClient) Recv() (*pb.StateStreamReply, error) {
+	m := new(pb.StateStreamReply)
+	if err := x.ClientStream.RecvMsg(m); err != nil {
+		return nil, err
+	}
+	return m, nil
+} + func (c *occClient) StateStream(ctx context.Context, in *pb.StateStreamRequest, opts ...grpc.CallOption) (pb.Occ_StateStreamClient, error) { - return nil, nil + opts = append(opts, + []grpc.CallOption{ + grpc.CallContentSubtype("json"), + }..., + ) + streamDesc := grpc.StreamDesc{ + StreamName: "StateStream", + Handler: nil, + ServerStreams: true, + ClientStreams: false, + } + stream, err := c.cc.NewStream(ctx, &streamDesc, "StateStream", opts...) + if err != nil { + return nil, err + } + x := &occStateStreamClient{stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil } func (c *occClient) GetState(ctx context.Context, in *pb.GetStateRequest, opts ...grpc.CallOption) (*pb.GetStateReply, error) { diff --git a/occ/peanut/README.md b/occ/peanut/README.md index 61f72815b..b763776ba 100644 --- a/occ/peanut/README.md +++ b/occ/peanut/README.md @@ -1,42 +1,233 @@ # Process control and execution utility overview `peanut` is the **p**rocess **e**xecution **a**nd co**n**trol **ut**ility for OCClib-based O² processes. Its purpose -is to be a debugging and development aid for non-FairMQ O² devices, where FairMQ's interactive -controller is not available. +is to be a debugging and development aid for OCC-based and FairMQ O² devices. In aliBuild it is part of the `coconut` package. -`peanut` can connect to a running OCClib-based process, query its status, drive its state machine +`peanut` can connect to a running OCClib-based or FairMQ process, query its status, drive its state machine and push runtime configuration data. -`peanut` is an interactive tool, the only information it picks up from its environment is the -`OCC_CONTROL_PORT` variable, which is used to connect to a running OCClib-based process. 
+`peanut` runs in two modes depending on whether a command is passed: + +* **TUI mode** — interactive terminal UI (launched when no command is given) +* **CLI mode** — non-interactive, scriptable (launched when a command is given) + +--- + +## TUI mode + +![Screenshot of peanut](peanut.png) ```bash -$ OCC_CONTROL_PORT= peanut +peanut [flags] ``` -![Screenshot of peanut](peanut.png) +| Flag | Default | Description | +|------|---------|-------------| +| `-addr` | `""` | gRPC address `host:port`; if empty, falls back to `OCC_CONTROL_PORT` env var (direct mode only) | +| `-mode` | `direct` | `direct`, `fmq`, or `fmq-step` (see below) | + +### Modes + +#### `direct` — OCC protobuf (default) + +Connects to an OCClib-based process using the standard OCC protobuf codec. +The state machine operates on OCC states: `STANDBY`, `CONFIGURED`, `RUNNING`, `ERROR`. + +```bash +OCC_CONTROL_PORT=47100 peanut +# or +peanut -addr localhost:47100 -mode direct +``` + +Control buttons: **CONFIGURE**, **RESET**, **START**, **STOP**, **RECOVER**, **EXIT** + +#### `fmq` — FairMQ JSON codec with automatic multi-step sequencing + +Connects to a FairMQ device using the JSON codec. Each OCC-level button press +automatically drives the full underlying FairMQ state machine sequence. +The state is displayed as an OCC-mapped state (`STANDBY`, `CONFIGURED`, `RUNNING`…). + +```bash +peanut -addr localhost:47100 -mode fmq +``` + +Control buttons: **CONFIGURE**, **RESET**, **START**, **STOP**, **RECOVER**, **EXIT** + +Sequences driven automatically: + +| Button | FairMQ steps | +|--------|-------------| +| CONFIGURE | INIT DEVICE → COMPLETE INIT → BIND → CONNECT → INIT TASK | +| RESET | RESET TASK → RESET DEVICE | +| START | RUN | +| STOP | STOP | +| RECOVER | RESET DEVICE (from ERROR) | +| EXIT | RESET (if needed) → END | + +#### `fmq-step` — FairMQ JSON codec with granular per-step control + +Connects to a FairMQ device using the JSON codec. 
Exposes each individual FairMQ +state machine step as a separate button. The state is displayed as the raw FairMQ state. + +```bash +peanut -addr localhost:47100 -mode fmq-step +``` + +| Key | Button | Transition | +|-----|--------|-----------| +| `1` | INIT DEVICE | IDLE → INITIALIZING DEVICE | +| `2` | COMPLETE INIT | INITIALIZING DEVICE → INITIALIZED | +| `3` | BIND | INITIALIZED → BOUND | +| `4` | CONNECT | BOUND → DEVICE READY | +| `5` | INIT TASK | DEVICE READY → READY | +| `6` | RUN | READY → RUNNING | +| `7` | STOP | RUNNING → READY | +| `8` | RESET TASK | READY → DEVICE READY | +| `9` | RESET DEVICE | → IDLE | +| `0` | END | IDLE → EXITING | + +### Common TUI controls (all modes) + +| Key | Action | +|-----|--------| +| `n` | **Reconnect** — re-establish the gRPC connection to the controlled process. Use this when the process has been restarted after a crash or deliberate termination. | +| `l` | **Load configuration** — open a file dialog to read a YAML or JSON configuration file. The path field supports tab-completion. Once loaded, the right panel shows `NOT PUSHED` until the next CONFIGURE transition, then `PUSHED`. | +| `q` | **Quit** — disconnect and exit without sending any transitions. | + +### Connection monitoring + +While connected, peanut polls `GetState` every 2 seconds in a background goroutine. If the process stops responding, the state display shows `UNREACHABLE` and an error modal appears. After restarting the controlled process, press `n` to reconnect. + +Transition buttons are dimmed and disabled until a connection is successfully established. 
+ +#### Connection error states + +| State | Meaning | +|-------|---------| +| `CONNECTING` | Connection attempt in progress | +| `UNREACHABLE` | No process is listening on the given address, or the connection was lost after a successful connect | +| `WRONG MODE` | A process is running but speaks a different protocol — check the `-mode` flag | + +If `WRONG MODE` is shown, peanut will suggest the correct mode in the error modal. + +### Runtime configuration files + +Configuration files are YAML or JSON, with arbitrarily nested structure. +`peanut` flattens them to dot-notation key=value pairs before pushing. +Integer map keys and integer values are both handled correctly. + +Example (channel configuration): + +```yaml +chans: + data: + numSockets: 1 + 0: + address: ipc://@o2ipc-example + method: bind + type: push + transport: shmem + sndBufSize: 1000 + rcvBufSize: 1000 + sndKernelSize: 0 + rcvKernelSize: 0 + rateLogging: 0 +``` -`peanut` commands are documented inline. Each transition is applied immediately and -the state is updated in real time. +This flattens to entries like `chans.data.0.address=ipc://@o2ipc-example`. -Compared to the raw gRPC API, the following limitations apply: +--- -* It is not possible to perform a `GO_ERROR` transition, as this transition is only triggered from -user code. +## CLI mode -* The `CONFIGURE` transition may be triggered both with and without runtime configuration data, which -may or may not be suitable depending on user code. All other transitions send no payload. +```bash +peanut [flags] <command> [args] +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `-addr` | `localhost:47100` | gRPC address `host:port` | +| `-mode` | `fmq` | `fmq` (JSON codec) or `direct` (protobuf) | +| `-timeout` | `30s` | timeout for unary gRPC calls | +| `-config` | `""` | path to YAML/JSON file; flattened key=value pairs are sent as arguments. Inline `key=val` arguments take precedence.
|
+
+### Commands
+
+#### `get-state`
+
+Print the current FSM state.
+
+```bash
+peanut -addr localhost:47100 get-state
+```
+
+#### `transition <from> <to> [key=val ...]`
+
+High-level state transition. In `fmq` mode drives the full multi-step FairMQ sequence automatically.
+
+```bash
+# FairMQ: drive full configure sequence
+peanut -addr localhost:47100 -mode fmq transition STANDBY CONFIGURED \
+  chans.data.0.address=ipc://@o2ipc-example
+
+# FairMQ: with config file
+peanut -addr localhost:47100 -mode fmq -config stfsender-configure-args.yaml \
+  transition STANDBY CONFIGURED
+
+# Direct OCC
+peanut -addr localhost:47100 -mode direct transition STANDBY CONFIGURED
+```
+
+FairMQ sequences driven automatically:
+
+| From → To | Steps |
+|-----------|-------|
+| `STANDBY → CONFIGURED` | INIT DEVICE, COMPLETE INIT, BIND, CONNECT, INIT TASK |
+| `CONFIGURED → RUNNING` | RUN |
+| `RUNNING → CONFIGURED` | STOP |
+| `CONFIGURED → STANDBY` | RESET TASK, RESET DEVICE |
+
+#### `direct-step <state> <event> [key=val ...]`
+
+Low-level: send a single raw OCC gRPC Transition call (protobuf codec).
+
+```bash
+peanut -addr localhost:47100 -mode direct direct-step STANDBY CONFIGURE key=val
+```
+
+Events: `CONFIGURE`, `RESET`, `START`, `STOP`, `RECOVER`, `EXIT`
+
+#### `fmq-step <state> <event> [key=val ...]`
+
+Low-level: send a single raw FairMQ gRPC Transition call (JSON codec).
+State/event names that contain spaces must be quoted.
+
+```bash
+peanut -addr localhost:47100 fmq-step IDLE "INIT DEVICE" chans.x.0.address=ipc://@foo
+peanut -addr localhost:47100 fmq-step READY RUN
+```
+
+#### `state-stream`
+
+Subscribe to `StateStream` and print state updates until interrupted (Ctrl-C).
+
+```bash
+peanut -addr localhost:47100 state-stream
+```
+
+#### `event-stream`
+
+Subscribe to `EventStream` and print events until interrupted (Ctrl-C).
+ +```bash +peanut -addr localhost:47100 event-stream +``` -The last two commands are **not** transitions: +--- -* `Load configuration` allows the user to read in a JSON or YAML file containing sample -configuration data that is then available to be pushed to the controlled process during a future -`CONFIGURE` transition. On startup, there is no file loaded, so a `CONFIGURE` transition will push -an empty payload. Once a runtime configuration file is loaded, its title bar reports `NOT PUSHED` -until the next `CONFIGURE` transition, at which point it becomes `PUSHED`. +## Limitations -* `Quit` disconnects from the controlled process and quits `peanut`, but it performs no transitions -or other data exchange with the controlled process. A future instance of `peanut` may reattach itself -to the same process and continue from there. +* The `GO_ERROR` transition cannot be triggered from `peanut`, as it is only triggered from user code inside the controlled process. +* `Quit` / `q` disconnects without sending any transition. A future instance of `peanut` can reattach to the same process and continue. diff --git a/occ/peanut/cli.go b/occ/peanut/cli.go new file mode 100644 index 000000000..29edf21e0 --- /dev/null +++ b/occ/peanut/cli.go @@ -0,0 +1,434 @@ +/* + * === This file is part of ALICE O² === + * + * Copyright 2024 CERN and copyright holders of ALICE O². + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. 
If not, see <http://www.gnu.org/licenses/>.
+ *
+ * In applying this license CERN does not waive the privileges and
+ * immunities granted to it by virtue of its status as an
+ * Intergovernmental Organization or submit itself to any jurisdiction.
+ */
+
+package peanut
+
+import (
+	"context"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"os"
+	"os/signal"
+	"strings"
+	"syscall"
+	"time"
+
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials/insecure"
+
+	"github.com/AliceO2Group/Control/executor/executorcmd/nopb"
+	"github.com/AliceO2Group/Control/executor/executorcmd/transitioner/fairmq"
+	pb "github.com/AliceO2Group/Control/executor/protos"
+	"github.com/AliceO2Group/Control/occ/peanut/flatten"
+)
+
+// RunCLI runs peanut in non-interactive CLI mode: it parses args (normally os.Args[1:]), dials -addr and runs one command.
+// direct-step and fmq-step always use the protobuf and JSON client respectively; all other commands follow -mode.
+func RunCLI(args []string) error {
+	fs := flag.NewFlagSet("peanut", flag.ExitOnError)
+	addr := fs.String("addr", "localhost:47100", "OCC gRPC address (host:port)")
+	mode := fs.String("mode", "fmq", "control mode: fmq (json codec, default) or direct (protobuf)")
+	timeout := fs.Duration("timeout", 30*time.Second, "request timeout for unary calls")
+	configFile := fs.String("config", "", "path to YAML/JSON file whose flattened key=value pairs are sent as arguments (inline key=val args take precedence)")
+	fs.Usage = cliUsage
+	_ = fs.Parse(args) // the flag set is ExitOnError, so the returned error is ignored here
+
+	cmds := fs.Args()
+	if len(cmds) == 0 {
+		cliUsage()
+		return fmt.Errorf("no command specified")
+	}
+
+	conn, err := grpc.Dial(*addr, grpc.WithTransportCredentials(insecure.NewCredentials())) //nolint:staticcheck
+	if err != nil {
+		return fmt.Errorf("dial %s: %w", *addr, err)
+	}
+	defer conn.Close()
+
+	var client pb.OccClient
+	if *mode == "fmq" {
+		client = nopb.NewOccClient(conn)
+	} else { // NOTE(review): any other -mode value, including the TUI-only "fmq-step", selects the protobuf client
+		client = pb.NewOccClient(conn)
+	}
+
+	loadedKVs, err := cliLoadConfig(*configFile)
+	if err != nil {
+		return fmt.Errorf("config file: %w", err)
+	}
+
+	switch cmds[0] {
+	case "get-state":
+		ctx, cancel := context.WithTimeout(context.Background(), *timeout)
+		defer cancel()
+		reply, err := client.GetState(ctx, &pb.GetStateRequest{})
+		if err != nil {
+			return fmt.Errorf("GetState: %w", err)
+		}
+		fmt.Println(reply.GetState())
+
+	case "transition":
+		if len(cmds) < 3 {
+			return fmt.Errorf("usage: transition <from> <to> [key=val ...]")
+		}
+		from := strings.ToUpper(cmds[1])
+		to := strings.ToUpper(cmds[2])
+		kvs := cliMergeKVs(loadedKVs, cliParseKVs(cmds[3:]))
+
+		ctx, cancel := context.WithTimeout(context.Background(), *timeout)
+		defer cancel()
+
+		if *mode == "fmq" {
+			result, err := cliFMQTransition(ctx, client, from, to, kvs)
+			if err != nil {
+				return fmt.Errorf("transition %s→%s: %w", from, to, err)
+			}
+			fmt.Printf("ok state=%s\n", result)
+		} else {
+			event := cliOCCEventForTransition(from, to)
+			reply, err := client.Transition(ctx, &pb.TransitionRequest{
+				SrcState:        from,
+				TransitionEvent: event,
+				Arguments:       cliKVsToEntries(kvs),
+			})
+			if err != nil {
+				return fmt.Errorf("Transition: %w", err)
+			}
+			fmt.Printf("ok state=%s trigger=%s\n", reply.GetState(), reply.GetTrigger())
+		}
+
+	case "direct-step":
+		// Low-level single OCC Transition RPC. Always sent through the protobuf client, whatever
+		// -mode says, as the help text promises. Usage: direct-step <state> <event> [key=val ...]
+		if len(cmds) < 3 {
+			return fmt.Errorf("usage: direct-step <state> <event> [key=val ...]\n e.g. direct-step STANDBY CONFIGURE key=val")
+		}
+		kvs := cliMergeKVs(loadedKVs, cliParseKVs(cmds[3:]))
+		ctx, cancel := context.WithTimeout(context.Background(), *timeout)
+		defer cancel()
+		reply, err := pb.NewOccClient(conn).Transition(ctx, &pb.TransitionRequest{
+			SrcState:        cmds[1],
+			TransitionEvent: cmds[2],
+			Arguments:       cliKVsToEntries(kvs),
+		})
+		if err != nil {
+			return fmt.Errorf("direct-step: %w", err)
+		}
+		fmt.Printf("ok state=%s trigger=%s\n", reply.GetState(), reply.GetTrigger()) // NOTE(review): reply.GetOk() is not checked here, unlike fmqDoStep
+
+	case "fmq-step":
+		if len(cmds) < 3 {
+			return fmt.Errorf("usage: fmq-step <state> <event> [key=val ...]\n e.g. fmq-step IDLE \"INIT DEVICE\" key=val")
+		}
+		kvs := cliMergeKVs(loadedKVs, cliParseKVs(cmds[3:]))
+		ctx, cancel := context.WithTimeout(context.Background(), *timeout)
+		defer cancel()
+		state, err := cliFMQDoStep(ctx, nopb.NewOccClient(conn), cmds[1], cmds[2], kvs) // always the JSON (FairMQ) client, regardless of -mode
+		if err != nil {
+			return fmt.Errorf("fmq-step: %w", err)
+		}
+		fmt.Printf("ok fmq-state=%s occ-state=%s\n", state, cliFMQToOCCState(state))
+
+	case "state-stream":
+		ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
+		defer cancel()
+		stream, err := client.StateStream(ctx, &pb.StateStreamRequest{})
+		if err != nil {
+			return fmt.Errorf("StateStream: %w", err)
+		}
+		if stream == nil {
+			return fmt.Errorf("StateStream not supported by this server (try polling with get-state)")
+		}
+		fmt.Fprintf(os.Stderr, "streaming state updates from %s (ctrl-c to stop)\n", *addr)
+		for {
+			msg, err := stream.Recv()
+			if err != nil {
+				if ctx.Err() != nil { // interrupted by the user: a clean exit, not a failure
+					return nil
+				}
+				return fmt.Errorf("StateStream recv: %w", err)
+			}
+			fmt.Printf("type=%-12s state=%s\n", msg.GetType(), msg.GetState())
+		}
+
+	case "event-stream":
+		ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
+		defer cancel()
+		stream, err := client.EventStream(ctx, &pb.EventStreamRequest{})
+		if err != nil {
+			return fmt.Errorf("EventStream: %w", err)
+		}
+		if stream == nil {
+			return fmt.Errorf("EventStream not supported by this server")
+		}
+		fmt.Fprintf(os.Stderr, "streaming events from %s (ctrl-c to stop)\n", *addr)
+		for {
+			msg, err := stream.Recv()
+			if err != nil {
+				if ctx.Err() != nil { // interrupted by the user: a clean exit, not a failure
+					return nil
+				}
+				return fmt.Errorf("EventStream recv: %w", err)
+			}
+			fmt.Printf("event=%s\n", msg.GetEvent())
+		}
+
+	default:
+		return fmt.Errorf("unknown command %q — valid: get-state, transition, direct-step, fmq-step, state-stream, event-stream", cmds[0])
+	}
+	return nil
+}
+
+// cliFMQStateToOCC maps FairMQ device states to OCC states.
+var cliFMQStateToOCC = map[string]string{
+	fairmq.IDLE:                "STANDBY",
+	fairmq.INITIALIZING_DEVICE: "STANDBY",
+	fairmq.INITIALIZED:         "STANDBY",
+	fairmq.BOUND:               "STANDBY",
+	fairmq.DEVICE_READY:        "STANDBY",
+	fairmq.READY:               "CONFIGURED",
+	fairmq.RUNNING:             "RUNNING",
+	fairmq.ERROR:               "ERROR",
+}
+
+func cliFMQToOCCState(fmqState string) string { // looks fmqState up (upper-cased) in cliFMQStateToOCC
+	if s, ok := cliFMQStateToOCC[strings.ToUpper(fmqState)]; ok {
+		return s
+	}
+	return "STANDBY" // fallback for any state missing from the table, including the empty string
+}
+
+// fmqDoStep sends one Transition RPC (srcFMQ, event, kvs) and prints nothing; it returns the reply's state, with an error if the call fails or the reply is not ok.
+func fmqDoStep(ctx context.Context, client pb.OccClient, srcFMQ, event string, kvs map[string]string) (string, error) {
+	reply, err := client.Transition(ctx, &pb.TransitionRequest{
+		SrcState:        srcFMQ,
+		TransitionEvent: event,
+		Arguments:       cliKVsToEntries(kvs),
+	})
+	if err != nil {
+		return "", fmt.Errorf("step %q failed: %w", event, err)
+	}
+	state := reply.GetState()
+	if !reply.GetOk() {
+		return state, fmt.Errorf("step %q not ok, state=%s", event, state) // state is returned with the error so callers can report it
+	}
+	return state, nil
+}
+
+// cliFMQDoStep wraps fmqDoStep with stderr progress output for CLI use.
+func cliFMQDoStep(ctx context.Context, client pb.OccClient, srcFMQ, event string, kvs map[string]string) (string, error) {
+	fmt.Fprintf(os.Stderr, " fmq-step src=%-20q event=%q\n", srcFMQ, event) // progress goes to stderr; stdout carries only the final result line
+	state, err := fmqDoStep(ctx, client, srcFMQ, event, kvs)
+	ok := err == nil
+	fmt.Fprintf(os.Stderr, " result=%-20q ok=%v\n", state, ok)
+	return state, err
+}
+
+func cliFMQTransition(ctx context.Context, client pb.OccClient, from, to string, kvs map[string]string) (string, error) { // maps an OCC-level from→to pair onto FairMQ steps; returns the resulting OCC state
+	switch {
+	case from == "STANDBY" && to == "CONFIGURED":
+		return cliFMQConfigure(ctx, client, kvs)
+	case from == "CONFIGURED" && to == "STANDBY":
+		return cliFMQReset(ctx, client, kvs)
+	case from == "CONFIGURED" && to == "RUNNING":
+		state, err := cliFMQDoStep(ctx, client, fairmq.READY, fairmq.EvtRUN, kvs)
+		return cliFMQToOCCState(state), err
+	case from == "RUNNING" && to == "CONFIGURED":
+		state, err := cliFMQDoStep(ctx, client, fairmq.RUNNING, fairmq.EvtSTOP, kvs)
+		return cliFMQToOCCState(state), err
+	default: // every other pair (e.g. ERROR→STANDBY) is rejected without contacting the device
+		return from, fmt.Errorf("unsupported FairMQ transition %s → %s", from, to)
+	}
+}
+
+// fmqStepErr formats a FairMQ step failure, omitting the cause when err is nil
+// (state arrived but was wrong) to avoid a trailing ": " in the message.
+func fmqStepErr(step, want, got string, err error) error {
+	if err != nil {
+		return fmt.Errorf("%s: expected %q got %q: %w", step, want, got, err)
+	}
+	return fmt.Errorf("%s: expected %q got %q", step, want, got)
+}
+
+func cliFMQConfigure(ctx context.Context, client pb.OccClient, args map[string]string) (string, error) { // five-step sequence IDLE → READY; args are sent with INIT DEVICE only
+	state, err := cliFMQDoStep(ctx, client, fairmq.IDLE, fairmq.EvtINIT_DEVICE, args)
+	if err != nil || state != fairmq.INITIALIZING_DEVICE {
+		return cliFMQToOCCState(state), fmqStepErr("INIT DEVICE", fairmq.INITIALIZING_DEVICE, state, err)
+	}
+	state, err = cliFMQDoStep(ctx, client, fairmq.INITIALIZING_DEVICE, fairmq.EvtCOMPLETE_INIT, nil)
+	if err != nil || state != fairmq.INITIALIZED {
+		return cliFMQToOCCState(state), fmqStepErr("COMPLETE INIT", fairmq.INITIALIZED, state, err)
+	}
+	state, err = cliFMQDoStep(ctx, client, fairmq.INITIALIZED, fairmq.EvtBIND, nil)
+	if err != nil || state != fairmq.BOUND {
+		cliFMQDoStep(ctx, client, fairmq.INITIALIZED, fairmq.EvtRESET_DEVICE, nil) // best-effort rollback; its result and error are ignored
+		return cliFMQToOCCState(state), fmqStepErr("BIND", fairmq.BOUND, state, err)
+	}
+	state, err = cliFMQDoStep(ctx, client, fairmq.BOUND, fairmq.EvtCONNECT, nil)
+	if err != nil || state != fairmq.DEVICE_READY {
+		cliFMQDoStep(ctx, client, fairmq.BOUND, fairmq.EvtRESET_DEVICE, nil) // best-effort rollback; its result and error are ignored
+		return cliFMQToOCCState(state), fmqStepErr("CONNECT", fairmq.DEVICE_READY, state, err)
+	}
+	state, err = cliFMQDoStep(ctx, client, fairmq.DEVICE_READY, fairmq.EvtINIT_TASK, nil)
+	if err != nil || state != fairmq.READY {
+		cliFMQDoStep(ctx, client, fairmq.DEVICE_READY, fairmq.EvtRESET_DEVICE, nil) // best-effort rollback; its result and error are ignored
+		return cliFMQToOCCState(state), fmqStepErr("INIT TASK", fairmq.READY, state, err)
+	}
+	return cliFMQToOCCState(state), nil
+}
+
+func cliFMQReset(ctx context.Context, client pb.OccClient, args map[string]string) (string, error) { // READY → DEVICE READY, then RESET DEVICE; args are sent with RESET DEVICE only
+	state, err := cliFMQDoStep(ctx, client, fairmq.READY, fairmq.EvtRESET_TASK, nil)
+	if err != nil || state != fairmq.DEVICE_READY {
+		return cliFMQToOCCState(state), fmqStepErr("RESET TASK", fairmq.DEVICE_READY, state, err)
+	}
+	state, err = cliFMQDoStep(ctx, client, fairmq.DEVICE_READY, fairmq.EvtRESET_DEVICE, args)
+	return cliFMQToOCCState(state), err // NOTE(review): unlike the steps in cliFMQConfigure, the state reached here is not compared against an expected value
+}
+
+func cliOCCEventForTransition(from, to string) string { // OCC event name for a from→to state pair
+	type edge struct{ from, to string }
+	table := map[edge]string{
+		{"STANDBY", "CONFIGURED"}: "CONFIGURE",
+		{"CONFIGURED", "RUNNING"}: "START",
+		{"RUNNING", "CONFIGURED"}: "STOP",
+		{"CONFIGURED", "STANDBY"}: "RESET",
+		{"ERROR", "STANDBY"}:      "RECOVER",
+	}
+	if ev, ok := table[edge{from, to}]; ok {
+		return ev
+	}
+	return to // unknown pair: the second argument is passed through unchanged as the event name
+}
+
+func cliKVsToEntries(kvs map[string]string) []*pb.ConfigEntry { // one ConfigEntry per pair, in map iteration order; a nil map yields an empty slice
+	entries := make([]*pb.ConfigEntry, 0, len(kvs))
+	for k, v := range kvs {
+		entries = append(entries, &pb.ConfigEntry{Key: k, Value: v})
+	}
+	return entries
+}
+
+func cliParseKVs(args []string) map[string]string { // splits each "key=val" at its first '='
+	m := make(map[string]string, len(args))
+	for _, kv := range args {
+		parts := strings.SplitN(kv, "=", 2)
+		if len(parts) == 2 { // NOTE(review): arguments without '=' are dropped silently
+			m[parts[0]] = parts[1]
+		}
+	}
+	return m
+}
+
+// cliLoadConfig reads and flattens a YAML/JSON config file into a key=value map.
+// Returns an empty map (not an error) if path is empty.
+func cliLoadConfig(path string) (map[string]string, error) {
+	if path == "" {
+		return map[string]string{}, nil
+	}
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil, fmt.Errorf("cannot read %q: %w", path, err)
+	}
+	flattened, err := flatten.FlattenString(string(data), "", flatten.DotStyle)
+	if err != nil {
+		return nil, fmt.Errorf("cannot flatten %q: %w", path, err)
+	}
+	var rawMap map[string]interface{}
+	if err := json.Unmarshal([]byte(flattened), &rawMap); err != nil {
+		return nil, fmt.Errorf("cannot parse flattened config: %w", err)
+	}
+	kvs := make(map[string]string, len(rawMap))
+	for k, v := range rawMap {
+		kvs[k] = fmt.Sprintf("%v", v) // NOTE(review): if the flattened JSON carries numbers they decode as float64, and %v prints 1000000 as "1e+06" — consider json.Decoder.UseNumber
+	}
+	return kvs, nil
+}
+
+// cliMergeKVs merges base and override maps; keys in override take precedence.
+func cliMergeKVs(base, override map[string]string) map[string]string {
+	merged := make(map[string]string, len(base)+len(override))
+	for k, v := range base {
+		merged[k] = v
+	}
+	for k, v := range override {
+		merged[k] = v
+	}
+	return merged
+}
+
+func cliUsage() { // writes the CLI help text to stderr; RunCLI installs it as its flag set's Usage
+	fmt.Fprint(os.Stderr, `peanut — process execution and control utility for OCC / FairMQ processes
+
+TUI mode (interactive, no command given):
+  peanut                                 direct mode via OCC_CONTROL_PORT env var
+  peanut -addr host:port                 direct mode (OCC protobuf)
+  peanut -addr host:port -mode fmq       fmq batched mode (full FairMQ sequence per transition)
+  peanut -addr host:port -mode fmq-step  fmq single-step mode (one button per raw FairMQ event)
+
+CLI mode (non-interactive, command given):
+  peanut [flags] <command> [args]
+
+CLI Flags:
+  -addr string       gRPC address (default "localhost:47100")
+  -mode string       fmq (FairMQ json codec, default) or direct (OCC protobuf)
+  -timeout duration  unary call timeout (default 30s)
+  -config string     path to YAML/JSON file with arguments to push (inline key=val args take precedence)
+
+CLI Commands:
+  get-state
+        Print the current FSM state.
+
+  transition <from> <to> [key=val ...]
+        High-level OCC transition. In fmq mode drives the full multi-step
+        FairMQ sequence automatically:
+          STANDBY→CONFIGURED  runs INIT DEVICE, COMPLETE INIT, BIND, CONNECT, INIT TASK
+          CONFIGURED→RUNNING  runs RUN
+          RUNNING→CONFIGURED  runs STOP
+          CONFIGURED→STANDBY  runs RESET TASK, RESET DEVICE
+        In direct mode sends a single OCC protobuf Transition RPC.
+        key=val pairs are forwarded as ConfigEntry arguments.
+
+  direct-step <state> <event> [key=val ...]
+        Low-level: send a single raw OCC protobuf Transition RPC regardless of -mode.
+        Events: CONFIGURE, RESET, START, STOP, RECOVER, EXIT
+
+  fmq-step <state> <event> [key=val ...]
+        Low-level: send a single raw FairMQ gRPC Transition call regardless of -mode.
+        FairMQ state/event names that contain spaces must be quoted.
+
+  state-stream
+        Subscribe to StateStream; print updates until interrupted.
+
+  event-stream
+        Subscribe to EventStream; print events until interrupted.
+
+Examples:
+  peanut -addr localhost:47100 get-state
+  peanut -addr localhost:47100 transition STANDBY CONFIGURED chans.x.0.address=ipc://@foo
+  peanut -addr localhost:47100 fmq-step IDLE "INIT DEVICE" chans.x.0.address=ipc://@foo
+  peanut -addr localhost:47100 state-stream
+  peanut -addr localhost:47100 -mode direct transition STANDBY CONFIGURED
+`)
+}
diff --git a/occ/peanut/flatten/flatten.go b/occ/peanut/flatten/flatten.go
index 09db104a1..fbf9b2a58 100644
--- a/occ/peanut/flatten/flatten.go
+++ b/occ/peanut/flatten/flatten.go
@@ -94,6 +94,7 @@ package flatten
 import (
 	"encoding/json"
 	"errors"
+	"fmt"
 	"strconv"

 	"gopkg.in/yaml.v3"
@@ -175,7 +176,7 @@ func flatten(top bool, flatMap map[string]interface{}, nested interface{}, prefi
 	switch nested.(type) {
 	case map[interface{}]interface{}:
 		for k, v := range nested.(map[interface{}]interface{}) {
-			newKey := enkey(top, prefix, k.(string), style)
+			newKey := enkey(top, prefix, fmt.Sprintf("%v", k), style)
 			assign(newKey, v)
 		}
 	case map[string]interface{}:
diff --git a/occ/peanut/peanut.go b/occ/peanut/peanut.go
index 4e6f4d5f5..e0ee3516d 100644
--- 
a/occ/peanut/peanut.go +++ b/occ/peanut/peanut.go @@ -32,28 +32,153 @@ import ( "fmt" "io/ioutil" "os" + "path/filepath" "strconv" + "strings" "time" - "github.com/AliceO2Group/Control/common/controlmode" - "github.com/AliceO2Group/Control/executor/executorcmd" + "github.com/AliceO2Group/Control/executor/executorcmd/nopb" + "github.com/AliceO2Group/Control/executor/executorcmd/transitioner/fairmq" "github.com/AliceO2Group/Control/executor/protos" "github.com/AliceO2Group/Control/occ/peanut/flatten" "github.com/gdamore/tcell/v2" "github.com/rivo/tview" - log "github.com/sirupsen/logrus" "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/status" ) +// Options configures the peanut TUI. +type Options struct { + Addr string // host:port; if empty, falls back to OCC_CONTROL_PORT env var (direct mode only) + Mode string // "direct" (default), "fmq", or "fmq-step" +} + var ( app *tview.Application state string + tuiMode string + tuiAddr string + tuiConn *grpc.ClientConn + monitorCancel context.CancelFunc + transitioning bool + connected bool + setCommandsEnabled func(bool) configMap map[string]string controlList *tview.List configTextView *tview.TextView - rpcClient *executorcmd.RpcClient + configPages *tview.Pages + occClient pb.OccClient ) +func monitorConnection(ctx context.Context) { + // Poll GetState every 2s for disconnect detection. 
+ ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + if _, e := occClient.GetState(ctx, &pb.GetStateRequest{}); e != nil { + if ctx.Err() != nil { + return + } + app.QueueUpdateDraw(func() { + connected = false + if setCommandsEnabled != nil { + setCommandsEnabled(false) + } + state = "UNREACHABLE" + errorMessage(configPages, "Connection lost", e.Error()) + }) + return + } + } + } +} + +func connectRPC() { + if monitorCancel != nil { + monitorCancel() // stop any existing connection monitor + } + connected = false + if setCommandsEnabled != nil { + setCommandsEnabled(false) + } + state = "CONNECTING" + go func() { + if tuiConn != nil { + tuiConn.Close() + tuiConn = nil + } + conn, e := grpc.Dial(tuiAddr, grpc.WithTransportCredentials(insecure.NewCredentials())) //nolint:staticcheck + if e != nil { + app.QueueUpdateDraw(func() { + state = "UNREACHABLE" + errorMessage(configPages, "Connection failed", e.Error()) + }) + return + } + if tuiMode == "fmq" || tuiMode == "fmq-step" { + occClient = nopb.NewOccClient(conn) + } else { + occClient = pb.NewOccClient(conn) + } + response, e := occClient.GetState(context.TODO(), &pb.GetStateRequest{}) + if e != nil { + if st, ok := status.FromError(e); ok && st.Code() == codes.Unavailable { + app.QueueUpdateDraw(func() { + state = "UNREACHABLE" + errorMessage(configPages, "Nothing running", "No process is listening on "+tuiAddr+".") + }) + return + } + // Probe with the opposite client to detect mode mismatch. 
+ var altClient pb.OccClient + if tuiMode == "fmq" || tuiMode == "fmq-step" { + altClient = pb.NewOccClient(conn) + } else { + altClient = nopb.NewOccClient(conn) + } + _, altErr := altClient.GetState(context.TODO(), &pb.GetStateRequest{}) + app.QueueUpdateDraw(func() { + if altErr == nil { + state = "WRONG MODE" + if tuiMode == "fmq" || tuiMode == "fmq-step" { + errorMessage(configPages, "Wrong mode", + "The process at "+tuiAddr+" is a direct OCC process.\nRestart peanut without -mode fmq.") + } else { + errorMessage(configPages, "Wrong mode", + "The process at "+tuiAddr+" is a FairMQ process.\nRestart peanut with -mode fmq.") + } + } else { + state = "UNREACHABLE" + errorMessage(configPages, "Connection failed", e.Error()) + } + }) + return + } + tuiConn = conn + connected = true + ctx, cancel := context.WithCancel(context.Background()) + monitorCancel = cancel + go monitorConnection(ctx) + app.QueueUpdateDraw(func() { + if setCommandsEnabled != nil { + setCommandsEnabled(true) + } + switch tuiMode { + case "fmq": + state = cliFMQToOCCState(response.GetState()) + default: + state = response.GetState() + } + }) + }() +} + func modal(p tview.Primitive, width, height int) tview.Primitive { return tview.NewFlex(). AddItem(nil, 0, 1, false). 
@@ -65,7 +190,7 @@ func modal(p tview.Primitive, width, height int) tview.Primitive { } func transition(evt string) error { - args := make([]*pb.ConfigEntry, 0) + args := make([]*pb.ConfigEntry, 0, len(configMap)) for k, v := range configMap { args = append(args, &pb.ConfigEntry{Key: k, Value: v}) } @@ -78,41 +203,163 @@ func transition(evt string) error { }) } - response, err := rpcClient.Transition(context.TODO(), &pb.TransitionRequest{ - TransitionEvent: evt, - Arguments: args, - SrcState: state, - }, grpc.EmptyCallOption{}) - if err != nil { - app.Stop() + var ( + newState string + err error + ) + + if tuiMode == "fmq" { + newState, err = tuiFMQTransition(evt, args) + } else { + var response *pb.TransitionReply + response, err = occClient.Transition(context.TODO(), &pb.TransitionRequest{ + TransitionEvent: evt, + Arguments: args, + SrcState: state, + }) + if err == nil { + newState = response.GetState() + } + } - fmt.Println(err.Error()) + if err != nil { return err } if evt == "CONFIGURE" { configTextView.SetTitle("runtime configuration (PUSHED)") } - state = response.GetState() + state = newState return nil } +// tuiFMQTransition maps TUI event names to FairMQ multi-step sequences. 
+func tuiFMQTransition(evt string, args []*pb.ConfigEntry) (string, error) { + kvs := make(map[string]string, len(args)) + for _, e := range args { + kvs[e.Key] = e.Value + } + + switch evt { + case "CONFIGURE": + return tuiFMQConfigure(kvs) + case "RESET": + return tuiFMQReset(kvs) + case "START": + result, err := fmqDoStep(context.TODO(), occClient, fairmq.READY, fairmq.EvtRUN, kvs) + return cliFMQToOCCState(result), err + case "STOP": + result, err := fmqDoStep(context.TODO(), occClient, fairmq.RUNNING, fairmq.EvtSTOP, kvs) + return cliFMQToOCCState(result), err + case "RECOVER": + result, err := fmqDoStep(context.TODO(), occClient, fairmq.ERROR, fairmq.EvtRESET_DEVICE, kvs) + return cliFMQToOCCState(result), err + case "EXIT": + if state == "CONFIGURED" { + if _, err := tuiFMQReset(nil); err != nil { + return state, err + } + } + result, err := fmqDoStep(context.TODO(), occClient, fairmq.IDLE, fairmq.EvtEND, nil) + return cliFMQToOCCState(result), err + default: + return state, fmt.Errorf("unsupported transition %q in FairMQ mode", evt) + } +} + +func tuiFMQConfigure(args map[string]string) (string, error) { + state, err := fmqDoStep(context.TODO(), occClient, fairmq.IDLE, fairmq.EvtINIT_DEVICE, args) + if err != nil || state != fairmq.INITIALIZING_DEVICE { + return cliFMQToOCCState(state), fmqStepErr("INIT DEVICE", fairmq.INITIALIZING_DEVICE, state, err) + } + state, err = fmqDoStep(context.TODO(), occClient, fairmq.INITIALIZING_DEVICE, fairmq.EvtCOMPLETE_INIT, nil) + if err != nil || state != fairmq.INITIALIZED { + return cliFMQToOCCState(state), fmqStepErr("COMPLETE INIT", fairmq.INITIALIZED, state, err) + } + state, err = fmqDoStep(context.TODO(), occClient, fairmq.INITIALIZED, fairmq.EvtBIND, nil) + if err != nil || state != fairmq.BOUND { + fmqDoStep(context.TODO(), occClient, fairmq.INITIALIZED, fairmq.EvtRESET_DEVICE, nil) // rollback + return cliFMQToOCCState(state), fmqStepErr("BIND", fairmq.BOUND, state, err) + } + state, err = 
fmqDoStep(context.TODO(), occClient, fairmq.BOUND, fairmq.EvtCONNECT, nil) + if err != nil || state != fairmq.DEVICE_READY { + fmqDoStep(context.TODO(), occClient, fairmq.BOUND, fairmq.EvtRESET_DEVICE, nil) // rollback + return cliFMQToOCCState(state), fmqStepErr("CONNECT", fairmq.DEVICE_READY, state, err) + } + state, err = fmqDoStep(context.TODO(), occClient, fairmq.DEVICE_READY, fairmq.EvtINIT_TASK, nil) + if err != nil || state != fairmq.READY { + fmqDoStep(context.TODO(), occClient, fairmq.DEVICE_READY, fairmq.EvtRESET_DEVICE, nil) // rollback + return cliFMQToOCCState(state), fmqStepErr("INIT TASK", fairmq.READY, state, err) + } + return cliFMQToOCCState(state), nil +} + +func tuiFMQReset(args map[string]string) (string, error) { + state, err := fmqDoStep(context.TODO(), occClient, fairmq.READY, fairmq.EvtRESET_TASK, nil) + if err != nil || state != fairmq.DEVICE_READY { + return cliFMQToOCCState(state), fmqStepErr("RESET TASK", fairmq.DEVICE_READY, state, err) + } + state, err = fmqDoStep(context.TODO(), occClient, fairmq.DEVICE_READY, fairmq.EvtRESET_DEVICE, args) + return cliFMQToOCCState(state), err +} + func drawStatus(screen tcell.Screen, x int, y int, width int, height int) (int, int, int, int) { tview.Print(screen, state, x, height/2, width, tview.AlignCenter, tcell.ColorLime) return 0, 0, 0, 0 } +// pathComplete returns filesystem completions for the given partial path. +func pathComplete(text string) []string { + // Expand ~ to home directory + if strings.HasPrefix(text, "~/") { + if home, err := os.UserHomeDir(); err == nil { + text = filepath.Join(home, text[2:]) + } + } + + // Split into directory and filename prefix + dir, prefix := filepath.Split(text) + if dir == "" { + dir = "." 
+ } + + entries, err := os.ReadDir(dir) + if err != nil { + return nil + } + + var matches []string + for _, entry := range entries { + name := entry.Name() + if !strings.HasPrefix(name, prefix) { + continue + } + full := filepath.Join(dir, name) + if entry.IsDir() { + full += "/" + } + matches = append(matches, full) + } + return matches +} + func acquireConfigFile(configPages *tview.Pages) error { configInputFrame := tview.NewForm() configInputFrame.SetTitle("file path for runtime configuration") configInputFrame.SetBorder(true) configInputFrame.AddInputField("path:", "", 0, nil, nil) + // Wire up filesystem tab-completion on the path input field + pathField := configInputFrame.GetFormItemByLabel("path:").(*tview.InputField) + pathField.SetAutocompleteFunc(func(currentText string) []string { + return pathComplete(currentText) + }) + configPages.AddPage("modal", modal(configInputFrame, 40, 10), true, true) + app.SetFocus(configInputFrame) configCancelFunc := func() { configPages.RemovePage("modal") app.SetFocus(controlList) - app.Draw() } configInputFrame.AddButton("Ok", func() { @@ -126,9 +373,6 @@ func acquireConfigFile(configPages *tview.Pages) error { configInputFrame.SetCancelFunc(configCancelFunc) configInputFrame.AddButton("Cancel", configCancelFunc) - app.SetFocus(configInputFrame) - - app.Draw() return nil } @@ -141,7 +385,6 @@ func errorMessage(configPages *tview.Pages, title string, text string) { configPages.AddPage("modal", modalPage, true, true) app.SetFocus(modalPage) - app.Draw() } func loadConfig(configFilePath string, configPages *tview.Pages) { @@ -149,21 +392,6 @@ func loadConfig(configFilePath string, configPages *tview.Pages) { errorMessage(configPages, "Cannot open configuration file", "path empty") return } - /*// Make sure we trim all variants - trimmed := strings.TrimPrefix(configFilePath, "file://") - trimmed = strings.TrimPrefix(trimmed, "file:/") - trimmed = strings.TrimPrefix(trimmed, "file:") - uri := "file://" + trimmed - cfg, err 
:= configuration.NewConfiguration(uri) - if err != nil { - errorMessage(configPages, "Cannot open configuration file", err.Error()) - return - } - yamlConfig, err := cfg.GetRecursiveYaml("") - if err != nil { - errorMessage(configPages, "Cannot parse configuration file", err.Error()) - return - }*/ yamlConfig, err := ioutil.ReadFile(configFilePath) if err != nil { errorMessage(configPages, "Cannot open configuration file", err.Error()) @@ -178,84 +406,149 @@ func loadConfig(configFilePath string, configPages *tview.Pages) { configTextView.SetTitle("runtime configuration (NOT PUSHED)") - configMap = make(map[string]string) - err = json.Unmarshal([]byte(configTextView.GetText(false))[:], &configMap) + var rawMap map[string]interface{} + err = json.Unmarshal([]byte(configTextView.GetText(false)), &rawMap) if err != nil { errorMessage(configPages, "Cannot process configuration file", err.Error()) return } + configMap = make(map[string]string, len(rawMap)) + for k, v := range rawMap { + configMap[k] = fmt.Sprintf("%v", v) + } } -func Run(cmdString string) (err error) { +func Run(opts Options) (err error) { state = "UNKNOWN" + tuiMode = opts.Mode + if tuiMode == "" { + tuiMode = "direct" + } + + // Validate mode + switch tuiMode { + case "direct", "fmq", "fmq-step": + default: + return fmt.Errorf("unknown mode %q — valid modes: direct, fmq, fmq-step", tuiMode) + } + + // Resolve address + addr := opts.Addr + if addr == "" { + if tuiMode == "fmq" || tuiMode == "fmq-step" { + return fmt.Errorf("%s mode requires -addr flag", tuiMode) + } + // Fall back to OCC_CONTROL_PORT env var (direct mode legacy behaviour) + occPortString := os.Getenv("OCC_CONTROL_PORT") + if occPortString == "" { + return fmt.Errorf("OCC_CONTROL_PORT not defined") + } + occPort, e := strconv.ParseUint(occPortString, 10, 64) + if e != nil { + return e + } + addr = fmt.Sprintf("127.0.0.1:%d", occPort) + } // Setup UI app = tview.NewApplication() statusBox := 
tview.NewBox().SetBorder(true).SetTitle("state") - configTextView = tview.NewTextView().SetChangedFunc(func() { app.Draw() }) + configTextView = tview.NewTextView().SetChangedFunc(func() { app.QueueUpdateDraw(func() {}) }) configTextView.SetBorder(true).SetTitle("runtime configuration (EMPTY)") - configPages := tview.NewPages(). + configPages = tview.NewPages(). AddPage("configBox", configTextView, true, true) - controlList = tview.NewList(). - AddItem("Transition CONFIGURE", - "perform CONFIGURE transition", - 'c', - func() { - err = transition("CONFIGURE") - }). - AddItem("Transition RESET", - "perform RESET transition", - 'r', - func() { - err = transition("RESET") - }). - AddItem("Transition START", - "perform START transition", - 's', - func() { - err = transition("START") - }). - AddItem("Transition STOP", - "perform STOP transition", - 't', - func() { - err = transition("STOP") - }). - //AddItem("Transition GO_ERROR", - // "perform GO_ERROR transition", - // 'e', - // func(){ - // err = transition("GO_ERROR") - // app.Draw() - // }). - AddItem("Transition RECOVER", - "perform RECOVER transition", - 'v', - func() { - err = transition("RECOVER") - }). - AddItem("Transition EXIT", - "perform EXIT transition", - 'x', - func() { - err = transition("EXIT") - }). - AddItem("Load configuration", - "read runtime configuration from file", - 'l', - func() { - err = acquireConfigFile(configPages) - }). 
- AddItem("Quit", - "disconnect from the process and quit peanut", - 'q', - func() { - app.Stop() + doTransition := func(evt string) { + if !connected || transitioning { + return + } + transitioning = true + go func() { + e := transition(evt) + app.QueueUpdateDraw(func() { + transitioning = false + if e != nil { + err = e + errorMessage(configPages, "Transition error", e.Error()) + } + }) + }() + } + + doFMQStep := func(event string) { + if !connected || transitioning { + return + } + transitioning = true + go func() { + kvs := make(map[string]string, len(configMap)) + for k, v := range configMap { + kvs[k] = v + } + newState, e := fmqDoStep(context.TODO(), occClient, state, event, kvs) + app.QueueUpdateDraw(func() { + transitioning = false + if e != nil { + err = e + errorMessage(configPages, "FMQ step error", e.Error()) + } else { + state = newState + } }) + }() + } + + controlList = tview.NewList() + + var cmdIndices []int + addCmd := func(main, secondary string, shortcut rune, handler func()) { + cmdIndices = append(cmdIndices, controlList.GetItemCount()) + controlList.AddItem(main, secondary, shortcut, handler) + } + + switch tuiMode { + case "fmq-step": + addCmd("INIT DEVICE", "IDLE → INITIALIZING DEVICE", '1', func() { doFMQStep(fairmq.EvtINIT_DEVICE) }) + addCmd("COMPLETE INIT", "INITIALIZING DEVICE → INITIALIZED", '2', func() { doFMQStep(fairmq.EvtCOMPLETE_INIT) }) + addCmd("BIND", "INITIALIZED → BOUND", '3', func() { doFMQStep(fairmq.EvtBIND) }) + addCmd("CONNECT", "BOUND → DEVICE READY", '4', func() { doFMQStep(fairmq.EvtCONNECT) }) + addCmd("INIT TASK", "DEVICE READY → READY", '5', func() { doFMQStep(fairmq.EvtINIT_TASK) }) + addCmd("RUN", "READY → RUNNING", '6', func() { doFMQStep(fairmq.EvtRUN) }) + addCmd("STOP", "RUNNING → READY", '7', func() { doFMQStep(fairmq.EvtSTOP) }) + addCmd("RESET TASK", "READY → DEVICE READY", '8', func() { doFMQStep(fairmq.EvtRESET_TASK) }) + addCmd("RESET DEVICE", "→ IDLE", '9', func() { doFMQStep(fairmq.EvtRESET_DEVICE) 
}) + addCmd("END", "IDLE → EXITING", '0', func() { doFMQStep(fairmq.EvtEND) }) + default: // direct, fmq + addCmd("Transition CONFIGURE", "perform CONFIGURE transition", 'c', func() { doTransition("CONFIGURE") }) + addCmd("Transition RESET", "perform RESET transition", 'r', func() { doTransition("RESET") }) + addCmd("Transition START", "perform START transition", 's', func() { doTransition("START") }) + addCmd("Transition STOP", "perform STOP transition", 't', func() { doTransition("STOP") }) + addCmd("Transition RECOVER", "perform RECOVER transition", 'v', func() { doTransition("RECOVER") }) + addCmd("Transition EXIT", "perform EXIT transition", 'x', func() { doTransition("EXIT") }) + } + // Always-on items — not registered in cmdIndices. + controlList. + AddItem("Reconnect", "re-establish gRPC connection to the controlled process", 'n', func() { connectRPC() }). + AddItem("Load configuration", "read runtime configuration from file", 'l', func() { err = acquireConfigFile(configPages) }). + AddItem("Quit", "disconnect from the process and quit peanut", 'q', func() { app.Stop() }) controlList.SetBorder(true).SetTitle("control") + origTexts := make([][2]string, len(cmdIndices)) + for i, idx := range cmdIndices { + origTexts[i][0], origTexts[i][1] = controlList.GetItemText(idx) + } + setCommandsEnabled = func(enabled bool) { + for i, idx := range cmdIndices { + if enabled { + controlList.SetItemText(idx, origTexts[i][0], origTexts[i][1]) + } else { + controlList.SetItemText(idx, "[::d]"+origTexts[i][0], "[::d]"+origTexts[i][1]) + } + } + } + setCommandsEnabled(false) // start grayed out until connected + flex := tview.NewFlex().AddItem(tview.NewFlex().SetDirection(tview.FlexRow). AddItem(statusBox, 3, 1, false). AddItem(controlList, 0, 1, true), 0, 1, false). 
@@ -263,41 +556,8 @@ func Run(cmdString string) (err error) { statusBox.SetDrawFunc(drawStatus) - // Parse input - var occPort uint64 - if len(cmdString) > 0 { - // RUN process - } else { - // If cmdString not passed, env var OCC_CONTROL_PORT (occ/OccGlobals.h) must be defined - occPortString := os.Getenv("OCC_CONTROL_PORT") - if len(occPortString) == 0 { - err = fmt.Errorf("OCC_CONTROL_PORT not defined") - return - } - occPort, err = strconv.ParseUint(occPortString, 10, 64) - if err != nil { - return - } - } - - // Setup RPC - go func() { - // FIXME allow choice of controlmode.FAIRMQ - rpcClient = executorcmd.NewClient( - occPort, - controlmode.DIRECT, - executorcmd.ProtobufTransport, - log.WithField("id", "")) - var response *pb.GetStateReply - response, err = rpcClient.GetState(context.TODO(), &pb.GetStateRequest{}, grpc.EmptyCallOption{}) - if err != nil { - app.Stop() - fmt.Println(err.Error()) - return - } - // NOTE: we acquire the transitioner-dependent STANDBY equivalent state - state = rpcClient.FromDeviceState(response.GetState()) - app.Draw() - }() + // Connect to the controlled process + tuiAddr = addr + connectRPC() return app.SetRoot(flex, true).SetFocus(controlList).Run() } From 674cce10e04d0e5593bad2af57d7e779ab60d7b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Tich=C3=A1k?= <53997499+justonedev1@users.noreply.github.com> Date: Fri, 24 Apr 2026 10:57:30 +0200 Subject: [PATCH 3/6] [OCTRL-1092] Improve metrics error reporting (#810) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [core] add metrics to the transition * [core] OCTRL-1092 add call results to metric in call.go * [core] use transition name as a tag --------- Co-authored-by: Michal Tichák --- common/monitoring/metric.go | 11 +++++++ core/environment/transition.go | 10 +++++- core/environment/transition_configure.go | 6 ++++ core/environment/transition_deploy.go | 6 ++++ core/environment/transition_goerror.go | 4 +++ 
core/environment/transition_reset.go | 6 ++++ core/environment/transition_startactivity.go | 6 ++++ core/environment/transition_stopactivity.go | 6 ++++ core/environment/utils.go | 2 +- core/workflow/callable/call.go | 34 +++++++++++++------- 10 files changed, 77 insertions(+), 14 deletions(-) diff --git a/common/monitoring/metric.go b/common/monitoring/metric.go index 41f8b78fd..fe0ab8c9a 100644 --- a/common/monitoring/metric.go +++ b/common/monitoring/metric.go @@ -71,6 +71,17 @@ func (metric *Metric) AddTag(tagName string, value string) { metric.tags = append(metric.tags, Tag{name: tagName, value: value}) } +const ( + ERROR = "error" + SUCCESS = "success" + CANCELLED = "cancelled" + TIMEOUT = "timeout" +) + +func (metric *Metric) AddResult(result string) { + metric.AddTag("result", result) +} + func (metric *Metric) setField(fieldName string, field any) { if metric.fields == nil { metric.fields = make(FieldsType) diff --git a/core/environment/transition.go b/core/environment/transition.go index a821e3dfa..ecb563bf8 100644 --- a/core/environment/transition.go +++ b/core/environment/transition.go @@ -27,7 +27,8 @@ package environment import ( "errors" - "github.com/AliceO2Group/Control/core/protos" + "github.com/AliceO2Group/Control/common/monitoring" + pb "github.com/AliceO2Group/Control/core/protos" "github.com/AliceO2Group/Control/core/task" ) @@ -74,3 +75,10 @@ func (t baseTransition) check() (err error) { func (t baseTransition) eventName() string { return t.name } + +func (t baseTransition) transitionDoMetric(env *Environment) monitoring.Metric { + metric := monitoring.NewMetric("transition_do") + metric.AddTag("transition", t.name) + metric.AddTag("envId", env.Id().String()) + return metric +} diff --git a/core/environment/transition_configure.go b/core/environment/transition_configure.go index aab902bb2..09d93eca9 100644 --- a/core/environment/transition_configure.go +++ b/core/environment/transition_configure.go @@ -30,6 +30,7 @@ import ( 
"github.com/AliceO2Group/Control/core/workflow" "github.com/AliceO2Group/Control/common/event" + "github.com/AliceO2Group/Control/common/monitoring" "github.com/AliceO2Group/Control/core/task" "github.com/AliceO2Group/Control/core/task/taskop" ) @@ -52,6 +53,9 @@ func (t ConfigureTransition) do(env *Environment) (err error) { return errors.New("cannot transition in NIL environment") } + metric := t.transitionDoMetric(env) + defer monitoring.TimerSendSingle(&metric, monitoring.Millisecond)() + wf := env.Workflow() activeTasks := workflow.GetActiveTasks(wf) @@ -64,9 +68,11 @@ func (t ConfigureTransition) do(env *Environment) (err error) { incomingEv := <-env.stateChangedCh // If some tasks failed to transition if tasksStateErrors := incomingEv.GetTasksStateChangedError(); tasksStateErrors != nil { + metric.AddResult(monitoring.ERROR) return tasksStateErrors } env.sendEnvironmentEvent(&event.EnvironmentEvent{EnvironmentID: env.Id().String(), State: "CONFIGURED"}) + metric.AddResult(monitoring.SUCCESS) return } diff --git a/core/environment/transition_deploy.go b/core/environment/transition_deploy.go index 34436be04..9c0e49b5b 100644 --- a/core/environment/transition_deploy.go +++ b/core/environment/transition_deploy.go @@ -36,6 +36,7 @@ import ( "github.com/AliceO2Group/Control/common/event" "github.com/AliceO2Group/Control/common/logger/infologger" + "github.com/AliceO2Group/Control/common/monitoring" "github.com/AliceO2Group/Control/core/task" "github.com/AliceO2Group/Control/core/task/sm" "github.com/AliceO2Group/Control/core/task/taskop" @@ -66,6 +67,9 @@ func (t DeployTransition) do(env *Environment) (err error) { return errors.New("cannot transition in NIL environment") } + metric := t.transitionDoMetric(env) + defer monitoring.TimerSendSingle(&metric, monitoring.Millisecond)() + wf := env.Workflow() // Skip cleanup for anything other than readout-dataflow @@ -347,10 +351,12 @@ func (t DeployTransition) do(env *Environment) (err error) { log.WithField("level", 
infologger.IL_Ops). WithField("partition", env.Id().String()). Error(err) + metric.AddResult(monitoring.ERROR) return } env.sendEnvironmentEvent(&event.EnvironmentEvent{EnvironmentID: env.Id().String(), State: "DEPLOYED"}) + metric.AddResult(monitoring.SUCCESS) return } diff --git a/core/environment/transition_goerror.go b/core/environment/transition_goerror.go index c8adb14e2..e0844f6d5 100644 --- a/core/environment/transition_goerror.go +++ b/core/environment/transition_goerror.go @@ -25,6 +25,7 @@ package environment import ( + "github.com/AliceO2Group/Control/common/monitoring" "github.com/AliceO2Group/Control/core/controlcommands" "github.com/AliceO2Group/Control/core/task" "github.com/AliceO2Group/Control/core/task/sm" @@ -44,6 +45,8 @@ type GoErrorTransition struct { } func (t GoErrorTransition) do(env *Environment) (err error) { + metric := t.transitionDoMetric(env) + defer monitoring.TimerSendSingle(&metric, monitoring.Millisecond)() // we stop all tasks which are in RUNNING toStop := env.Workflow().GetTasks().Filtered(func(t *task.Task) bool { @@ -72,5 +75,6 @@ func (t GoErrorTransition) do(env *Environment) (err error) { <-env.stateChangedCh } + metric.AddResult(monitoring.SUCCESS) return } diff --git a/core/environment/transition_reset.go b/core/environment/transition_reset.go index fd9642dfa..84bfd44da 100644 --- a/core/environment/transition_reset.go +++ b/core/environment/transition_reset.go @@ -28,6 +28,7 @@ import ( "errors" "github.com/AliceO2Group/Control/common/event" + "github.com/AliceO2Group/Control/common/monitoring" "github.com/AliceO2Group/Control/core/task" "github.com/AliceO2Group/Control/core/task/sm" "github.com/AliceO2Group/Control/core/workflow" @@ -51,6 +52,9 @@ func (t ResetTransition) do(env *Environment) (err error) { return errors.New("cannot transition in NIL environment") } + metric := t.transitionDoMetric(env) + defer monitoring.TimerSendSingle(&metric, monitoring.Millisecond)() + taskmanMessage := 
task.NewTransitionTaskMessage( workflow.GetActiveTasks(env.Workflow()), sm.CONFIGURED.String(), @@ -64,9 +68,11 @@ func (t ResetTransition) do(env *Environment) (err error) { incomingEv := <-env.stateChangedCh // If some tasks failed to transition if tasksStateErrors := incomingEv.GetTasksStateChangedError(); tasksStateErrors != nil { + metric.AddResult(monitoring.ERROR) return tasksStateErrors } env.sendEnvironmentEvent(&event.EnvironmentEvent{EnvironmentID: env.Id().String(), State: "RESET"}) + metric.AddResult(monitoring.SUCCESS) return } diff --git a/core/environment/transition_startactivity.go b/core/environment/transition_startactivity.go index be7ec03c6..47a547c1b 100644 --- a/core/environment/transition_startactivity.go +++ b/core/environment/transition_startactivity.go @@ -33,6 +33,7 @@ import ( "github.com/AliceO2Group/Control/common/event" "github.com/AliceO2Group/Control/common/logger/infologger" + "github.com/AliceO2Group/Control/common/monitoring" "github.com/AliceO2Group/Control/core/controlcommands" "github.com/AliceO2Group/Control/core/task" "github.com/iancoleman/strcase" @@ -72,6 +73,9 @@ func (t StartActivityTransition) do(env *Environment) (err error) { return errors.New("cannot transition in NIL environment") } + metric := t.transitionDoMetric(env) + defer monitoring.TimerSendSingle(&metric, monitoring.Millisecond)() + runNumber := env.currentRunNumber log.WithField(infologger.Run, runNumber). 
@@ -120,6 +124,7 @@ func (t StartActivityTransition) do(env *Environment) (err error) { incomingEv := <-env.stateChangedCh // If some tasks failed to transition if tasksStateErrors := incomingEv.GetTasksStateChangedError(); tasksStateErrors != nil { + metric.AddResult(monitoring.ERROR) return tasksStateErrors } @@ -133,5 +138,6 @@ func (t StartActivityTransition) do(env *Environment) (err error) { Run: env.currentRunNumber, }) + metric.AddResult(monitoring.SUCCESS) return } diff --git a/core/environment/transition_stopactivity.go b/core/environment/transition_stopactivity.go index 8653302e1..7e55fbe62 100644 --- a/core/environment/transition_stopactivity.go +++ b/core/environment/transition_stopactivity.go @@ -29,6 +29,7 @@ import ( "github.com/AliceO2Group/Control/common/event" "github.com/AliceO2Group/Control/common/logger/infologger" + "github.com/AliceO2Group/Control/common/monitoring" "github.com/AliceO2Group/Control/core/controlcommands" "github.com/AliceO2Group/Control/core/task" "github.com/AliceO2Group/Control/core/task/sm" @@ -63,6 +64,9 @@ func (t StopActivityTransition) do(env *Environment) (err error) { return errors.New("cannot transition in NIL environment") } + metric := t.transitionDoMetric(env) + defer monitoring.TimerSendSingle(&metric, monitoring.Millisecond)() + log.WithField(infologger.Run, env.currentRunNumber). WithField("partition", env.Id().String()). WithField(infologger.Level, infologger.IL_Support). 
@@ -98,6 +102,7 @@ func (t StopActivityTransition) do(env *Environment) (err error) { incomingEv := <-env.stateChangedCh // If some tasks failed to transition if tasksStateErrors := incomingEv.GetTasksStateChangedError(); tasksStateErrors != nil { + metric.AddResult(monitoring.ERROR) return tasksStateErrors } env.sendEnvironmentEvent(&event.EnvironmentEvent{EnvironmentID: env.Id().String(), State: "CONFIGURED"}) @@ -107,5 +112,6 @@ func (t StopActivityTransition) do(env *Environment) (err error) { WithField(infologger.Level, infologger.IL_Support). Info("run stopped") + metric.AddResult(monitoring.SUCCESS) return } diff --git a/core/environment/utils.go b/core/environment/utils.go index 1ca0dcad3..7bc15c4f7 100644 --- a/core/environment/utils.go +++ b/core/environment/utils.go @@ -52,7 +52,7 @@ type WorkflowPublicInfo struct { func parseWorkflowPublicInfo(workflowExpr string) (WorkflowPublicInfo, error) { repoManager := the.RepoManager() - resolvedWorkflowPath, _, err := repoManager.GetWorkflow(workflowExpr) //Will fail if repo unknown + resolvedWorkflowPath, _, err := repoManager.GetWorkflow(workflowExpr) // Will fail if repo unknown if err != nil { return WorkflowPublicInfo{}, err } diff --git a/core/workflow/callable/call.go b/core/workflow/callable/call.go index 502688c1e..1a83c100f 100644 --- a/core/workflow/callable/call.go +++ b/core/workflow/callable/call.go @@ -110,6 +110,15 @@ func (s Calls) AwaitAll() map[*Call]error { return errs } +func (c *Call) callableMetric(name string) monitoring.Metric { + metric := monitoring.NewMetric(name) + metric.AddTag("runtype", c.getRunTypeTag()) + metric.AddTag("name", c.GetName()) + metric.AddTag("trigger", c.GetTraits().Trigger) + metric.AddTag("envId", c.parentRole.GetEnvironmentId().String()) + return metric +} + func (c *Call) Call() error { log.WithField("trigger", c.Traits.Trigger). WithField("await", c.Traits.Await). @@ -117,7 +126,7 @@ func (c *Call) Call() error { WithField("level", infologger.IL_Devel). 
Debugf("calling hook function %s", c.Func) - metric := c.newMetric("callablecall") + metric := c.callableMetric("callablecall") defer monitoring.TimerSendSingle(&metric, monitoring.Millisecond)() the.EventWriterWithTopic(topic.Call).WriteEvent(&evpb.Ev_CallEvent{ @@ -178,6 +187,7 @@ func (c *Call) Call() error { EnvironmentId: c.parentRole.GetEnvironmentId().String(), }) + metric.AddResult(monitoring.ERROR) return err } if len(returnVar) > 0 { @@ -206,6 +216,7 @@ func (c *Call) Call() error { EnvironmentId: c.parentRole.GetEnvironmentId().String(), }) + metric.AddResult(monitoring.ERROR) return errors.New(errMsg) } @@ -224,32 +235,31 @@ func (c *Call) Call() error { EnvironmentId: c.parentRole.GetEnvironmentId().String(), }) + metric.AddResult(monitoring.SUCCESS) return nil } -func (c *Call) newMetric(name string) monitoring.Metric { - metric := monitoring.NewMetric(name) - metric.AddTag("runtype", c.getRunTypeTag()) - metric.AddTag("name", c.GetName()) - metric.AddTag("trigger", c.GetTraits().Trigger) - metric.AddTag("envId", c.parentRole.GetEnvironmentId().String()) - return metric -} - func (c *Call) Start() { c.await = make(chan error) ctx, cancel := context.WithCancel(context.Background()) c.awaitCancel = cancel go func() { - metric := c.newMetric("callablewrapped") + metric := c.callableMetric("callablewrapped") defer monitoring.TimerSendSingle(&metric, monitoring.Millisecond)() callId := fmt.Sprintf("hook:%s:%s", c.GetTraits().Trigger, c.GetName()) log.Debugf("%s started", callId) defer utils.TimeTrack(time.Now(), callId, log.WithPrefix("callable")) + err := c.Call() select { - case c.await <- c.Call(): + case c.await <- err: + if err == nil { + metric.AddResult(monitoring.SUCCESS) + } else { + metric.AddResult(monitoring.ERROR) + } case <-ctx.Done(): + metric.AddResult(monitoring.CANCELLED) log.Debugf("%s cancelled", callId) } close(c.await) From 9e6bfdc00f16390f76e66a3cdcf99500e9e33362 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Tich=C3=A1k?= 
<53997499+justonedev1@users.noreply.github.com> Date: Mon, 27 Apr 2026 15:48:04 +0200 Subject: [PATCH 4/6] OCTRL-1078 Make Task Controller able to control OCC tasks (#804) * [operator] OCTRL-1078 controller operator that is able to control all OCC tasks * [operator] fairmq states are better mapped * [operator] changed name in copyright * [operator] moved to correct image name * [operator] addressing not addressed * [operator] fixed examples with arguments and configMap * [operator] disable kube-rbac-proxy verification for metrics for testing purposes * [operator] health checks listen on correct port * [operator] add gitlab-registry-secret to task-controller image deployment * [operator] don't restart failed pods * [operator] task-controller is started with correct live and metrics ports * [operator] deploy as daemonset and use hostNetwork: true for test purposes --- README.md | 3 + control-operator/.gitignore | 3 + control-operator/Dockerfile | 34 + control-operator/Makefile | 306 ++ control-operator/PROJECT | 23 + control-operator/README.md | 94 + .../api/v1alpha1/groupversion_info.go | 44 + control-operator/api/v1alpha1/task_types.go | 125 + .../api/v1alpha1/zz_generated.deepcopy.go | 194 + control-operator/cmd/main.go | 117 + .../crd/bases/aliecs.alice.cern_tasks.yaml | 4737 +++++++++++++++++ .../config/crd/kustomization.yaml | 21 + .../config/crd/kustomizeconfig.yaml | 19 + .../crd/patches/cainjection_in_tasks.yaml | 7 + .../config/crd/patches/webhook_in_tasks.yaml | 16 + .../config/default/kustomization.yaml | 145 + .../default/manager_auth_proxy_patch.yaml | 39 + .../config/default/manager_config_patch.yaml | 10 + .../config/manager/kustomization.yaml | 8 + control-operator/config/manager/manager.yaml | 106 + .../config/manifests/kustomization.yaml | 28 + .../config/prometheus/kustomization.yaml | 2 + .../config/prometheus/monitor.yaml | 26 + .../rbac/auth_proxy_client_clusterrole.yaml | 16 + .../config/rbac/auth_proxy_role.yaml | 24 + 
.../config/rbac/auth_proxy_role_binding.yaml | 19 + .../config/rbac/auth_proxy_service.yaml | 21 + .../config/rbac/kustomization.yaml | 18 + .../config/rbac/leader_election_role.yaml | 44 + .../rbac/leader_election_role_binding.yaml | 19 + control-operator/config/rbac/role.yaml | 51 + .../config/rbac/role_binding.yaml | 19 + .../config/rbac/service_account.yaml | 12 + .../config/rbac/task_editor_role.yaml | 31 + .../config/rbac/task_viewer_role.yaml | 27 + .../config/samples/aliecs_v1alpha1_task.yaml | 12 + .../config/samples/kustomization.yaml | 4 + .../config/scorecard/bases/config.yaml | 7 + .../config/scorecard/kustomization.yaml | 16 + .../scorecard/patches/basic.config.yaml | 10 + .../config/scorecard/patches/olm.config.yaml | 50 + .../control-workflows/readout-docker.yaml | 33 + .../control-workflows/readout-kube.yaml | 35 + .../control-workflows/readout-orig.yaml | 33 + .../stfbuilder-senderoutput-kube.yaml | 66 + .../stfbuilder-senderoutput-orig.yaml | 64 + .../control-workflows/stfsender-docker.yaml | 65 + .../control-workflows/stfsender-kube.yaml | 66 + .../control-workflows/stfsender-orig.yaml | 64 + .../ecs-manifests/kubernetes-ecs.md | 35 + .../kubernetes-manifests/readout-test.yaml | 179 + .../kubernetes-manifests/readout.yaml | 82 + .../stfbuilder-senderoutput-test.yaml | 114 + .../stfbuilder-senderoutput.yaml | 91 + .../kubernetes-manifests/stfsender-test.yaml | 92 + .../kubernetes-manifests/stfsender.yaml | 105 + .../readout-configure-args.yaml | 16 + ...tfbuilder-senderoutput-configure-args.yaml | 28 + .../stfsender-configure-args.yaml | 15 + control-operator/go.mod | 86 + control-operator/go.sum | 213 + control-operator/hack/boilerplate.go.txt | 23 + .../internal/controller/direct_transition.go | 145 + .../internal/controller/fairmq_transition.go | 223 + .../internal/controller/grpc_client.go | 305 ++ .../internal/controller/jsoncodec.go | 49 + .../internal/controller/protos/occ.proto | 99 + .../internal/controller/suite_test.go | 89 + 
.../internal/controller/task_controller.go | 384 ++ 69 files changed, 9306 insertions(+) create mode 100644 control-operator/.gitignore create mode 100644 control-operator/Dockerfile create mode 100644 control-operator/Makefile create mode 100644 control-operator/PROJECT create mode 100644 control-operator/README.md create mode 100644 control-operator/api/v1alpha1/groupversion_info.go create mode 100644 control-operator/api/v1alpha1/task_types.go create mode 100644 control-operator/api/v1alpha1/zz_generated.deepcopy.go create mode 100644 control-operator/cmd/main.go create mode 100644 control-operator/config/crd/bases/aliecs.alice.cern_tasks.yaml create mode 100644 control-operator/config/crd/kustomization.yaml create mode 100644 control-operator/config/crd/kustomizeconfig.yaml create mode 100644 control-operator/config/crd/patches/cainjection_in_tasks.yaml create mode 100644 control-operator/config/crd/patches/webhook_in_tasks.yaml create mode 100644 control-operator/config/default/kustomization.yaml create mode 100644 control-operator/config/default/manager_auth_proxy_patch.yaml create mode 100644 control-operator/config/default/manager_config_patch.yaml create mode 100644 control-operator/config/manager/kustomization.yaml create mode 100644 control-operator/config/manager/manager.yaml create mode 100644 control-operator/config/manifests/kustomization.yaml create mode 100644 control-operator/config/prometheus/kustomization.yaml create mode 100644 control-operator/config/prometheus/monitor.yaml create mode 100644 control-operator/config/rbac/auth_proxy_client_clusterrole.yaml create mode 100644 control-operator/config/rbac/auth_proxy_role.yaml create mode 100644 control-operator/config/rbac/auth_proxy_role_binding.yaml create mode 100644 control-operator/config/rbac/auth_proxy_service.yaml create mode 100644 control-operator/config/rbac/kustomization.yaml create mode 100644 control-operator/config/rbac/leader_election_role.yaml create mode 100644 
control-operator/config/rbac/leader_election_role_binding.yaml create mode 100644 control-operator/config/rbac/role.yaml create mode 100644 control-operator/config/rbac/role_binding.yaml create mode 100644 control-operator/config/rbac/service_account.yaml create mode 100644 control-operator/config/rbac/task_editor_role.yaml create mode 100644 control-operator/config/rbac/task_viewer_role.yaml create mode 100644 control-operator/config/samples/aliecs_v1alpha1_task.yaml create mode 100644 control-operator/config/samples/kustomization.yaml create mode 100644 control-operator/config/scorecard/bases/config.yaml create mode 100644 control-operator/config/scorecard/kustomization.yaml create mode 100644 control-operator/config/scorecard/patches/basic.config.yaml create mode 100644 control-operator/config/scorecard/patches/olm.config.yaml create mode 100644 control-operator/ecs-manifests/control-workflows/readout-docker.yaml create mode 100644 control-operator/ecs-manifests/control-workflows/readout-kube.yaml create mode 100644 control-operator/ecs-manifests/control-workflows/readout-orig.yaml create mode 100644 control-operator/ecs-manifests/control-workflows/stfbuilder-senderoutput-kube.yaml create mode 100644 control-operator/ecs-manifests/control-workflows/stfbuilder-senderoutput-orig.yaml create mode 100644 control-operator/ecs-manifests/control-workflows/stfsender-docker.yaml create mode 100644 control-operator/ecs-manifests/control-workflows/stfsender-kube.yaml create mode 100644 control-operator/ecs-manifests/control-workflows/stfsender-orig.yaml create mode 100644 control-operator/ecs-manifests/kubernetes-ecs.md create mode 100644 control-operator/ecs-manifests/kubernetes-manifests/readout-test.yaml create mode 100644 control-operator/ecs-manifests/kubernetes-manifests/readout.yaml create mode 100644 control-operator/ecs-manifests/kubernetes-manifests/stfbuilder-senderoutput-test.yaml create mode 100644 
control-operator/ecs-manifests/kubernetes-manifests/stfbuilder-senderoutput.yaml create mode 100644 control-operator/ecs-manifests/kubernetes-manifests/stfsender-test.yaml create mode 100644 control-operator/ecs-manifests/kubernetes-manifests/stfsender.yaml create mode 100644 control-operator/ecs-manifests/occ-configure-arguments/readout-configure-args.yaml create mode 100644 control-operator/ecs-manifests/occ-configure-arguments/stfbuilder-senderoutput-configure-args.yaml create mode 100644 control-operator/ecs-manifests/occ-configure-arguments/stfsender-configure-args.yaml create mode 100644 control-operator/go.mod create mode 100644 control-operator/go.sum create mode 100644 control-operator/hack/boilerplate.go.txt create mode 100644 control-operator/internal/controller/direct_transition.go create mode 100644 control-operator/internal/controller/fairmq_transition.go create mode 100644 control-operator/internal/controller/grpc_client.go create mode 100644 control-operator/internal/controller/jsoncodec.go create mode 100644 control-operator/internal/controller/protos/occ.proto create mode 100644 control-operator/internal/controller/suite_test.go create mode 100644 control-operator/internal/controller/task_controller.go diff --git a/README.md b/README.md index eb4d30be3..80bb8d7a8 100644 --- a/README.md +++ b/README.md @@ -193,6 +193,9 @@ There are two ways of interacting with AliECS: * [Sampling reservoir](/docs/metrics.md#sampling-reservoir) * [OCC API debugging with grpcc](/docs/using_grpcc_occ.md#occ-api-debugging-with-grpcc) * [Running tasks inside docker](/docs/running_docker.md#running-a-task-inside-a-docker-container) + * Kubernetes + * [Operator controller](/control-operator/README.md) + * [Testing manifests](/control-operator/ecs-manifests/kubernetes-ecs.md) * Resources * T. Mrnjavac et. 
al, [AliECS: A New Experiment Control System for the ALICE Experiment](https://doi.org/10.1051/epjconf/202429502027), CHEP23 diff --git a/control-operator/.gitignore b/control-operator/.gitignore new file mode 100644 index 000000000..2a14aa847 --- /dev/null +++ b/control-operator/.gitignore @@ -0,0 +1,3 @@ +bin +cover.out +/internal/controller/protos/generated diff --git a/control-operator/Dockerfile b/control-operator/Dockerfile new file mode 100644 index 000000000..ddd858167 --- /dev/null +++ b/control-operator/Dockerfile @@ -0,0 +1,34 @@ +# Build the manager binary +FROM golang:1.25 as builder +ARG TARGETOS +ARG TARGETARCH + +WORKDIR /workspace +# Copy the Go Modules manifests +COPY go.mod go.mod +COPY go.sum go.sum +RUN sed -i '\,replace github.com/AliceO2Group/Control,d' go.mod +# cache deps before building and copying source so that we don't need to re-download as much +# and so that source changes don't invalidate our downloaded layer +RUN go mod download + +# Copy the go source +COPY cmd/main.go cmd/main.go +COPY api/ api/ +COPY internal/controller/ internal/controller/ + +# Build +# the GOARCH has not a default value to allow the binary be built according to the host where the command +# was called. For example, if we call make docker-build in a local env which has the Apple Silicon M1 SO +# the docker BUILDPLATFORM arg will be linux/arm64 when for Apple x86 it will be linux/amd64. Therefore, +# by leaving it empty we can ensure that the container and binary shipped on it will have the same platform. +RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/main.go + +# Use distroless as minimal base image to package the manager binary +# Refer to https://github.com/GoogleContainerTools/distroless for more details +FROM gcr.io/distroless/static:nonroot +WORKDIR / +COPY --from=builder /workspace/manager . 
+USER 65532:65532 + +ENTRYPOINT ["/manager"] diff --git a/control-operator/Makefile b/control-operator/Makefile new file mode 100644 index 000000000..d01cfcb64 --- /dev/null +++ b/control-operator/Makefile @@ -0,0 +1,306 @@ +# VERSION defines the project version for the bundle. +# Update this value when you upgrade the version of your project. +# To re-generate a bundle for another specific version without changing the standard setup, you can: +# - use the VERSION as arg of the bundle target (e.g make bundle VERSION=0.0.2) +# - use environment variables to overwrite this value (e.g export VERSION=0.0.2) +VERSION ?= 0.0.1 + +# CHANNELS define the bundle channels used in the bundle. +# Add a new line here if you would like to change its default config. (E.g CHANNELS = "candidate,fast,stable") +# To re-generate a bundle for other specific channels without changing the standard setup, you can: +# - use the CHANNELS as arg of the bundle target (e.g make bundle CHANNELS=candidate,fast,stable) +# - use environment variables to overwrite this value (e.g export CHANNELS="candidate,fast,stable") +ifneq ($(origin CHANNELS), undefined) +BUNDLE_CHANNELS := --channels=$(CHANNELS) +endif + +# DEFAULT_CHANNEL defines the default channel used in the bundle. +# Add a new line here if you would like to change its default config. (E.g DEFAULT_CHANNEL = "stable") +# To re-generate a bundle for any other default channel without changing the default setup, you can: +# - use the DEFAULT_CHANNEL as arg of the bundle target (e.g make bundle DEFAULT_CHANNEL=stable) +# - use environment variables to overwrite this value (e.g export DEFAULT_CHANNEL="stable") +ifneq ($(origin DEFAULT_CHANNEL), undefined) +BUNDLE_DEFAULT_CHANNEL := --default-channel=$(DEFAULT_CHANNEL) +endif +BUNDLE_METADATA_OPTS ?= $(BUNDLE_CHANNELS) $(BUNDLE_DEFAULT_CHANNEL) + +# IMAGE_TAG_BASE defines the docker.io namespace and part of the image name for remote images. 
+# This variable is used to construct full image tags for bundle and catalog images. +# +# For example, running 'make bundle-build bundle-push catalog-build catalog-push' will build and push both +# gitlab-registry.cern.ch/aliceo2group/dockerfiles/aliecs-bundle:v$VERSION and gitlab-registry.cern.ch/aliceo2group/dockerfiles/aliecs-catalog:v$VERSION. +IMAGE_TAG_BASE ?= gitlab-registry.cern.ch/aliceo2group/dockerfiles/aliecs + +# BUNDLE_IMG defines the image:tag used for the bundle. +# You can use it as an arg. (E.g make bundle-build BUNDLE_IMG=<some-registry>/<project-name-bundle>:<tag>) +BUNDLE_IMG ?= $(IMAGE_TAG_BASE)-bundle:v$(VERSION) + +# BUNDLE_GEN_FLAGS are the flags passed to the operator-sdk generate bundle command +BUNDLE_GEN_FLAGS ?= -q --overwrite --version $(VERSION) $(BUNDLE_METADATA_OPTS) + +# USE_IMAGE_DIGESTS defines if images are resolved via tags or digests +# You can enable this value if you would like to use SHA Based Digests +# To enable set flag to true +USE_IMAGE_DIGESTS ?= false +ifeq ($(USE_IMAGE_DIGESTS), true) + BUNDLE_GEN_FLAGS += --use-image-digests +endif + +# Set the Operator SDK version to use. By default, what is installed on the system is used. +# This is useful for CI or a project to utilize a specific version of the operator-sdk toolkit. +OPERATOR_SDK_VERSION ?= unknown + +# Image URL to use all building/pushing image targets +IMG ?= gitlab-registry.cern.ch/aliceo2group/dockerfiles/aliecs/task-manager:latest +# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary. +ENVTEST_K8S_VERSION = 1.27.1 + +# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) +ifeq (,$(shell go env GOBIN)) +GOBIN=$(shell go env GOPATH)/bin +else +GOBIN=$(shell go env GOBIN) +endif + +# CONTAINER_TOOL defines the container tool to be used for building images. +# Be aware that the target commands are only tested with Docker which is +# scaffolded by default. However, you might want to replace it to use other +# tools. (i.e.
podman) +CONTAINER_TOOL ?= docker + +# Setting SHELL to bash allows bash commands to be executed by recipes. +# Options are set to exit when a recipe line exits non-zero or a piped command fails. +SHELL = /usr/bin/env bash -o pipefail +.SHELLFLAGS = -ec + +.PHONY: all +all: build + +##@ General + +# The help target prints out all targets with their descriptions organized +# beneath their categories. The categories are represented by '##@' and the +# target descriptions by '##'. The awk command is responsible for reading the +# entire set of makefiles included in this invocation, looking for lines of the +# file as xyz: ## something, and then pretty-format the target and help. Then, +# if there's a line with ##@ something, that gets pretty-printed as a category. +# More info on the usage of ANSI control characters for terminal formatting: +# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters +# More info on the awk command: +# http://linuxcommand.org/lc3_adv_awk.php + +.PHONY: help +help: ## Display this help. + @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) + +##@ Development + +.PHONY: manifests +manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects. + # Note that the option maxDescLen=0 was added in the default scaffold in order to sort out the issue + # Too long: must have at most 262144 bytes. By using kubectl apply to create / update resources an annotation + # is created by K8s API to store the latest version of the resource ( kubectl.kubernetes.io/last-applied-configuration). + # However, it has a size limit and if the CRD is too big with so many long descriptions as this one it will cause the failure. + $(CONTROLLER_GEN) rbac:roleName=manager-role crd:maxDescLen=0 webhook paths="./..."
output:crd:artifacts:config=config/crd/bases + +# .PHONY: manifests +# manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects. +# $(CONTROLLER_GEN) rbac:roleName=manager-role crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases + +.PHONY: generate +generate: controller-gen generate-proto ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. + $(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..." + +.PHONY: fmt +fmt: ## Run go fmt against code. + go fmt ./... + +.PHONY: vet +vet: ## Run go vet against code. + go vet ./... + +.PHONY: test +test: manifests generate fmt vet envtest ## Run tests. + KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./... -coverprofile cover.out + +##@ Protobuf + +.PHONY: generate-proto +generate-proto: ## Generate protobuf and gRPC Go files from occ.proto. + mkdir -p internal/controller/protos/generated + protoc --go_out=internal/controller/protos/generated --go_opt=paths=source_relative --go-grpc_out=internal/controller/protos/generated --go-grpc_opt=paths=source_relative -I internal/controller/protos internal/controller/protos/occ.proto + +.PHONY: clean-proto +clean-proto: ## Remove generated protobuf and gRPC Go files. + rm -rf internal/controller/protos/generated/* + +##@ Build + +.PHONY: build +build: manifests generate fmt vet ## Build manager binary. + go build -o bin/manager cmd/main.go + +.PHONY: run +run: manifests generate fmt vet ## Run a controller from your host. + go run ./cmd/main.go + +# If you wish built the manager image targeting other platforms you can use the --platform flag. +# (i.e. docker build --platform linux/arm64 ). However, you must enable docker buildKit for it. 
+# More info: https://docs.docker.com/develop/develop-images/build_enhancements/ +.PHONY: docker-build +docker-build: test ## Build docker image with the manager. + $(CONTAINER_TOOL) build -t ${IMG} . + +.PHONY: docker-push +docker-push: ## Push docker image with the manager. + $(CONTAINER_TOOL) push ${IMG} + +# PLATFORMS defines the target platforms for the manager image to be built to provide support for multiple +# architectures. (i.e. make docker-buildx IMG=myregistry/myoperator:0.0.1). To use this option you need to: +# - be able to use docker buildx. More info: https://docs.docker.com/build/buildx/ +# - have BuildKit enabled. More info: https://docs.docker.com/develop/develop-images/build_enhancements/ +# - be able to push the image to your registry (i.e. if you do not provide a valid value via IMG=<myregistry/image:<tag>> then the export will fail) +# To properly provide solutions that support more than one platform you should use this option. +PLATFORMS ?= linux/arm64,linux/amd64,linux/s390x,linux/ppc64le +.PHONY: docker-buildx +docker-buildx: test ## Build and push docker image for the manager for cross-platform support + # copy existing Dockerfile and insert --platform=${BUILDPLATFORM} into Dockerfile.cross, and preserve the original Dockerfile + sed -e '1 s/\(^FROM\)/FROM --platform=\$$\{BUILDPLATFORM\}/; t' -e ' 1,// s//FROM --platform=\$$\{BUILDPLATFORM\}/' Dockerfile > Dockerfile.cross + - $(CONTAINER_TOOL) buildx create --name project-v3-builder + $(CONTAINER_TOOL) buildx use project-v3-builder + - $(CONTAINER_TOOL) buildx build --push --platform=$(PLATFORMS) --tag ${IMG} -f Dockerfile.cross . + - $(CONTAINER_TOOL) buildx rm project-v3-builder + rm Dockerfile.cross + +##@ Deployment + +ifndef ignore-not-found + ignore-not-found = false +endif + +.PHONY: install +install: manifests kustomize ## Install CRDs into the K8s cluster specified in ~/.kube/config.
+ $(KUSTOMIZE) build config/crd | $(KUBECTL) apply --server-side -f - + +.PHONY: uninstall +uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. + $(KUSTOMIZE) build config/crd | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f - + +.PHONY: deploy +deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config. + cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} + $(KUSTOMIZE) build config/default | $(KUBECTL) apply -f - --server-side + +.PHONY: undeploy +undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. + $(KUSTOMIZE) build config/default | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f - + +##@ Build Dependencies + +## Location to install dependencies to +LOCALBIN ?= $(shell pwd)/bin +$(LOCALBIN): + mkdir -p $(LOCALBIN) + +## Tool Binaries +KUBECTL ?= kubectl +KUSTOMIZE ?= $(LOCALBIN)/kustomize +CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen +ENVTEST ?= $(LOCALBIN)/setup-envtest + +## Tool Versions +KUSTOMIZE_VERSION ?= v5.0.1 +CONTROLLER_TOOLS_VERSION ?= v0.16.5 + +.PHONY: kustomize +kustomize: $(KUSTOMIZE) ## Download kustomize locally if necessary. If wrong version is installed, it will be removed before downloading. +$(KUSTOMIZE): $(LOCALBIN) + @if test -x $(LOCALBIN)/kustomize && ! $(LOCALBIN)/kustomize version | grep -q $(KUSTOMIZE_VERSION); then \ + echo "$(LOCALBIN)/kustomize version is not expected $(KUSTOMIZE_VERSION). Removing it before installing."; \ + rm -rf $(LOCALBIN)/kustomize; \ + fi + test -s $(LOCALBIN)/kustomize || GOBIN=$(LOCALBIN) GO111MODULE=on go install sigs.k8s.io/kustomize/kustomize/v5@$(KUSTOMIZE_VERSION) + +.PHONY: controller-gen +controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary. 
If wrong version is installed, it will be overwritten. +$(CONTROLLER_GEN): $(LOCALBIN) + test -s $(LOCALBIN)/controller-gen && $(LOCALBIN)/controller-gen --version | grep -q $(CONTROLLER_TOOLS_VERSION) || \ + GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-tools/cmd/controller-gen@$(CONTROLLER_TOOLS_VERSION) + +.PHONY: envtest +envtest: $(ENVTEST) ## Download envtest-setup locally if necessary. +$(ENVTEST): $(LOCALBIN) + test -s $(LOCALBIN)/setup-envtest || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest + +.PHONY: operator-sdk +OPERATOR_SDK ?= $(LOCALBIN)/operator-sdk +operator-sdk: ## Download operator-sdk locally if necessary. +ifeq (,$(wildcard $(OPERATOR_SDK))) +ifeq (, $(shell which operator-sdk 2>/dev/null)) + @{ \ + set -e ;\ + mkdir -p $(dir $(OPERATOR_SDK)) ;\ + OS=$(shell go env GOOS) && ARCH=$(shell go env GOARCH) && \ + curl -sSLo $(OPERATOR_SDK) https://github.com/operator-framework/operator-sdk/releases/download/$(OPERATOR_SDK_VERSION)/operator-sdk_$${OS}_$${ARCH} ;\ + chmod +x $(OPERATOR_SDK) ;\ + } +else +OPERATOR_SDK = $(shell which operator-sdk) +endif +endif + +.PHONY: bundle +bundle: manifests kustomize operator-sdk ## Generate bundle manifests and metadata, then validate generated files. + $(OPERATOR_SDK) generate kustomize manifests -q + cd config/manager && $(KUSTOMIZE) edit set image controller=$(IMG) + $(KUSTOMIZE) build config/manifests | $(OPERATOR_SDK) generate bundle $(BUNDLE_GEN_FLAGS) + $(OPERATOR_SDK) bundle validate ./bundle + +.PHONY: bundle-build +bundle-build: ## Build the bundle image. + docker build -f bundle.Dockerfile -t $(BUNDLE_IMG) . + +.PHONY: bundle-push +bundle-push: ## Push the bundle image. + $(MAKE) docker-push IMG=$(BUNDLE_IMG) + +.PHONY: opm +OPM = ./bin/opm +opm: ## Download opm locally if necessary. 
+ifeq (,$(wildcard $(OPM))) +ifeq (,$(shell which opm 2>/dev/null)) + @{ \ + set -e ;\ + mkdir -p $(dir $(OPM)) ;\ + OS=$(shell go env GOOS) && ARCH=$(shell go env GOARCH) && \ + curl -sSLo $(OPM) https://github.com/operator-framework/operator-registry/releases/download/v1.23.0/$${OS}-$${ARCH}-opm ;\ + chmod +x $(OPM) ;\ + } +else +OPM = $(shell which opm) +endif +endif + +# A comma-separated list of bundle images (e.g. make catalog-build BUNDLE_IMGS=example.com/operator-bundle:v0.1.0,example.com/operator-bundle:v0.2.0). +# These images MUST exist in a registry and be pull-able. +BUNDLE_IMGS ?= $(BUNDLE_IMG) + +# The image tag given to the resulting catalog image (e.g. make catalog-build CATALOG_IMG=example.com/operator-catalog:v0.2.0). +CATALOG_IMG ?= $(IMAGE_TAG_BASE)-catalog:v$(VERSION) + +# Set CATALOG_BASE_IMG to an existing catalog image tag to add $BUNDLE_IMGS to that image. +ifneq ($(origin CATALOG_BASE_IMG), undefined) +FROM_INDEX_OPT := --from-index $(CATALOG_BASE_IMG) +endif + +# Build a catalog image by adding bundle images to an empty catalog using the operator package manager tool, 'opm'. +# This recipe invokes 'opm' in 'semver' bundle add mode. For more information on add modes, see: +# https://github.com/operator-framework/community-operators/blob/7f1438c/docs/packaging-operator.md#updating-your-existing-operator +.PHONY: catalog-build +catalog-build: opm ## Build a catalog image. + $(OPM) index add --container-tool docker --mode semver --tag $(CATALOG_IMG) --bundles $(BUNDLE_IMGS) $(FROM_INDEX_OPT) + +# Push the catalog image. +.PHONY: catalog-push +catalog-push: ## Push a catalog image. + $(MAKE) docker-push IMG=$(CATALOG_IMG) diff --git a/control-operator/PROJECT b/control-operator/PROJECT new file mode 100644 index 000000000..3d2493cb6 --- /dev/null +++ b/control-operator/PROJECT @@ -0,0 +1,23 @@ +# Code generated by tool. DO NOT EDIT. +# This file is used to track the info used to scaffold your project +# and allow the plugins properly work. 
+# More info: https://book.kubebuilder.io/reference/project-config.html +domain: alice.cern +layout: +- go.kubebuilder.io/v4 +plugins: + manifests.sdk.operatorframework.io/v2: {} + scorecard.sdk.operatorframework.io/v2: {} +projectName: operator +repo: github.com/AliceO2Group/Control/operator +resources: +- api: + crdVersion: v1 + namespaced: true + controller: true + domain: alice.cern + group: aliecs + kind: Task + path: github.com/AliceO2Group/Control/operator/api/v1alpha1 + version: v1alpha1 +version: "3" diff --git a/control-operator/README.md b/control-operator/README.md new file mode 100644 index 000000000..4813880ef --- /dev/null +++ b/control-operator/README.md @@ -0,0 +1,94 @@ +# operator +// TODO(user): Add simple overview of use/purpose + +## Description +// TODO(user): An in-depth paragraph about your project and overview of use + +## Getting Started +You’ll need a Kubernetes cluster to run against. You can use [KIND](https://sigs.k8s.io/kind) to get a local cluster for testing, or run against a remote cluster. +**Note:** Your controller will automatically use the current context in your kubeconfig file (i.e. whatever cluster `kubectl cluster-info` shows). + +### Running on the cluster +1. Install Instances of Custom Resources: + +```sh +kubectl apply -f config/samples/ +``` + +2. Build and push your image to the location specified by `IMG`: + +```sh +make docker-build docker-push IMG=<some-registry>/operator:tag +``` + +3.
Deploy the controller to the cluster with the image specified by `IMG`: + +```sh +make deploy IMG=<some-registry>/operator:tag +``` + +### Uninstall CRDs +To delete the CRDs from the cluster: + +```sh +make uninstall +``` + +### Undeploy controller +Undeploy the controller from the cluster: + +```sh +make undeploy +``` + +## Contributing +// TODO(user): Add detailed information on how you would like others to contribute to this project + +### How it works +This project aims to follow the Kubernetes [Operator pattern](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/). + +It uses [Controllers](https://kubernetes.io/docs/concepts/architecture/controller/), +which provide a reconcile function responsible for synchronizing resources until the desired state is reached on the cluster. + +### Test It Out +1. Install the CRDs into the cluster: + +```sh +make install +``` + +2. Run your controller (this will run in the foreground, so switch to a new terminal if you want to leave it running): + +```sh +make run +``` + +**NOTE:** You can also run this in one step by running: `make install run` + +### Modifying the API definitions +If you are editing the API definitions, generate the manifests such as CRs or CRDs using: + +```sh +make manifests +``` + +**NOTE:** Run `make help` for more information on all potential `make` targets + +More information can be found via the [Kubebuilder Documentation](https://book.kubebuilder.io/introduction.html) + +## License + +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. + diff --git a/control-operator/api/v1alpha1/groupversion_info.go b/control-operator/api/v1alpha1/groupversion_info.go new file mode 100644 index 000000000..55a752379 --- /dev/null +++ b/control-operator/api/v1alpha1/groupversion_info.go @@ -0,0 +1,44 @@ +/* + * === This file is part of ALICE O² === + * + * Copyright 2026 CERN and copyright holders of ALICE O². + * Author: Michal Tichak + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * In applying this license CERN does not waive the privileges and + * immunities granted to it by virtue of its status as an + * Intergovernmental Organization or submit itself to any jurisdiction. + */ + +// Package v1alpha1 contains API Schema definitions for the aliecs v1alpha1 API group +// +kubebuilder:object:generate=true +// +groupName=aliecs.alice.cern +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/scheme" +) + +var ( + // GroupVersion is group version used to register these objects + GroupVersion = schema.GroupVersion{Group: "aliecs.alice.cern", Version: "v1alpha1"} + + // SchemeBuilder is used to add go types to the GroupVersionKind scheme + SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} + + // AddToScheme adds the types in this group-version to the given scheme. 
+ AddToScheme = SchemeBuilder.AddToScheme +) diff --git a/control-operator/api/v1alpha1/task_types.go b/control-operator/api/v1alpha1/task_types.go new file mode 100644 index 000000000..00bca1db5 --- /dev/null +++ b/control-operator/api/v1alpha1/task_types.go @@ -0,0 +1,125 @@ +/* + * === This file is part of ALICE O² === + * + * Copyright 2026 CERN and copyright holders of ALICE O². + * Author: Michal Tichak + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * In applying this license CERN does not waive the privileges and + * immunities granted to it by virtue of its status as an + * Intergovernmental Organization or submit itself to any jurisdiction. + */ + +package v1alpha1 + +import ( + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! +// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. 
+ +type TaskSpecControl struct { + // +kubebuilder:validation:Enum=direct;fairmq;basic;hook + Mode string `json:"mode,omitempty"` + // +kubebuilder:validation:Minimum=1024 + // +kubebuilder:validation:Maximum=49151 + Port int `json:"port,omitempty"` +} + +type TaskSpecChannelInbound struct { + Name string `json:"name"` + // +kubebuilder:validation:Enum=push;pull;pub;sub + Type string `json:"type"` + SndBufSize int `json:"sndBufSize,omitempty"` + RcvBufSize int `json:"rcvBufSize,omitempty"` + RateLogging string `json:"rateLogging,omitempty"` + // +kubebuilder:validation:Enum=default;zeromq;nanomsg;shmem + // +kubebuilder:default=default + Transport string `json:"transport,omitempty"` + Target string `json:"target"` + + Global string `json:"global"` + // +kubebuilder:validation:Enum=tcp;ipc + // +kubebuilder:default=tcp + Addressing string `json:"addressing,omitempty"` +} + +type TaskSpecChannelOutbound struct { + Name string `json:"name"` + // +kubebuilder:validation:Enum=push;pull;pub;sub + Type string `json:"type"` + SndBufSize int `json:"sndBufSize,omitempty"` + RcvBufSize int `json:"rcvBufSize,omitempty"` + RateLogging string `json:"rateLogging,omitempty"` + // +kubebuilder:validation:Enum=default;zeromq;nanomsg;shmem + // +kubebuilder:default=default + Transport string `json:"transport,omitempty"` + Target string `json:"target"` +} + +// TaskSpec defines the desired state of Task +type TaskSpec struct { + // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster + // Important: Run "make" to regenerate code after modifying this file + + // Pod is the Kubernetes pod specification (core/v1 PodSpec) for the task.
+ Pod v1.PodSpec `json:"pod,omitempty"` + Control TaskSpecControl `json:"control,omitempty"` + Bind []TaskSpecChannelInbound `json:"bind,omitempty"` + Connect []TaskSpecChannelOutbound `json:"connect,omitempty"` + Properties map[string]string `json:"properties,omitempty"` + Arguments map[string]string `json:"arguments,omitempty"` + // +kubebuilder:validation:Enum=standby;deployed;configured;running + State string `json:"state,omitempty"` // this is the *requested* state, there are other states the task may end up in but cannot be requested +} + +// TaskStatus defines the observed state of Task +type TaskStatus struct { + // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster + // Important: Run "make" to regenerate code after modifying this file + Pod v1.PodStatus `json:"pod,omitempty"` + State string `json:"state,omitempty"` + Error string `json:"error,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Desired",type="string",JSONPath=".spec.state" +// +kubebuilder:printcolumn:name="Actual",type="string",JSONPath=".status.state" +// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" + +// Task is the Schema for the tasks API +type Task struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec TaskSpec `json:"spec,omitempty"` + Status TaskStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// TaskList contains a list of Task +type TaskList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []Task `json:"items"` +} + +func init() { + SchemeBuilder.Register(&Task{}, &TaskList{}) +} diff --git a/control-operator/api/v1alpha1/zz_generated.deepcopy.go b/control-operator/api/v1alpha1/zz_generated.deepcopy.go new file mode 100644 index 000000000..690b50a70 --- /dev/null +++ 
b/control-operator/api/v1alpha1/zz_generated.deepcopy.go @@ -0,0 +1,194 @@ +//go:build !ignore_autogenerated + +/* + * === This file is part of ALICE O² === + * + * Copyright 2026 CERN and copyright holders of ALICE O². + * Author: Michal Tichak + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * In applying this license CERN does not waive the privileges and + * immunities granted to it by virtue of its status as an + * Intergovernmental Organization or submit itself to any jurisdiction. + */ + +// Code generated by controller-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Task) DeepCopyInto(out *Task) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Task. +func (in *Task) DeepCopy() *Task { + if in == nil { + return nil + } + out := new(Task) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *Task) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *TaskList) DeepCopyInto(out *TaskList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]Task, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TaskList. +func (in *TaskList) DeepCopy() *TaskList { + if in == nil { + return nil + } + out := new(TaskList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *TaskList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *TaskSpec) DeepCopyInto(out *TaskSpec) { + *out = *in + in.Pod.DeepCopyInto(&out.Pod) + out.Control = in.Control + if in.Bind != nil { + in, out := &in.Bind, &out.Bind + *out = make([]TaskSpecChannelInbound, len(*in)) + copy(*out, *in) + } + if in.Connect != nil { + in, out := &in.Connect, &out.Connect + *out = make([]TaskSpecChannelOutbound, len(*in)) + copy(*out, *in) + } + if in.Properties != nil { + in, out := &in.Properties, &out.Properties + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.Arguments != nil { + in, out := &in.Arguments, &out.Arguments + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TaskSpec. 
+func (in *TaskSpec) DeepCopy() *TaskSpec { + if in == nil { + return nil + } + out := new(TaskSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *TaskSpecChannelInbound) DeepCopyInto(out *TaskSpecChannelInbound) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TaskSpecChannelInbound. +func (in *TaskSpecChannelInbound) DeepCopy() *TaskSpecChannelInbound { + if in == nil { + return nil + } + out := new(TaskSpecChannelInbound) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *TaskSpecChannelOutbound) DeepCopyInto(out *TaskSpecChannelOutbound) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TaskSpecChannelOutbound. +func (in *TaskSpecChannelOutbound) DeepCopy() *TaskSpecChannelOutbound { + if in == nil { + return nil + } + out := new(TaskSpecChannelOutbound) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *TaskSpecControl) DeepCopyInto(out *TaskSpecControl) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TaskSpecControl. +func (in *TaskSpecControl) DeepCopy() *TaskSpecControl { + if in == nil { + return nil + } + out := new(TaskSpecControl) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *TaskStatus) DeepCopyInto(out *TaskStatus) { + *out = *in + in.Pod.DeepCopyInto(&out.Pod) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TaskStatus. 
+func (in *TaskStatus) DeepCopy() *TaskStatus { + if in == nil { + return nil + } + out := new(TaskStatus) + in.DeepCopyInto(out) + return out +} diff --git a/control-operator/cmd/main.go b/control-operator/cmd/main.go new file mode 100644 index 000000000..5773164eb --- /dev/null +++ b/control-operator/cmd/main.go @@ -0,0 +1,117 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + "flag" + "os" + + // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) + // to ensure that exec-entrypoint and run can make use of them. 
+ "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + _ "k8s.io/client-go/plugin/pkg/client/auth" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/healthz" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/controller-runtime/pkg/metrics/server" + "sigs.k8s.io/controller-runtime/pkg/webhook" + + aliecsv1alpha1 "github.com/AliceO2Group/ControlOperator/api/v1alpha1" + "github.com/AliceO2Group/ControlOperator/internal/controller" + //+kubebuilder:scaffold:imports +) + +var ( + scheme = runtime.NewScheme() + setupLog = ctrl.Log.WithName("setup") +) + +func init() { + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + + utilruntime.Must(aliecsv1alpha1.AddToScheme(scheme)) + //+kubebuilder:scaffold:scheme +} + +func main() { + var metricsAddr string + var enableLeaderElection bool + var probeAddr string + flag.StringVar(&metricsAddr, "metrics-bind-address", ":9080", "The address the metric endpoint binds to.") + flag.StringVar(&probeAddr, "health-probe-bind-address", ":9081", "The address the probe endpoint binds to.") + flag.BoolVar(&enableLeaderElection, "leader-elect", false, + "Enable leader election for controller manager. "+ + "Enabling this will ensure there is only one active controller manager.") + opts := zap.Options{ + Development: true, + } + opts.BindFlags(flag.CommandLine) + flag.Parse() + + ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) + + mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ + Scheme: scheme, + Metrics: server.Options{BindAddress: metricsAddr}, + HealthProbeBindAddress: probeAddr, + LeaderElection: enableLeaderElection, + LeaderElectionID: "03b91661.alice.cern", + WebhookServer: webhook.NewServer(webhook.Options{Port: 9443}), + // LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily + // when the Manager ends. 
This requires the binary to immediately end when the + // Manager is stopped, otherwise, this setting is unsafe. Setting this significantly + // speeds up voluntary leader transitions as the new leader don't have to wait + // LeaseDuration time first. + // + // In the default scaffold provided, the program ends immediately after + // the manager stops, so would be fine to enable this option. However, + // if you are doing or is intended to do any operation such as perform cleanups + // after the manager stops then its usage might be unsafe. + // LeaderElectionReleaseOnCancel: true, + }) + if err != nil { + setupLog.Error(err, "unable to start manager") + os.Exit(1) + } + + if err = (&controller.TaskReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("task-controller"), // used within reconcile method to emit events + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "Task") + os.Exit(1) + } + //+kubebuilder:scaffold:builder + + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up health check") + os.Exit(1) + } + if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up ready check") + os.Exit(1) + } + + setupLog.Info("starting manager") + if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { + setupLog.Error(err, "problem running manager") + os.Exit(1) + } +} diff --git a/control-operator/config/crd/bases/aliecs.alice.cern_tasks.yaml b/control-operator/config/crd/bases/aliecs.alice.cern_tasks.yaml new file mode 100644 index 000000000..a7b823e63 --- /dev/null +++ b/control-operator/config/crd/bases/aliecs.alice.cern_tasks.yaml @@ -0,0 +1,4737 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.5 + name: tasks.aliecs.alice.cern +spec: + group: 
aliecs.alice.cern + names: + kind: Task + listKind: TaskList + plural: tasks + singular: task + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.state + name: Desired + type: string + - jsonPath: .status.state + name: Actual + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + type: string + kind: + type: string + metadata: + type: object + spec: + properties: + arguments: + additionalProperties: + type: string + type: object + bind: + items: + properties: + addressing: + default: tcp + enum: + - tcp + - ipc + type: string + global: + type: string + name: + type: string + rateLogging: + type: string + rcvBufSize: + type: integer + sndBufSize: + type: integer + target: + type: string + transport: + default: default + enum: + - default + - zeromq + - nanomsg + - shmem + type: string + type: + enum: + - push + - pull + - pub + - sub + type: string + required: + - global + - name + - target + - type + type: object + type: array + connect: + items: + properties: + name: + type: string + rateLogging: + type: string + rcvBufSize: + type: integer + sndBufSize: + type: integer + target: + type: string + transport: + default: default + enum: + - default + - zeromq + - nanomsg + - shmem + type: string + type: + enum: + - push + - pull + - pub + - sub + type: string + required: + - name + - target + - type + type: object + type: array + control: + properties: + mode: + enum: + - direct + - fairmq + - basic + - hook + type: string + port: + type: integer + type: object + pod: + properties: + activeDeadlineSeconds: + format: int64 + type: integer + affinity: + properties: + nodeAffinity: + properties: + preferredDuringSchedulingIgnoredDuringExecution: + items: + properties: + preference: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + 
x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + weight: + format: int32 + type: integer + required: + - preference + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + properties: + nodeSelectorTerms: + items: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-type: atomic + required: + - nodeSelectorTerms + type: object + x-kubernetes-map-type: atomic + type: object + podAffinity: + properties: + preferredDuringSchedulingIgnoredDuringExecution: + items: + properties: + podAffinityTerm: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string 
+ type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + type: string + required: + - topologyKey + type: object + weight: + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + items: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + items: + type: string + type: 
array + x-kubernetes-list-type: atomic + topologyKey: + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + podAntiAffinity: + properties: + preferredDuringSchedulingIgnoredDuringExecution: + items: + properties: + podAffinityTerm: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + type: string + required: + - topologyKey + type: object + weight: + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + items: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: 
atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: object + automountServiceAccountToken: + type: boolean + containers: + items: + properties: + args: + items: + type: string + type: array + x-kubernetes-list-type: atomic + command: + items: + type: string + type: array + x-kubernetes-list-type: atomic + env: + items: + properties: + name: + type: string + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + properties: + apiVersion: + type: string + fieldPath: + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + fileKeyRef: + properties: + key: + type: string + optional: + default: false + type: boolean + path: + type: string + volumeName: + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + properties: + containerName: + type: string + divisor: + anyOf: + - type: integer + - 
type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + envFrom: + items: + properties: + configMapRef: + properties: + name: + default: "" + type: string + optional: + type: boolean + type: object + x-kubernetes-map-type: atomic + prefix: + type: string + secretRef: + properties: + name: + default: "" + type: string + optional: + type: boolean + type: object + x-kubernetes-map-type: atomic + type: object + type: array + x-kubernetes-list-type: atomic + image: + type: string + imagePullPolicy: + type: string + lifecycle: + properties: + postStart: + properties: + exec: + properties: + command: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + preStop: + properties: + exec: + properties: + command: + items: + type: string + 
type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + stopSignal: + type: string + type: object + livenessProbe: + properties: + exec: + properties: + command: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + format: int32 + type: integer + grpc: + properties: + port: + format: int32 + type: integer + service: + default: "" + type: string + required: + - port + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + initialDelaySeconds: + format: int32 + type: integer + periodSeconds: + format: int32 + type: integer + successThreshold: + format: int32 + type: integer + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + format: int64 + type: integer + timeoutSeconds: + format: int32 + type: integer + type: object + name: + 
type: string + ports: + items: + properties: + containerPort: + format: int32 + type: integer + hostIP: + type: string + hostPort: + format: int32 + type: integer + name: + type: string + protocol: + default: TCP + type: string + required: + - containerPort + type: object + type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map + readinessProbe: + properties: + exec: + properties: + command: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + format: int32 + type: integer + grpc: + properties: + port: + format: int32 + type: integer + service: + default: "" + type: string + required: + - port + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + initialDelaySeconds: + format: int32 + type: integer + periodSeconds: + format: int32 + type: integer + successThreshold: + format: int32 + type: integer + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + format: int64 + type: integer + timeoutSeconds: + format: int32 + type: integer + type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic + resources: + properties: + claims: + items: + properties: + name: + type: string + request: + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + 
x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + type: object + restartPolicy: + type: string + restartPolicyRules: + items: + properties: + action: + type: string + exitCodes: + properties: + operator: + type: string + values: + items: + format: int32 + type: integer + type: array + x-kubernetes-list-type: set + required: + - operator + type: object + required: + - action + type: object + type: array + x-kubernetes-list-type: atomic + securityContext: + properties: + allowPrivilegeEscalation: + type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object + capabilities: + properties: + add: + items: + type: string + type: array + x-kubernetes-list-type: atomic + drop: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + type: boolean + procMount: + type: string + readOnlyRootFilesystem: + type: boolean + runAsGroup: + format: int64 + type: integer + runAsNonRoot: + type: boolean + runAsUser: + format: int64 + type: integer + seLinuxOptions: + properties: + level: + type: string + role: + type: string + type: + type: string + user: + type: string + type: object + seccompProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object + windowsOptions: + properties: + gmsaCredentialSpec: + type: string + gmsaCredentialSpecName: + type: string + hostProcess: + type: boolean + runAsUserName: + type: string + 
type: object + type: object + startupProbe: + properties: + exec: + properties: + command: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + format: int32 + type: integer + grpc: + properties: + port: + format: int32 + type: integer + service: + default: "" + type: string + required: + - port + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + initialDelaySeconds: + format: int32 + type: integer + periodSeconds: + format: int32 + type: integer + successThreshold: + format: int32 + type: integer + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + format: int64 + type: integer + timeoutSeconds: + format: int32 + type: integer + type: object + stdin: + type: boolean + stdinOnce: + type: boolean + terminationMessagePath: + type: string + terminationMessagePolicy: + type: string + tty: + type: boolean + volumeDevices: + items: + properties: + devicePath: + type: string + name: + type: string + required: + - devicePath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map + volumeMounts: + items: + properties: + mountPath: + type: string + mountPropagation: + type: string + name: + type: string + readOnly: + type: boolean + recursiveReadOnly: + type: string + subPath: + type: string + subPathExpr: + type: string + required: + - mountPath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map 
+ workingDir: + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + dnsConfig: + properties: + nameservers: + items: + type: string + type: array + x-kubernetes-list-type: atomic + options: + items: + properties: + name: + type: string + value: + type: string + type: object + type: array + x-kubernetes-list-type: atomic + searches: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + dnsPolicy: + type: string + enableServiceLinks: + type: boolean + ephemeralContainers: + items: + properties: + args: + items: + type: string + type: array + x-kubernetes-list-type: atomic + command: + items: + type: string + type: array + x-kubernetes-list-type: atomic + env: + items: + properties: + name: + type: string + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + properties: + apiVersion: + type: string + fieldPath: + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + fileKeyRef: + properties: + key: + type: string + optional: + default: false + type: boolean + path: + type: string + volumeName: + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + properties: + containerName: + type: string + divisor: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + 
x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + envFrom: + items: + properties: + configMapRef: + properties: + name: + default: "" + type: string + optional: + type: boolean + type: object + x-kubernetes-map-type: atomic + prefix: + type: string + secretRef: + properties: + name: + default: "" + type: string + optional: + type: boolean + type: object + x-kubernetes-map-type: atomic + type: object + type: array + x-kubernetes-list-type: atomic + image: + type: string + imagePullPolicy: + type: string + lifecycle: + properties: + postStart: + properties: + exec: + properties: + command: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + preStop: + properties: + exec: + properties: + command: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + 
required: + - port + type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + stopSignal: + type: string + type: object + livenessProbe: + properties: + exec: + properties: + command: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + format: int32 + type: integer + grpc: + properties: + port: + format: int32 + type: integer + service: + default: "" + type: string + required: + - port + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + initialDelaySeconds: + format: int32 + type: integer + periodSeconds: + format: int32 + type: integer + successThreshold: + format: int32 + type: integer + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + format: int64 + type: integer + timeoutSeconds: + format: int32 + type: integer + type: object + name: + type: string + ports: + items: + properties: + containerPort: + format: int32 + type: integer + hostIP: + type: string + hostPort: + format: int32 + type: integer + name: + type: string + protocol: + default: TCP + type: string + required: + - containerPort + type: object + type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map + readinessProbe: + properties: + exec: + 
properties: + command: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + format: int32 + type: integer + grpc: + properties: + port: + format: int32 + type: integer + service: + default: "" + type: string + required: + - port + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + initialDelaySeconds: + format: int32 + type: integer + periodSeconds: + format: int32 + type: integer + successThreshold: + format: int32 + type: integer + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + format: int64 + type: integer + timeoutSeconds: + format: int32 + type: integer + type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic + resources: + properties: + claims: + items: + properties: + name: + type: string + request: + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: 
^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + type: object + restartPolicy: + type: string + restartPolicyRules: + items: + properties: + action: + type: string + exitCodes: + properties: + operator: + type: string + values: + items: + format: int32 + type: integer + type: array + x-kubernetes-list-type: set + required: + - operator + type: object + required: + - action + type: object + type: array + x-kubernetes-list-type: atomic + securityContext: + properties: + allowPrivilegeEscalation: + type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object + capabilities: + properties: + add: + items: + type: string + type: array + x-kubernetes-list-type: atomic + drop: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + type: boolean + procMount: + type: string + readOnlyRootFilesystem: + type: boolean + runAsGroup: + format: int64 + type: integer + runAsNonRoot: + type: boolean + runAsUser: + format: int64 + type: integer + seLinuxOptions: + properties: + level: + type: string + role: + type: string + type: + type: string + user: + type: string + type: object + seccompProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object + windowsOptions: + properties: + gmsaCredentialSpec: + type: string + gmsaCredentialSpecName: + type: string + hostProcess: + type: boolean + runAsUserName: + type: string + type: object + type: object + startupProbe: + properties: + exec: + properties: + command: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + format: int32 + type: integer + grpc: + properties: + port: + format: int32 + type: integer + service: + default: "" + type: string + required: + - port + type: object + 
httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + initialDelaySeconds: + format: int32 + type: integer + periodSeconds: + format: int32 + type: integer + successThreshold: + format: int32 + type: integer + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + format: int64 + type: integer + timeoutSeconds: + format: int32 + type: integer + type: object + stdin: + type: boolean + stdinOnce: + type: boolean + targetContainerName: + type: string + terminationMessagePath: + type: string + terminationMessagePolicy: + type: string + tty: + type: boolean + volumeDevices: + items: + properties: + devicePath: + type: string + name: + type: string + required: + - devicePath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map + volumeMounts: + items: + properties: + mountPath: + type: string + mountPropagation: + type: string + name: + type: string + readOnly: + type: boolean + recursiveReadOnly: + type: string + subPath: + type: string + subPathExpr: + type: string + required: + - mountPath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map + workingDir: + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + hostAliases: + items: + properties: + hostnames: + items: + type: string + type: array + x-kubernetes-list-type: atomic + ip: + type: string + required: + - ip + type: object + 
type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map + hostIPC: + type: boolean + hostNetwork: + type: boolean + hostPID: + type: boolean + hostUsers: + type: boolean + hostname: + type: string + hostnameOverride: + type: string + imagePullSecrets: + items: + properties: + name: + default: "" + type: string + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + initContainers: + items: + properties: + args: + items: + type: string + type: array + x-kubernetes-list-type: atomic + command: + items: + type: string + type: array + x-kubernetes-list-type: atomic + env: + items: + properties: + name: + type: string + value: + type: string + valueFrom: + properties: + configMapKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + fieldRef: + properties: + apiVersion: + type: string + fieldPath: + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + fileKeyRef: + properties: + key: + type: string + optional: + default: false + type: boolean + path: + type: string + volumeName: + type: string + required: + - key + - path + - volumeName + type: object + x-kubernetes-map-type: atomic + resourceFieldRef: + properties: + containerName: + type: string + divisor: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + secretKeyRef: + properties: + key: + type: string + name: + default: "" + type: string + optional: + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + type: object + required: + - name + type: object + type: array + 
x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + envFrom: + items: + properties: + configMapRef: + properties: + name: + default: "" + type: string + optional: + type: boolean + type: object + x-kubernetes-map-type: atomic + prefix: + type: string + secretRef: + properties: + name: + default: "" + type: string + optional: + type: boolean + type: object + x-kubernetes-map-type: atomic + type: object + type: array + x-kubernetes-list-type: atomic + image: + type: string + imagePullPolicy: + type: string + lifecycle: + properties: + postStart: + properties: + exec: + properties: + command: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + sleep: + properties: + seconds: + format: int64 + type: integer + required: + - seconds + type: object + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + preStop: + properties: + exec: + properties: + command: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + sleep: + properties: + seconds: + format: int64 + type: integer + 
required: + - seconds + type: object + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + type: object + stopSignal: + type: string + type: object + livenessProbe: + properties: + exec: + properties: + command: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + format: int32 + type: integer + grpc: + properties: + port: + format: int32 + type: integer + service: + default: "" + type: string + required: + - port + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + initialDelaySeconds: + format: int32 + type: integer + periodSeconds: + format: int32 + type: integer + successThreshold: + format: int32 + type: integer + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + format: int64 + type: integer + timeoutSeconds: + format: int32 + type: integer + type: object + name: + type: string + ports: + items: + properties: + containerPort: + format: int32 + type: integer + hostIP: + type: string + hostPort: + format: int32 + type: integer + name: + type: string + protocol: + default: TCP + type: string + required: + - containerPort + type: object + type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map + readinessProbe: + properties: + exec: + properties: + command: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: 
object + failureThreshold: + format: int32 + type: integer + grpc: + properties: + port: + format: int32 + type: integer + service: + default: "" + type: string + required: + - port + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + initialDelaySeconds: + format: int32 + type: integer + periodSeconds: + format: int32 + type: integer + successThreshold: + format: int32 + type: integer + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + format: int64 + type: integer + timeoutSeconds: + format: int32 + type: integer + type: object + resizePolicy: + items: + properties: + resourceName: + type: string + restartPolicy: + type: string + required: + - resourceName + - restartPolicy + type: object + type: array + x-kubernetes-list-type: atomic + resources: + properties: + claims: + items: + properties: + name: + type: string + request: + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object 
+ type: object + restartPolicy: + type: string + restartPolicyRules: + items: + properties: + action: + type: string + exitCodes: + properties: + operator: + type: string + values: + items: + format: int32 + type: integer + type: array + x-kubernetes-list-type: set + required: + - operator + type: object + required: + - action + type: object + type: array + x-kubernetes-list-type: atomic + securityContext: + properties: + allowPrivilegeEscalation: + type: boolean + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object + capabilities: + properties: + add: + items: + type: string + type: array + x-kubernetes-list-type: atomic + drop: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + type: boolean + procMount: + type: string + readOnlyRootFilesystem: + type: boolean + runAsGroup: + format: int64 + type: integer + runAsNonRoot: + type: boolean + runAsUser: + format: int64 + type: integer + seLinuxOptions: + properties: + level: + type: string + role: + type: string + type: + type: string + user: + type: string + type: object + seccompProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object + windowsOptions: + properties: + gmsaCredentialSpec: + type: string + gmsaCredentialSpecName: + type: string + hostProcess: + type: boolean + runAsUserName: + type: string + type: object + type: object + startupProbe: + properties: + exec: + properties: + command: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + failureThreshold: + format: int32 + type: integer + grpc: + properties: + port: + format: int32 + type: integer + service: + default: "" + type: string + required: + - port + type: object + httpGet: + properties: + host: + type: string + httpHeaders: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: 
object + type: array + x-kubernetes-list-type: atomic + path: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + scheme: + type: string + required: + - port + type: object + initialDelaySeconds: + format: int32 + type: integer + periodSeconds: + format: int32 + type: integer + successThreshold: + format: int32 + type: integer + tcpSocket: + properties: + host: + type: string + port: + anyOf: + - type: integer + - type: string + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + format: int64 + type: integer + timeoutSeconds: + format: int32 + type: integer + type: object + stdin: + type: boolean + stdinOnce: + type: boolean + terminationMessagePath: + type: string + terminationMessagePolicy: + type: string + tty: + type: boolean + volumeDevices: + items: + properties: + devicePath: + type: string + name: + type: string + required: + - devicePath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - devicePath + x-kubernetes-list-type: map + volumeMounts: + items: + properties: + mountPath: + type: string + mountPropagation: + type: string + name: + type: string + readOnly: + type: boolean + recursiveReadOnly: + type: string + subPath: + type: string + subPathExpr: + type: string + required: + - mountPath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map + workingDir: + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + nodeName: + type: string + nodeSelector: + additionalProperties: + type: string + type: object + x-kubernetes-map-type: atomic + os: + properties: + name: + type: string + required: + - name + type: object + overhead: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: 
^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + preemptionPolicy: + type: string + priority: + format: int32 + type: integer + priorityClassName: + type: string + readinessGates: + items: + properties: + conditionType: + type: string + required: + - conditionType + type: object + type: array + x-kubernetes-list-type: atomic + resourceClaims: + items: + properties: + name: + type: string + resourceClaimName: + type: string + resourceClaimTemplateName: + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + resources: + properties: + claims: + items: + properties: + name: + type: string + request: + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + type: object + restartPolicy: + type: string + runtimeClassName: + type: string + schedulerName: + type: string + schedulingGates: + items: + properties: + name: + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + securityContext: + properties: + appArmorProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object + fsGroup: + format: int64 + type: integer + fsGroupChangePolicy: + type: string + runAsGroup: + 
format: int64 + type: integer + runAsNonRoot: + type: boolean + runAsUser: + format: int64 + type: integer + seLinuxChangePolicy: + type: string + seLinuxOptions: + properties: + level: + type: string + role: + type: string + type: + type: string + user: + type: string + type: object + seccompProfile: + properties: + localhostProfile: + type: string + type: + type: string + required: + - type + type: object + supplementalGroups: + items: + format: int64 + type: integer + type: array + x-kubernetes-list-type: atomic + supplementalGroupsPolicy: + type: string + sysctls: + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + windowsOptions: + properties: + gmsaCredentialSpec: + type: string + gmsaCredentialSpecName: + type: string + hostProcess: + type: boolean + runAsUserName: + type: string + type: object + type: object + serviceAccount: + type: string + serviceAccountName: + type: string + setHostnameAsFQDN: + type: boolean + shareProcessNamespace: + type: boolean + subdomain: + type: string + terminationGracePeriodSeconds: + format: int64 + type: integer + tolerations: + items: + properties: + effect: + type: string + key: + type: string + operator: + type: string + tolerationSeconds: + format: int64 + type: integer + value: + type: string + type: object + type: array + x-kubernetes-list-type: atomic + topologySpreadConstraints: + items: + properties: + labelSelector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + items: + type: string + type: array + x-kubernetes-list-type: atomic + maxSkew: 
+ format: int32 + type: integer + minDomains: + format: int32 + type: integer + nodeAffinityPolicy: + type: string + nodeTaintsPolicy: + type: string + topologyKey: + type: string + whenUnsatisfiable: + type: string + required: + - maxSkew + - topologyKey + - whenUnsatisfiable + type: object + type: array + x-kubernetes-list-map-keys: + - topologyKey + - whenUnsatisfiable + x-kubernetes-list-type: map + volumes: + items: + properties: + awsElasticBlockStore: + properties: + fsType: + type: string + partition: + format: int32 + type: integer + readOnly: + type: boolean + volumeID: + type: string + required: + - volumeID + type: object + azureDisk: + properties: + cachingMode: + type: string + diskName: + type: string + diskURI: + type: string + fsType: + default: ext4 + type: string + kind: + type: string + readOnly: + default: false + type: boolean + required: + - diskName + - diskURI + type: object + azureFile: + properties: + readOnly: + type: boolean + secretName: + type: string + shareName: + type: string + required: + - secretName + - shareName + type: object + cephfs: + properties: + monitors: + items: + type: string + type: array + x-kubernetes-list-type: atomic + path: + type: string + readOnly: + type: boolean + secretFile: + type: string + secretRef: + properties: + name: + default: "" + type: string + type: object + x-kubernetes-map-type: atomic + user: + type: string + required: + - monitors + type: object + cinder: + properties: + fsType: + type: string + readOnly: + type: boolean + secretRef: + properties: + name: + default: "" + type: string + type: object + x-kubernetes-map-type: atomic + volumeID: + type: string + required: + - volumeID + type: object + configMap: + properties: + defaultMode: + format: int32 + type: integer + items: + items: + properties: + key: + type: string + mode: + format: int32 + type: integer + path: + type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + name: + default: 
"" + type: string + optional: + type: boolean + type: object + x-kubernetes-map-type: atomic + csi: + properties: + driver: + type: string + fsType: + type: string + nodePublishSecretRef: + properties: + name: + default: "" + type: string + type: object + x-kubernetes-map-type: atomic + readOnly: + type: boolean + volumeAttributes: + additionalProperties: + type: string + type: object + required: + - driver + type: object + downwardAPI: + properties: + defaultMode: + format: int32 + type: integer + items: + items: + properties: + fieldRef: + properties: + apiVersion: + type: string + fieldPath: + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + mode: + format: int32 + type: integer + path: + type: string + resourceFieldRef: + properties: + containerName: + type: string + divisor: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + required: + - path + type: object + type: array + x-kubernetes-list-type: atomic + type: object + emptyDir: + properties: + medium: + type: string + sizeLimit: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + ephemeral: + properties: + volumeClaimTemplate: + properties: + metadata: + type: object + spec: + properties: + accessModes: + items: + type: string + type: array + x-kubernetes-list-type: atomic + dataSource: + properties: + apiGroup: + type: string + kind: + type: string + name: + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + dataSourceRef: + properties: + apiGroup: + type: string + kind: + type: string + name: + type: string + 
namespace: + type: string + required: + - kind + - name + type: object + resources: + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + type: object + selector: + properties: + matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + storageClassName: + type: string + volumeAttributesClassName: + type: string + volumeMode: + type: string + volumeName: + type: string + type: object + required: + - spec + type: object + type: object + fc: + properties: + fsType: + type: string + lun: + format: int32 + type: integer + readOnly: + type: boolean + targetWWNs: + items: + type: string + type: array + x-kubernetes-list-type: atomic + wwids: + items: + type: string + type: array + x-kubernetes-list-type: atomic + type: object + flexVolume: + properties: + driver: + type: string + fsType: + type: string + options: + additionalProperties: + type: string + type: object + readOnly: + type: boolean + secretRef: + properties: + name: + default: "" + type: string + type: object + x-kubernetes-map-type: atomic + required: + - driver + type: object + flocker: + properties: + datasetName: + type: string + datasetUUID: + type: string + type: object + gcePersistentDisk: + properties: + fsType: + type: 
string + partition: + format: int32 + type: integer + pdName: + type: string + readOnly: + type: boolean + required: + - pdName + type: object + gitRepo: + properties: + directory: + type: string + repository: + type: string + revision: + type: string + required: + - repository + type: object + glusterfs: + properties: + endpoints: + type: string + path: + type: string + readOnly: + type: boolean + required: + - endpoints + - path + type: object + hostPath: + properties: + path: + type: string + type: + type: string + required: + - path + type: object + image: + properties: + pullPolicy: + type: string + reference: + type: string + type: object + iscsi: + properties: + chapAuthDiscovery: + type: boolean + chapAuthSession: + type: boolean + fsType: + type: string + initiatorName: + type: string + iqn: + type: string + iscsiInterface: + default: default + type: string + lun: + format: int32 + type: integer + portals: + items: + type: string + type: array + x-kubernetes-list-type: atomic + readOnly: + type: boolean + secretRef: + properties: + name: + default: "" + type: string + type: object + x-kubernetes-map-type: atomic + targetPortal: + type: string + required: + - iqn + - lun + - targetPortal + type: object + name: + type: string + nfs: + properties: + path: + type: string + readOnly: + type: boolean + server: + type: string + required: + - path + - server + type: object + persistentVolumeClaim: + properties: + claimName: + type: string + readOnly: + type: boolean + required: + - claimName + type: object + photonPersistentDisk: + properties: + fsType: + type: string + pdID: + type: string + required: + - pdID + type: object + portworxVolume: + properties: + fsType: + type: string + readOnly: + type: boolean + volumeID: + type: string + required: + - volumeID + type: object + projected: + properties: + defaultMode: + format: int32 + type: integer + sources: + items: + properties: + clusterTrustBundle: + properties: + labelSelector: + properties: + 
matchExpressions: + items: + properties: + key: + type: string + operator: + type: string + values: + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + type: object + type: object + x-kubernetes-map-type: atomic + name: + type: string + optional: + type: boolean + path: + type: string + signerName: + type: string + required: + - path + type: object + configMap: + properties: + items: + items: + properties: + key: + type: string + mode: + format: int32 + type: integer + path: + type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + name: + default: "" + type: string + optional: + type: boolean + type: object + x-kubernetes-map-type: atomic + downwardAPI: + properties: + items: + items: + properties: + fieldRef: + properties: + apiVersion: + type: string + fieldPath: + type: string + required: + - fieldPath + type: object + x-kubernetes-map-type: atomic + mode: + format: int32 + type: integer + path: + type: string + resourceFieldRef: + properties: + containerName: + type: string + divisor: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + type: string + required: + - resource + type: object + x-kubernetes-map-type: atomic + required: + - path + type: object + type: array + x-kubernetes-list-type: atomic + type: object + podCertificate: + properties: + certificateChainPath: + type: string + credentialBundlePath: + type: string + keyPath: + type: string + keyType: + type: string + maxExpirationSeconds: + format: int32 + type: integer + signerName: + type: string + userAnnotations: + additionalProperties: + type: string + type: object + required: + - keyType + - signerName + type: object 
+ secret: + properties: + items: + items: + properties: + key: + type: string + mode: + format: int32 + type: integer + path: + type: string + required: + - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + name: + default: "" + type: string + optional: + type: boolean + type: object + x-kubernetes-map-type: atomic + serviceAccountToken: + properties: + audience: + type: string + expirationSeconds: + format: int64 + type: integer + path: + type: string + required: + - path + type: object + type: object + type: array + x-kubernetes-list-type: atomic + type: object + quobyte: + properties: + group: + type: string + readOnly: + type: boolean + registry: + type: string + tenant: + type: string + user: + type: string + volume: + type: string + required: + - registry + - volume + type: object + rbd: + properties: + fsType: + type: string + image: + type: string + keyring: + default: /etc/ceph/keyring + type: string + monitors: + items: + type: string + type: array + x-kubernetes-list-type: atomic + pool: + default: rbd + type: string + readOnly: + type: boolean + secretRef: + properties: + name: + default: "" + type: string + type: object + x-kubernetes-map-type: atomic + user: + default: admin + type: string + required: + - image + - monitors + type: object + scaleIO: + properties: + fsType: + default: xfs + type: string + gateway: + type: string + protectionDomain: + type: string + readOnly: + type: boolean + secretRef: + properties: + name: + default: "" + type: string + type: object + x-kubernetes-map-type: atomic + sslEnabled: + type: boolean + storageMode: + default: ThinProvisioned + type: string + storagePool: + type: string + system: + type: string + volumeName: + type: string + required: + - gateway + - secretRef + - system + type: object + secret: + properties: + defaultMode: + format: int32 + type: integer + items: + items: + properties: + key: + type: string + mode: + format: int32 + type: integer + path: + type: string + required: 
+ - key + - path + type: object + type: array + x-kubernetes-list-type: atomic + optional: + type: boolean + secretName: + type: string + type: object + storageos: + properties: + fsType: + type: string + readOnly: + type: boolean + secretRef: + properties: + name: + default: "" + type: string + type: object + x-kubernetes-map-type: atomic + volumeName: + type: string + volumeNamespace: + type: string + type: object + vsphereVolume: + properties: + fsType: + type: string + storagePolicyID: + type: string + storagePolicyName: + type: string + volumePath: + type: string + required: + - volumePath + type: object + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + workloadRef: + properties: + name: + type: string + podGroup: + type: string + podGroupReplicaKey: + type: string + required: + - name + - podGroup + type: object + required: + - containers + type: object + properties: + additionalProperties: + type: string + type: object + state: + enum: + - standby + - deployed + - configured + - running + type: string + type: object + status: + properties: + error: + type: string + pod: + properties: + allocatedResources: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + conditions: + items: + properties: + lastProbeTime: + format: date-time + type: string + lastTransitionTime: + format: date-time + type: string + message: + type: string + observedGeneration: + format: int64 + type: integer + reason: + type: string + status: + type: string + type: + type: string + required: + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + containerStatuses: + items: + properties: + allocatedResources: + additionalProperties: + anyOf: + - type: integer + - type: 
string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + allocatedResourcesStatus: + items: + properties: + name: + type: string + resources: + items: + properties: + health: + type: string + resourceID: + type: string + required: + - resourceID + type: object + type: array + x-kubernetes-list-map-keys: + - resourceID + x-kubernetes-list-type: map + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + containerID: + type: string + image: + type: string + imageID: + type: string + lastState: + properties: + running: + properties: + startedAt: + format: date-time + type: string + type: object + terminated: + properties: + containerID: + type: string + exitCode: + format: int32 + type: integer + finishedAt: + format: date-time + type: string + message: + type: string + reason: + type: string + signal: + format: int32 + type: integer + startedAt: + format: date-time + type: string + required: + - exitCode + type: object + waiting: + properties: + message: + type: string + reason: + type: string + type: object + type: object + name: + type: string + ready: + type: boolean + resources: + properties: + claims: + items: + properties: + name: + type: string + request: + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + 
type: object + type: object + restartCount: + format: int32 + type: integer + started: + type: boolean + state: + properties: + running: + properties: + startedAt: + format: date-time + type: string + type: object + terminated: + properties: + containerID: + type: string + exitCode: + format: int32 + type: integer + finishedAt: + format: date-time + type: string + message: + type: string + reason: + type: string + signal: + format: int32 + type: integer + startedAt: + format: date-time + type: string + required: + - exitCode + type: object + waiting: + properties: + message: + type: string + reason: + type: string + type: object + type: object + stopSignal: + type: string + user: + properties: + linux: + properties: + gid: + format: int64 + type: integer + supplementalGroups: + items: + format: int64 + type: integer + type: array + x-kubernetes-list-type: atomic + uid: + format: int64 + type: integer + required: + - gid + - uid + type: object + type: object + volumeMounts: + items: + properties: + mountPath: + type: string + name: + type: string + readOnly: + type: boolean + recursiveReadOnly: + type: string + required: + - mountPath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map + required: + - image + - imageID + - name + - ready + - restartCount + type: object + type: array + x-kubernetes-list-type: atomic + ephemeralContainerStatuses: + items: + properties: + allocatedResources: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + allocatedResourcesStatus: + items: + properties: + name: + type: string + resources: + items: + properties: + health: + type: string + resourceID: + type: string + required: + - resourceID + type: object + type: array + x-kubernetes-list-map-keys: + - resourceID + 
x-kubernetes-list-type: map + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + containerID: + type: string + image: + type: string + imageID: + type: string + lastState: + properties: + running: + properties: + startedAt: + format: date-time + type: string + type: object + terminated: + properties: + containerID: + type: string + exitCode: + format: int32 + type: integer + finishedAt: + format: date-time + type: string + message: + type: string + reason: + type: string + signal: + format: int32 + type: integer + startedAt: + format: date-time + type: string + required: + - exitCode + type: object + waiting: + properties: + message: + type: string + reason: + type: string + type: object + type: object + name: + type: string + ready: + type: boolean + resources: + properties: + claims: + items: + properties: + name: + type: string + request: + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + type: object + restartCount: + format: int32 + type: integer + started: + type: boolean + state: + properties: + running: + properties: + startedAt: + format: date-time + type: string + type: object + terminated: + properties: + containerID: + type: string + exitCode: + format: int32 + type: integer + finishedAt: + format: date-time + type: string + message: + type: string + reason: + type: string + signal: + format: int32 + 
type: integer + startedAt: + format: date-time + type: string + required: + - exitCode + type: object + waiting: + properties: + message: + type: string + reason: + type: string + type: object + type: object + stopSignal: + type: string + user: + properties: + linux: + properties: + gid: + format: int64 + type: integer + supplementalGroups: + items: + format: int64 + type: integer + type: array + x-kubernetes-list-type: atomic + uid: + format: int64 + type: integer + required: + - gid + - uid + type: object + type: object + volumeMounts: + items: + properties: + mountPath: + type: string + name: + type: string + readOnly: + type: boolean + recursiveReadOnly: + type: string + required: + - mountPath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map + required: + - image + - imageID + - name + - ready + - restartCount + type: object + type: array + x-kubernetes-list-type: atomic + extendedResourceClaimStatus: + properties: + requestMappings: + items: + properties: + containerName: + type: string + requestName: + type: string + resourceName: + type: string + required: + - containerName + - requestName + - resourceName + type: object + type: array + x-kubernetes-list-type: atomic + resourceClaimName: + type: string + required: + - requestMappings + - resourceClaimName + type: object + hostIP: + type: string + hostIPs: + items: + properties: + ip: + type: string + required: + - ip + type: object + type: array + x-kubernetes-list-type: atomic + initContainerStatuses: + items: + properties: + allocatedResources: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + allocatedResourcesStatus: + items: + properties: + name: + type: string + resources: + items: + properties: + health: + type: string + resourceID: + type: string 
+ required: + - resourceID + type: object + type: array + x-kubernetes-list-map-keys: + - resourceID + x-kubernetes-list-type: map + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + containerID: + type: string + image: + type: string + imageID: + type: string + lastState: + properties: + running: + properties: + startedAt: + format: date-time + type: string + type: object + terminated: + properties: + containerID: + type: string + exitCode: + format: int32 + type: integer + finishedAt: + format: date-time + type: string + message: + type: string + reason: + type: string + signal: + format: int32 + type: integer + startedAt: + format: date-time + type: string + required: + - exitCode + type: object + waiting: + properties: + message: + type: string + reason: + type: string + type: object + type: object + name: + type: string + ready: + type: boolean + resources: + properties: + claims: + items: + properties: + name: + type: string + request: + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + type: object + restartCount: + format: int32 + type: integer + started: + type: boolean + state: + properties: + running: + properties: + startedAt: + format: date-time + type: string + type: object + terminated: + properties: + containerID: + type: string + exitCode: + format: int32 + type: integer + finishedAt: + format: 
date-time + type: string + message: + type: string + reason: + type: string + signal: + format: int32 + type: integer + startedAt: + format: date-time + type: string + required: + - exitCode + type: object + waiting: + properties: + message: + type: string + reason: + type: string + type: object + type: object + stopSignal: + type: string + user: + properties: + linux: + properties: + gid: + format: int64 + type: integer + supplementalGroups: + items: + format: int64 + type: integer + type: array + x-kubernetes-list-type: atomic + uid: + format: int64 + type: integer + required: + - gid + - uid + type: object + type: object + volumeMounts: + items: + properties: + mountPath: + type: string + name: + type: string + readOnly: + type: boolean + recursiveReadOnly: + type: string + required: + - mountPath + - name + type: object + type: array + x-kubernetes-list-map-keys: + - mountPath + x-kubernetes-list-type: map + required: + - image + - imageID + - name + - ready + - restartCount + type: object + type: array + x-kubernetes-list-type: atomic + message: + type: string + nominatedNodeName: + type: string + observedGeneration: + format: int64 + type: integer + phase: + type: string + podIP: + type: string + podIPs: + items: + properties: + ip: + type: string + required: + - ip + type: object + type: array + x-kubernetes-list-map-keys: + - ip + x-kubernetes-list-type: map + qosClass: + type: string + reason: + type: string + resize: + type: string + resourceClaimStatuses: + items: + properties: + name: + type: string + resourceClaimName: + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + resources: + properties: + claims: + items: + properties: + name: + type: string + request: + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + 
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + type: object + startTime: + format: date-time + type: string + type: object + state: + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/control-operator/config/crd/kustomization.yaml b/control-operator/config/crd/kustomization.yaml new file mode 100644 index 000000000..dcc3db956 --- /dev/null +++ b/control-operator/config/crd/kustomization.yaml @@ -0,0 +1,21 @@ +# This kustomization.yaml is not intended to be run by itself, +# since it depends on service name and namespace that are out of this kustomize package. +# It should be run by config/default +resources: +- bases/aliecs.alice.cern_tasks.yaml +#+kubebuilder:scaffold:crdkustomizeresource + +patches: +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix. +# patches here are for enabling the conversion webhook for each CRD +#- path: patches/webhook_in_tasks.yaml +#+kubebuilder:scaffold:crdkustomizewebhookpatch + +# [CERTMANAGER] To enable cert-manager, uncomment all the sections with [CERTMANAGER] prefix. +# patches here are for enabling the CA injection for each CRD +#- path: patches/cainjection_in_tasks.yaml +#+kubebuilder:scaffold:crdkustomizecainjectionpatch + +# the following config is for teaching kustomize how to do kustomization for CRDs. 
+configurations: +- kustomizeconfig.yaml diff --git a/control-operator/config/crd/kustomizeconfig.yaml b/control-operator/config/crd/kustomizeconfig.yaml new file mode 100644 index 000000000..ec5c150a9 --- /dev/null +++ b/control-operator/config/crd/kustomizeconfig.yaml @@ -0,0 +1,19 @@ +# This file is for teaching kustomize how to substitute name and namespace reference in CRD +nameReference: +- kind: Service + version: v1 + fieldSpecs: + - kind: CustomResourceDefinition + version: v1 + group: apiextensions.k8s.io + path: spec/conversion/webhook/clientConfig/service/name + +namespace: +- kind: CustomResourceDefinition + version: v1 + group: apiextensions.k8s.io + path: spec/conversion/webhook/clientConfig/service/namespace + create: false + +varReference: +- path: metadata/annotations diff --git a/control-operator/config/crd/patches/cainjection_in_tasks.yaml b/control-operator/config/crd/patches/cainjection_in_tasks.yaml new file mode 100644 index 000000000..f5feffa47 --- /dev/null +++ b/control-operator/config/crd/patches/cainjection_in_tasks.yaml @@ -0,0 +1,7 @@ +# The following patch adds a directive for certmanager to inject CA into the CRD +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + cert-manager.io/inject-ca-from: CERTIFICATE_NAMESPACE/CERTIFICATE_NAME + name: tasks.aliecs.alice.cern diff --git a/control-operator/config/crd/patches/webhook_in_tasks.yaml b/control-operator/config/crd/patches/webhook_in_tasks.yaml new file mode 100644 index 000000000..b69d74838 --- /dev/null +++ b/control-operator/config/crd/patches/webhook_in_tasks.yaml @@ -0,0 +1,16 @@ +# The following patch enables a conversion webhook for the CRD +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: tasks.aliecs.alice.cern +spec: + conversion: + strategy: Webhook + webhook: + clientConfig: + service: + namespace: system + name: webhook-service + path: /convert + conversionReviewVersions: + - v1 diff --git 
a/control-operator/config/default/kustomization.yaml b/control-operator/config/default/kustomization.yaml new file mode 100644 index 000000000..b2bc27ead --- /dev/null +++ b/control-operator/config/default/kustomization.yaml @@ -0,0 +1,145 @@ +# Adds namespace to all resources. +namespace: operator-system + +# Value of this field is prepended to the +# names of all resources, e.g. a deployment named +# "wordpress" becomes "operator-wordpress". +# Note that it should also match with the prefix (text before '-') of the namespace +# field above. +namePrefix: operator- + +# Labels to add to all resources and selectors. +#labels: +#- includeSelectors: true +# pairs: +# someName: someValue + +resources: +- ../crd +- ../rbac +- ../manager +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in +# crd/kustomization.yaml +#- ../webhook +# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. +#- ../certmanager +# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. +#- ../prometheus + +patchesStrategicMerge: +# Protect the /metrics endpoint by putting it behind auth. +# If you want your controller-manager to expose the /metrics +# endpoint w/o any authn/z, please comment the following line. +# TODO: replace kube-rbac-proxy with WithAuthenticationAndAuthorization (see kubebuilder deprecation notice) +#- manager_auth_proxy_patch.yaml + + + +# [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in +# crd/kustomization.yaml +#- manager_webhook_patch.yaml + +# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. +# Uncomment 'CERTMANAGER' sections in crd/kustomization.yaml to enable the CA injection in the admission webhooks. 
+# 'CERTMANAGER' needs to be enabled to use ca injection +#- webhookcainjection_patch.yaml + +# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. +# Uncomment the following replacements to add the cert-manager CA injection annotations +#replacements: +# - source: # Add cert-manager annotation to ValidatingWebhookConfiguration, MutatingWebhookConfiguration and CRDs +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert # this name should match the one in certificate.yaml +# fieldPath: .metadata.namespace # namespace of the certificate CR +# targets: +# - select: +# kind: ValidatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 0 +# create: true +# - select: +# kind: MutatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 0 +# create: true +# - select: +# kind: CustomResourceDefinition +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 0 +# create: true +# - source: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# name: serving-cert # this name should match the one in certificate.yaml +# fieldPath: .metadata.name +# targets: +# - select: +# kind: ValidatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 1 +# create: true +# - select: +# kind: MutatingWebhookConfiguration +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 1 +# create: true +# - select: +# kind: CustomResourceDefinition +# fieldPaths: +# - .metadata.annotations.[cert-manager.io/inject-ca-from] +# options: +# delimiter: '/' +# index: 1 +# create: true +# - source: # Add cert-manager annotation to the webhook Service +# kind: Service +# 
version: v1 +# name: webhook-service +# fieldPath: .metadata.name # name of the service +# targets: +# - select: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# fieldPaths: +# - .spec.dnsNames.0 +# - .spec.dnsNames.1 +# options: +# delimiter: '.' +# index: 0 +# create: true +# - source: +# kind: Service +# version: v1 +# name: webhook-service +# fieldPath: .metadata.namespace # namespace of the service +# targets: +# - select: +# kind: Certificate +# group: cert-manager.io +# version: v1 +# fieldPaths: +# - .spec.dnsNames.0 +# - .spec.dnsNames.1 +# options: +# delimiter: '.' +# index: 1 +# create: true diff --git a/control-operator/config/default/manager_auth_proxy_patch.yaml b/control-operator/config/default/manager_auth_proxy_patch.yaml new file mode 100644 index 000000000..73fad2a67 --- /dev/null +++ b/control-operator/config/default/manager_auth_proxy_patch.yaml @@ -0,0 +1,39 @@ +# This patch injects a sidecar container which is an HTTP proxy for the +# controller manager; it performs RBAC authorization against the Kubernetes API using SubjectAccessReviews. 
+apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system +spec: + template: + spec: + containers: + - name: kube-rbac-proxy + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "ALL" + image: gcr.io/kubebuilder/kube-rbac-proxy:v0.14.1 + args: + - "--secure-listen-address=0.0.0.0:8443" + - "--upstream=http://127.0.0.1:8080/" + - "--logtostderr=true" + - "--v=0" + ports: + - containerPort: 8443 + protocol: TCP + name: https + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 5m + memory: 64Mi + - name: manager + args: + - "--health-probe-bind-address=:8081" + - "--metrics-bind-address=127.0.0.1:8080" + - "--leader-elect" diff --git a/control-operator/config/default/manager_config_patch.yaml b/control-operator/config/default/manager_config_patch.yaml new file mode 100644 index 000000000..f6f589169 --- /dev/null +++ b/control-operator/config/default/manager_config_patch.yaml @@ -0,0 +1,10 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller-manager + namespace: system +spec: + template: + spec: + containers: + - name: manager diff --git a/control-operator/config/manager/kustomization.yaml b/control-operator/config/manager/kustomization.yaml new file mode 100644 index 000000000..e3c9ed1ad --- /dev/null +++ b/control-operator/config/manager/kustomization.yaml @@ -0,0 +1,8 @@ +resources: +- manager.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +images: +- name: controller + newName: gitlab-registry.cern.ch/aliceo2group/dockerfiles/aliecs/task-manager + newTag: latest diff --git a/control-operator/config/manager/manager.yaml b/control-operator/config/manager/manager.yaml new file mode 100644 index 000000000..3a355e8ee --- /dev/null +++ b/control-operator/config/manager/manager.yaml @@ -0,0 +1,106 @@ +apiVersion: v1 +kind: Namespace +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: namespace + 
app.kubernetes.io/instance: system + app.kubernetes.io/component: manager + app.kubernetes.io/created-by: operator + app.kubernetes.io/part-of: operator + app.kubernetes.io/managed-by: kustomize + name: system +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: controller-manager + namespace: system + labels: + control-plane: controller-manager + app.kubernetes.io/name: daemonset + app.kubernetes.io/instance: controller-manager + app.kubernetes.io/component: manager + app.kubernetes.io/created-by: operator + app.kubernetes.io/part-of: operator + app.kubernetes.io/managed-by: kustomize +spec: + selector: + matchLabels: + control-plane: controller-manager + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: manager + labels: + control-plane: controller-manager + spec: + # TODO(user): Uncomment the following code to configure the nodeAffinity expression + # according to the platforms which are supported by your solution. + # It is considered best practice to support multiple architectures. You can + # build your manager image using the makefile target docker-buildx. + # affinity: + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: kubernetes.io/arch + # operator: In + # values: + # - amd64 + # - arm64 + # - ppc64le + # - s390x + # - key: kubernetes.io/os + # operator: In + # values: + # - linux + hostNetwork: true + imagePullSecrets: + - name: gitlab-registry-secret + securityContext: + runAsNonRoot: true + # TODO(user): For common cases that do not require escalating privileges + # it is recommended to ensure that all your Pods/Containers are restrictive. + # More info: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted + # Please uncomment the following code if your project does NOT have to work on old Kubernetes + # versions < 1.19 or on vendors versions which do NOT support this field by default (i.e. Openshift < 4.11 ). 
+ # seccompProfile: + # type: RuntimeDefault + containers: + - command: + - /manager + args: + - --leader-elect + - --health-probe-bind-address=:9083 + - --metrics-bind-address=:9080 + image: controller:latest + name: manager + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "ALL" + livenessProbe: + httpGet: + path: /healthz + port: 9083 + initialDelaySeconds: 15 + periodSeconds: 20 + readinessProbe: + httpGet: + path: /readyz + port: 9083 + initialDelaySeconds: 5 + periodSeconds: 10 + # TODO(user): Configure the resources accordingly based on the project requirements. + # More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 10m + memory: 64Mi + serviceAccountName: controller-manager + terminationGracePeriodSeconds: 10 diff --git a/control-operator/config/manifests/kustomization.yaml b/control-operator/config/manifests/kustomization.yaml new file mode 100644 index 000000000..61b939e6f --- /dev/null +++ b/control-operator/config/manifests/kustomization.yaml @@ -0,0 +1,28 @@ +# These resources constitute the fully configured set of manifests +# used to generate the 'manifests/' directory in a bundle. +resources: +- bases/operator.clusterserviceversion.yaml +- ../default +- ../samples +- ../scorecard + +# [WEBHOOK] To enable webhooks, uncomment all the sections with [WEBHOOK] prefix. +# Do NOT uncomment sections with prefix [CERTMANAGER], as OLM does not support cert-manager. +# These patches remove the unnecessary "cert" volume and its manager container volumeMount. +#patchesJson6902: +#- target: +# group: apps +# version: v1 +# kind: Deployment +# name: controller-manager +# namespace: system +# patch: |- +# # Remove the manager container's "cert" volumeMount, since OLM will create and mount a set of certs. +# # Update the indices in this path if adding or removing containers/volumeMounts in the manager's Deployment. 
+# - op: remove + +# path: /spec/template/spec/containers/0/volumeMounts/0 +# # Remove the "cert" volume, since OLM will create and mount a set of certs. +# # Update the indices in this path if adding or removing volumes in the manager's Deployment. +# - op: remove +# path: /spec/template/spec/volumes/0 diff --git a/control-operator/config/prometheus/kustomization.yaml b/control-operator/config/prometheus/kustomization.yaml new file mode 100644 index 000000000..ed137168a --- /dev/null +++ b/control-operator/config/prometheus/kustomization.yaml @@ -0,0 +1,2 @@ +resources: +- monitor.yaml diff --git a/control-operator/config/prometheus/monitor.yaml b/control-operator/config/prometheus/monitor.yaml new file mode 100644 index 000000000..50225c34c --- /dev/null +++ b/control-operator/config/prometheus/monitor.yaml @@ -0,0 +1,26 @@ + +# Prometheus Monitor Service (Metrics) +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: servicemonitor + app.kubernetes.io/instance: controller-manager-metrics-monitor + app.kubernetes.io/component: metrics + app.kubernetes.io/created-by: operator + app.kubernetes.io/part-of: operator + app.kubernetes.io/managed-by: kustomize + name: controller-manager-metrics-monitor + namespace: system +spec: + endpoints: + - path: /metrics + port: https + scheme: https + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + tlsConfig: + insecureSkipVerify: true + selector: + matchLabels: + control-plane: controller-manager diff --git a/control-operator/config/rbac/auth_proxy_client_clusterrole.yaml b/control-operator/config/rbac/auth_proxy_client_clusterrole.yaml new file mode 100644 index 000000000..8f6e4d166 --- /dev/null +++ b/control-operator/config/rbac/auth_proxy_client_clusterrole.yaml @@ -0,0 +1,16 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: clusterrole + 
app.kubernetes.io/instance: metrics-reader + app.kubernetes.io/component: kube-rbac-proxy + app.kubernetes.io/created-by: operator + app.kubernetes.io/part-of: operator + app.kubernetes.io/managed-by: kustomize + name: metrics-reader +rules: +- nonResourceURLs: + - "/metrics" + verbs: + - get diff --git a/control-operator/config/rbac/auth_proxy_role.yaml b/control-operator/config/rbac/auth_proxy_role.yaml new file mode 100644 index 000000000..c48234ef0 --- /dev/null +++ b/control-operator/config/rbac/auth_proxy_role.yaml @@ -0,0 +1,24 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: clusterrole + app.kubernetes.io/instance: proxy-role + app.kubernetes.io/component: kube-rbac-proxy + app.kubernetes.io/created-by: operator + app.kubernetes.io/part-of: operator + app.kubernetes.io/managed-by: kustomize + name: proxy-role +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/control-operator/config/rbac/auth_proxy_role_binding.yaml b/control-operator/config/rbac/auth_proxy_role_binding.yaml new file mode 100644 index 000000000..8cb153a80 --- /dev/null +++ b/control-operator/config/rbac/auth_proxy_role_binding.yaml @@ -0,0 +1,19 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/name: clusterrolebinding + app.kubernetes.io/instance: proxy-rolebinding + app.kubernetes.io/component: kube-rbac-proxy + app.kubernetes.io/created-by: operator + app.kubernetes.io/part-of: operator + app.kubernetes.io/managed-by: kustomize + name: proxy-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: proxy-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/control-operator/config/rbac/auth_proxy_service.yaml 
b/control-operator/config/rbac/auth_proxy_service.yaml new file mode 100644 index 000000000..8fa979bad --- /dev/null +++ b/control-operator/config/rbac/auth_proxy_service.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: service + app.kubernetes.io/instance: controller-manager-metrics-service + app.kubernetes.io/component: kube-rbac-proxy + app.kubernetes.io/created-by: operator + app.kubernetes.io/part-of: operator + app.kubernetes.io/managed-by: kustomize + name: controller-manager-metrics-service + namespace: system +spec: + ports: + - name: https + port: 8443 + protocol: TCP + targetPort: https + selector: + control-plane: controller-manager diff --git a/control-operator/config/rbac/kustomization.yaml b/control-operator/config/rbac/kustomization.yaml new file mode 100644 index 000000000..731832a6a --- /dev/null +++ b/control-operator/config/rbac/kustomization.yaml @@ -0,0 +1,18 @@ +resources: +# All RBAC will be applied under this service account in +# the deployment namespace. You may comment out this resource +# if your manager will use a service account that exists at +# runtime. Be sure to update RoleBinding and ClusterRoleBinding +# subjects if changing service account names. +- service_account.yaml +- role.yaml +- role_binding.yaml +- leader_election_role.yaml +- leader_election_role_binding.yaml +# Comment the following 4 lines if you want to disable +# the auth proxy (https://github.com/brancz/kube-rbac-proxy) +# which protects your /metrics endpoint. +- auth_proxy_service.yaml +- auth_proxy_role.yaml +- auth_proxy_role_binding.yaml +- auth_proxy_client_clusterrole.yaml diff --git a/control-operator/config/rbac/leader_election_role.yaml b/control-operator/config/rbac/leader_election_role.yaml new file mode 100644 index 000000000..6a5caf3f6 --- /dev/null +++ b/control-operator/config/rbac/leader_election_role.yaml @@ -0,0 +1,44 @@ +# permissions to do leader election. 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + labels: + app.kubernetes.io/name: role + app.kubernetes.io/instance: leader-election-role + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: operator + app.kubernetes.io/part-of: operator + app.kubernetes.io/managed-by: kustomize + name: leader-election-role +rules: +- apiGroups: + - "" + resources: + - configmaps + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch diff --git a/control-operator/config/rbac/leader_election_role_binding.yaml b/control-operator/config/rbac/leader_election_role_binding.yaml new file mode 100644 index 000000000..f07fa72af --- /dev/null +++ b/control-operator/config/rbac/leader_election_role_binding.yaml @@ -0,0 +1,19 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + app.kubernetes.io/name: rolebinding + app.kubernetes.io/instance: leader-election-rolebinding + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: operator + app.kubernetes.io/part-of: operator + app.kubernetes.io/managed-by: kustomize + name: leader-election-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: leader-election-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/control-operator/config/rbac/role.yaml b/control-operator/config/rbac/role.yaml new file mode 100644 index 000000000..a6843b9f2 --- /dev/null +++ b/control-operator/config/rbac/role.yaml @@ -0,0 +1,51 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: manager-role +rules: +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +- apiGroups: + - aliecs.alice.cern + resources: + - tasks + verbs: + - 
create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - aliecs.alice.cern + resources: + - tasks/finalizers + verbs: + - update +- apiGroups: + - aliecs.alice.cern + resources: + - tasks/status + verbs: + - get + - patch + - update +- apiGroups: + - "" + resources: + - pods + verbs: + - create + - delete + - get + - list + - patch + - update + - watch diff --git a/control-operator/config/rbac/role_binding.yaml b/control-operator/config/rbac/role_binding.yaml new file mode 100644 index 000000000..b8b41d60d --- /dev/null +++ b/control-operator/config/rbac/role_binding.yaml @@ -0,0 +1,19 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/name: clusterrolebinding + app.kubernetes.io/instance: manager-rolebinding + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: operator + app.kubernetes.io/part-of: operator + app.kubernetes.io/managed-by: kustomize + name: manager-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: manager-role +subjects: +- kind: ServiceAccount + name: controller-manager + namespace: system diff --git a/control-operator/config/rbac/service_account.yaml b/control-operator/config/rbac/service_account.yaml new file mode 100644 index 000000000..00526cc6b --- /dev/null +++ b/control-operator/config/rbac/service_account.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/name: serviceaccount + app.kubernetes.io/instance: controller-manager-sa + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: operator + app.kubernetes.io/part-of: operator + app.kubernetes.io/managed-by: kustomize + name: controller-manager + namespace: system diff --git a/control-operator/config/rbac/task_editor_role.yaml b/control-operator/config/rbac/task_editor_role.yaml new file mode 100644 index 000000000..77de69cdc --- /dev/null +++ 
b/control-operator/config/rbac/task_editor_role.yaml @@ -0,0 +1,31 @@ +# permissions for end users to edit tasks. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: clusterrole + app.kubernetes.io/instance: task-editor-role + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: operator + app.kubernetes.io/part-of: operator + app.kubernetes.io/managed-by: kustomize + name: task-editor-role +rules: +- apiGroups: + - aliecs.alice.cern + resources: + - tasks + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - aliecs.alice.cern + resources: + - tasks/status + verbs: + - get diff --git a/control-operator/config/rbac/task_viewer_role.yaml b/control-operator/config/rbac/task_viewer_role.yaml new file mode 100644 index 000000000..8eaecaa1c --- /dev/null +++ b/control-operator/config/rbac/task_viewer_role.yaml @@ -0,0 +1,27 @@ +# permissions for end users to view tasks. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: clusterrole + app.kubernetes.io/instance: task-viewer-role + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: operator + app.kubernetes.io/part-of: operator + app.kubernetes.io/managed-by: kustomize + name: task-viewer-role +rules: +- apiGroups: + - aliecs.alice.cern + resources: + - tasks + verbs: + - get + - list + - watch +- apiGroups: + - aliecs.alice.cern + resources: + - tasks/status + verbs: + - get diff --git a/control-operator/config/samples/aliecs_v1alpha1_task.yaml b/control-operator/config/samples/aliecs_v1alpha1_task.yaml new file mode 100644 index 000000000..b5c853bae --- /dev/null +++ b/control-operator/config/samples/aliecs_v1alpha1_task.yaml @@ -0,0 +1,12 @@ +apiVersion: aliecs.alice.cern/v1alpha1 +kind: Task +metadata: + labels: + app.kubernetes.io/name: task + app.kubernetes.io/instance: task-sample + app.kubernetes.io/part-of: operator + 
app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/created-by: operator + name: task-sample +spec: + # TODO(user): Add fields here diff --git a/control-operator/config/samples/kustomization.yaml b/control-operator/config/samples/kustomization.yaml new file mode 100644 index 000000000..498ea9f86 --- /dev/null +++ b/control-operator/config/samples/kustomization.yaml @@ -0,0 +1,4 @@ +## Append samples of your project ## +resources: +- aliecs_v1alpha1_task.yaml +#+kubebuilder:scaffold:manifestskustomizesamples diff --git a/control-operator/config/scorecard/bases/config.yaml b/control-operator/config/scorecard/bases/config.yaml new file mode 100644 index 000000000..c77047841 --- /dev/null +++ b/control-operator/config/scorecard/bases/config.yaml @@ -0,0 +1,7 @@ +apiVersion: scorecard.operatorframework.io/v1alpha3 +kind: Configuration +metadata: + name: config +stages: +- parallel: true + tests: [] diff --git a/control-operator/config/scorecard/kustomization.yaml b/control-operator/config/scorecard/kustomization.yaml new file mode 100644 index 000000000..50cd2d084 --- /dev/null +++ b/control-operator/config/scorecard/kustomization.yaml @@ -0,0 +1,16 @@ +resources: +- bases/config.yaml +patchesJson6902: +- path: patches/basic.config.yaml + target: + group: scorecard.operatorframework.io + version: v1alpha3 + kind: Configuration + name: config +- path: patches/olm.config.yaml + target: + group: scorecard.operatorframework.io + version: v1alpha3 + kind: Configuration + name: config +#+kubebuilder:scaffold:patchesJson6902 diff --git a/control-operator/config/scorecard/patches/basic.config.yaml b/control-operator/config/scorecard/patches/basic.config.yaml new file mode 100644 index 000000000..8aa415864 --- /dev/null +++ b/control-operator/config/scorecard/patches/basic.config.yaml @@ -0,0 +1,10 @@ +- op: add + path: /stages/0/tests/- + value: + entrypoint: + - scorecard-test + - basic-check-spec + image: quay.io/operator-framework/scorecard-test:unknown + labels: + 
suite: basic + test: basic-check-spec-test diff --git a/control-operator/config/scorecard/patches/olm.config.yaml b/control-operator/config/scorecard/patches/olm.config.yaml new file mode 100644 index 000000000..47153a8b7 --- /dev/null +++ b/control-operator/config/scorecard/patches/olm.config.yaml @@ -0,0 +1,50 @@ +- op: add + path: /stages/0/tests/- + value: + entrypoint: + - scorecard-test + - olm-bundle-validation + image: quay.io/operator-framework/scorecard-test:unknown + labels: + suite: olm + test: olm-bundle-validation-test +- op: add + path: /stages/0/tests/- + value: + entrypoint: + - scorecard-test + - olm-crds-have-validation + image: quay.io/operator-framework/scorecard-test:unknown + labels: + suite: olm + test: olm-crds-have-validation-test +- op: add + path: /stages/0/tests/- + value: + entrypoint: + - scorecard-test + - olm-crds-have-resources + image: quay.io/operator-framework/scorecard-test:unknown + labels: + suite: olm + test: olm-crds-have-resources-test +- op: add + path: /stages/0/tests/- + value: + entrypoint: + - scorecard-test + - olm-spec-descriptors + image: quay.io/operator-framework/scorecard-test:unknown + labels: + suite: olm + test: olm-spec-descriptors-test +- op: add + path: /stages/0/tests/- + value: + entrypoint: + - scorecard-test + - olm-status-descriptors + image: quay.io/operator-framework/scorecard-test:unknown + labels: + suite: olm + test: olm-status-descriptors-test diff --git a/control-operator/ecs-manifests/control-workflows/readout-docker.yaml b/control-operator/ecs-manifests/control-workflows/readout-docker.yaml new file mode 100644 index 000000000..043b05f7c --- /dev/null +++ b/control-operator/ecs-manifests/control-workflows/readout-docker.yaml @@ -0,0 +1,33 @@ +name: readout # read by workflow +defaults: + readout_cfg_uri: "consul-ini://{{ consul_endpoint }}/o2/components/readout/ANY/any/readout-standalone-{{ task_hostname }}" + user: flp + log_task_stdout: none + log_task_stderr: none + _module_cmdline: >- + 
source /etc/profile.d/modules.sh && MODULEPATH={{ modulepath }} module load Readout Control-OCCPlugin && + o2-readout-exe + _plain_cmdline: "sudo -E docker run --name readout --replace --privileged --user flp -v /etc/group:/etc/group:ro -v /etc/passwd:/etc/passwd:ro -v /tmp:/tmp -v /lib/modules/$(uname -r):/lib/modules/$(uname -r) --network=host --ipc=host -e O2_DETECTOR -e O2_PARTITION -e OCC_CONTROL_PORT -e O2_SYSTEM -e O2_ROLE gitlab-registry.cern.ch/aliceo2group/dockerfiles/alma9-flp-node:2 /opt/o2/bin/o2-readout-exe" +control: + mode: direct +wants: + cpu: 0.15 + memory: 128 +bind: + - name: readout + type: push + rateLogging: "{{ fmq_rate_logging }}" + addressing: ipc + transport: shmem +properties: {} +command: + stdout: "{{ log_task_stdout }}" + stderr: "{{ log_task_stderr }}" + shell: true + env: + - O2_DETECTOR={{ detector }} + - O2_PARTITION={{ environment_id }} + user: "{{ user }}" + arguments: + - "{{ readout_cfg_uri }}" + value: "{{ len(modulepath)>0 ? _module_cmdline : _plain_cmdline }}" diff --git a/control-operator/ecs-manifests/control-workflows/readout-kube.yaml b/control-operator/ecs-manifests/control-workflows/readout-kube.yaml new file mode 100644 index 000000000..549aebd14 --- /dev/null +++ b/control-operator/ecs-manifests/control-workflows/readout-kube.yaml @@ -0,0 +1,35 @@ +name: readout # read by workflow +defaults: + readout_cfg_uri: "consul-ini://{{ consul_endpoint }}/o2/components/readout/ANY/any/readout-standalone-{{ task_hostname }}" + user: flp + log_task_stdout: none + log_task_stderr: none + _module_cmdline: >- + source /etc/profile.d/modules.sh && MODULEPATH={{ modulepath }} module load Readout Control-OCCPlugin && + o2-readout-exe + _plain_cmdline: "{{ o2_install_path }}/bin/o2-readout-exe" +control: + mode: kubectl_direct +wants: + cpu: 0.15 + memory: 128 +bind: + - name: readout + type: push + rateLogging: "{{ fmq_rate_logging }}" + addressing: ipc + transport: shmem +properties: {} +command: + stdout: "{{ log_task_stdout }}" + 
stderr: "{{ log_task_stderr }}" + shell: true + env: + - O2_DETECTOR={{ detector }} + - O2_PARTITION={{ environment_id }} + user: "{{ user }}" + arguments: + # the first argument is the location of the readout YAML manifest to be loaded; the one to be used is located in this repo at control-operator/ecs-manifests/kubernetes-manifests/readout.yaml + - "/root/readout.yaml" + - "{{ readout_cfg_uri }}" + value: "{{ len(modulepath)>0 ? _module_cmdline : _plain_cmdline }}" diff --git a/control-operator/ecs-manifests/control-workflows/readout-orig.yaml b/control-operator/ecs-manifests/control-workflows/readout-orig.yaml new file mode 100644 index 000000000..5d6130024 --- /dev/null +++ b/control-operator/ecs-manifests/control-workflows/readout-orig.yaml @@ -0,0 +1,33 @@ +name: readout # read by workflow +defaults: + readout_cfg_uri: "consul-ini://{{ consul_endpoint }}/o2/components/readout/ANY/any/readout-standalone-{{ task_hostname }}" + user: flp + log_task_stdout: none + log_task_stderr: none + _module_cmdline: >- + source /etc/profile.d/modules.sh && MODULEPATH={{ modulepath }} module load Readout Control-OCCPlugin && + o2-readout-exe + _plain_cmdline: "{{ o2_install_path }}/bin/o2-readout-exe" +control: + mode: direct +wants: + cpu: 0.15 + memory: 128 +bind: + - name: readout + type: push + rateLogging: "{{ fmq_rate_logging }}" + addressing: ipc + transport: shmem +properties: {} +command: + stdout: "{{ log_task_stdout }}" + stderr: "{{ log_task_stderr }}" + shell: true + env: + - O2_DETECTOR={{ detector }} + - O2_PARTITION={{ environment_id }} + user: "{{ user }}" + arguments: + - "{{ readout_cfg_uri }}" + value: "{{ len(modulepath)>0 ? 
_module_cmdline : _plain_cmdline }}" diff --git a/control-operator/ecs-manifests/control-workflows/stfbuilder-senderoutput-kube.yaml b/control-operator/ecs-manifests/control-workflows/stfbuilder-senderoutput-kube.yaml new file mode 100644 index 000000000..9c84f0ed7 --- /dev/null +++ b/control-operator/ecs-manifests/control-workflows/stfbuilder-senderoutput-kube.yaml @@ -0,0 +1,66 @@ +name: stfbuilder-senderoutput +control: + mode: kubectl_fairmq +defaults: + detector: TST + rdh_version: "6" + user: flp + fmq_severity: info + dd_detector_subspec: feeid + dd_discovery_endpoint: "no-op://" # location of consul docker instance + log_task_stdout: none + log_task_stderr: none + stfb_dd_mode: "physics" + stfb_fee_mask: "0xffff" + stfb_enable_datasink: "false" + stfb_datasink_dir: "/tmp" + stfb_datasink_filesize: "2048" + monitoring_dd_interval: "5" + _module_cmdline: >- + source /etc/profile.d/modules.sh && MODULEPATH={{ modulepath }} module load DataDistribution Control-OCCPlugin && + StfBuilder + _plain_cmdline: "{{ o2_install_path }}/bin/StfBuilder" +wants: + cpu: 0.15 + memory: 128 +bind: + - name: buildertosender + type: push + rateLogging: "{{ fmq_rate_logging }}" + transport: shmem + addressing: ipc + sndBufSize: "4" +command: + env: + - O2_INFOLOGGER_MODE={{infologger_mode}} + - O2_DETECTOR={{ detector }} + - DATADIST_FEE_MASK={{ stfb_fee_mask }} + - O2_PARTITION={{ environment_id }} + stdout: "{{ log_task_stdout }}" + stderr: "{{ log_task_stderr }}" + shell: true + user: "{{ user }}" + value: "{{ len(modulepath)>0 ? 
_module_cmdline : _plain_cmdline }}" + arguments: + # the first argument is the location of the stfbuilder-senderoutput YAML manifest to be loaded; the one to be used is located in this repo at control-operator/ecs-manifests/kubernetes-manifests/stfbuilder-senderoutput.yaml + - "/root/stfbuilder-senderoutput.yaml" + - "--session=default" + - "--transport=shmem" + - "--shm-segment-id=2" + - "--shm-segment-size=33554432" + - "--monitoring-backend='{{ monitoring_dd_url }}'" + - "--monitoring-process-interval='{{ monitoring_dd_interval }}'" + - "--discovery-partition={{ environment_id }}" + - "--discovery-endpoint={{ dd_discovery_endpoint }}" + - "--detector-rdh={{ rdh_version }}" + - "--detector-subspec={{ dd_detector_subspec }}" + - "--severity={{ fmq_severity }}" + - "--severity-infologger={{ infologger_severity }}" + - "--output-channel-name=buildertosender" + - "--shm-monitor=false" + - "--detector={{ detector }}" + - "--run-type={{ stfb_dd_mode }}" + - "{{ stfb_enable_datasink == 'true' ? '--data-sink-enable' : ' ' }}" + - "{{ stfb_enable_datasink == 'true' ? '--data-sink-dir=' + stfb_datasink_dir : ' ' }}" + - "{{ stfb_enable_datasink == 'true' ? '--data-sink-max-stfs-per-file=0' : ' ' }}" + - "{{ stfb_enable_datasink == 'true' ? 
'--data-sink-max-file-size=' + stfb_datasink_filesize : ' ' }}" diff --git a/control-operator/ecs-manifests/control-workflows/stfbuilder-senderoutput-orig.yaml b/control-operator/ecs-manifests/control-workflows/stfbuilder-senderoutput-orig.yaml new file mode 100644 index 000000000..3d33ac31d --- /dev/null +++ b/control-operator/ecs-manifests/control-workflows/stfbuilder-senderoutput-orig.yaml @@ -0,0 +1,64 @@ +name: stfbuilder-senderoutput +control: + mode: fairmq +defaults: + detector: TST + rdh_version: "6" + user: flp + fmq_severity: info + dd_detector_subspec: feeid + dd_discovery_endpoint: "no-op://" # location of consul docker instance + log_task_stdout: none + log_task_stderr: none + stfb_dd_mode: "physics" + stfb_fee_mask: "0xffff" + stfb_enable_datasink: "false" + stfb_datasink_dir: "/tmp" + stfb_datasink_filesize: "2048" + monitoring_dd_interval: "5" + _module_cmdline: >- + source /etc/profile.d/modules.sh && MODULEPATH={{ modulepath }} module load DataDistribution Control-OCCPlugin && + StfBuilder + _plain_cmdline: "{{ o2_install_path }}/bin/StfBuilder" +wants: + cpu: 0.15 + memory: 128 +bind: + - name: buildertosender + type: push + rateLogging: "{{ fmq_rate_logging }}" + transport: shmem + addressing: ipc + sndBufSize: "4" +command: + env: + - O2_INFOLOGGER_MODE={{infologger_mode}} + - O2_DETECTOR={{ detector }} + - DATADIST_FEE_MASK={{ stfb_fee_mask }} + - O2_PARTITION={{ environment_id }} + stdout: "{{ log_task_stdout }}" + stderr: "{{ log_task_stderr }}" + shell: true + user: "{{ user }}" + value: "{{ len(modulepath)>0 ? 
_module_cmdline : _plain_cmdline }}" + arguments: + - "--session=default" + - "--transport=shmem" + - "--shm-segment-id=2" + - "--shm-segment-size=33554432" + - "--monitoring-backend='{{ monitoring_dd_url }}'" + - "--monitoring-process-interval='{{ monitoring_dd_interval }}'" + - "--discovery-partition={{ environment_id }}" + - "--discovery-endpoint={{ dd_discovery_endpoint }}" + - "--detector-rdh={{ rdh_version }}" + - "--detector-subspec={{ dd_detector_subspec }}" + - "--severity={{ fmq_severity }}" + - "--severity-infologger={{ infologger_severity }}" + - "--output-channel-name=buildertosender" + - "--shm-monitor=false" + - "--detector={{ detector }}" + - "--run-type={{ stfb_dd_mode }}" + - "{{ stfb_enable_datasink == 'true' ? '--data-sink-enable' : ' ' }}" + - "{{ stfb_enable_datasink == 'true' ? '--data-sink-dir=' + stfb_datasink_dir : ' ' }}" + - "{{ stfb_enable_datasink == 'true' ? '--data-sink-max-stfs-per-file=0' : ' ' }}" + - "{{ stfb_enable_datasink == 'true' ? '--data-sink-max-file-size=' + stfb_datasink_filesize : ' ' }}" diff --git a/control-operator/ecs-manifests/control-workflows/stfsender-docker.yaml b/control-operator/ecs-manifests/control-workflows/stfsender-docker.yaml new file mode 100644 index 000000000..a8f67ba33 --- /dev/null +++ b/control-operator/ecs-manifests/control-workflows/stfsender-docker.yaml @@ -0,0 +1,65 @@ +name: stfsender +control: + mode: fairmq +wants: + cpu: 0.15 + memory: 128 +defaults: + max_buffered_stfs: "-1" + dd_discovery_net_if: "ib0" #the loopback intf + dd_discovery_endpoint: "no-op://" # location of consul docker instance + user: flp + fmq_severity: info + log_task_stdout: none + log_task_stderr: none + stfs_dd_region_size: 4096 + stfs_shm_segment_size: 33554432 + stfs_enable_datasink: "false" + stfs_datasink_dir: "/tmp" + stfs_datasink_filesize: "2048" + monitoring_dd_interval: "5" + _module_cmdline: >- + source /etc/profile.d/modules.sh && MODULEPATH={{ modulepath }} module load DataDistribution Control-OCCPlugin 
&& + numactl --cpunodebind=0 --preferred=0 -- StfSender + #_plain_cmdline: "numactl --cpunodebind=0 --preferred=0 -- {{ o2_install_path }}/bin/StfSender" + _plain_cmdline: "sudo -E docker run --name stfsender --replace --privileged --user flp -v /etc/group:/etc/group:ro -v /etc/passwd:/etc/passwd:ro -v /tmp:/tmp -v /lib/modules/$(uname -r):/lib/modules/$(uname -r) --network=host --ipc=host -e CONTROL_OCCPLUGIN_ROOT=/opt/o2 -e O2_INFOLOGGER_MODE -e UCX_TLS -e UCX_IB_SL -e UCX_DC_MAX_NUM_EPS -e UCX_RC_MAX_NUM_EPS -e http_proxy -e https_proxy -e UCX_NET_DEVICES -e O2_DETECTOR -e O2_PARTITION -e OCC_CONTROL_PORT -e O2_SYSTEM -e O2_ROLE gitlab-registry.cern.ch/aliceo2group/dockerfiles/alma9-flp-node/dd:1 numactl --cpunodebind=0 --preferred=0 -- {{ o2_install_path }}/bin/StfSender" +command: + env: + - O2_INFOLOGGER_MODE={{infologger_mode}} + - http_proxy="" + - https_proxy="" + - no_proxy=-ib,.internal + - O2_DETECTOR={{ detector }} + - UCX_NET_DEVICES=mlx5_0:1 # This limits StfSender to IB interface (used as of DD v1.3.0) + - UCX_TLS=sm,self,dc,rc # Force dc/rc connection (used as of DD v1.4.0) + - UCX_IB_SL=1 # Force IB SL1 with Adaptive Routing (AR) + - UCX_DC_MAX_NUM_EPS=512 # Allow 512 peers for DC transport + - UCX_RC_MAX_NUM_EPS=512 # Allow 512 peers for RC transport + - O2_PARTITION={{ environment_id }} + stdout: "{{ log_task_stdout }}" + stderr: "{{ log_task_stderr }}" + shell: true + user: "{{ user }}" + value: "{{ len(modulepath)>0 ? 
_module_cmdline : _plain_cmdline }}" + arguments: + - "--session=default" + - "--shm-segment-id=2" + - "--shm-segment-size={{ stfs_shm_segment_size }}" + - "--dd-region-size={{ stfs_dd_region_size }}" + - "--dd-region-id=3536" + - "--transport=shmem" + - "--input-channel-name={{ stfs_input_channel_name }}" + - "--severity={{ fmq_severity }}" + - "--severity-infologger={{ infologger_severity }}" + - "--monitoring-backend='{{ monitoring_dd_url }}'" + - "--monitoring-process-interval='{{ monitoring_dd_interval }}'" + - "--discovery-partition={{ environment_id }}" + - "--discovery-id={{ dd_discovery_stfs_id }}" + - "--discovery-endpoint={{ dd_discovery_endpoint }}" + - "--discovery-net-if={{ dd_discovery_net_if }}" + - '{{ ddsched_enabled == "true" ? "" : "--stand-alone" }}' + - "--shm-monitor=false" + - "{{ stfs_enable_datasink == 'true' ? '--data-sink-enable' : ' ' }}" + - "{{ stfs_enable_datasink == 'true' ? '--data-sink-dir=' + stfs_datasink_dir : ' ' }}" + - "{{ stfs_enable_datasink == 'true' ? '--data-sink-max-stfs-per-file=0' : ' ' }}" + - "{{ stfs_enable_datasink == 'true' ? 
'--data-sink-max-file-size=' + stfs_datasink_filesize : ' ' }}" diff --git a/control-operator/ecs-manifests/control-workflows/stfsender-kube.yaml b/control-operator/ecs-manifests/control-workflows/stfsender-kube.yaml new file mode 100644 index 000000000..58a6106b6 --- /dev/null +++ b/control-operator/ecs-manifests/control-workflows/stfsender-kube.yaml @@ -0,0 +1,66 @@ +name: stfsender +control: + mode: kubectl_fairmq +wants: + cpu: 0.15 + memory: 128 +defaults: + max_buffered_stfs: "-1" + dd_discovery_net_if: "ib0" #the loopback intf + dd_discovery_endpoint: "no-op://" # location of consul docker instance + user: flp + fmq_severity: info + log_task_stdout: none + log_task_stderr: none + stfs_dd_region_size: 4096 + stfs_shm_segment_size: 33554432 + stfs_enable_datasink: "false" + stfs_datasink_dir: "/tmp" + stfs_datasink_filesize: "2048" + monitoring_dd_interval: "5" + _module_cmdline: >- + source /etc/profile.d/modules.sh && MODULEPATH={{ modulepath }} module load DataDistribution Control-OCCPlugin && + numactl --cpunodebind=0 --preferred=0 -- StfSender + _plain_cmdline: "numactl --cpunodebind=0 --preferred=0 -- {{ o2_install_path }}/bin/StfSender" +command: + env: + - O2_INFOLOGGER_MODE={{infologger_mode}} + - http_proxy= + - https_proxy= + - no_proxy=-ib,.internal + - O2_DETECTOR={{ detector }} + - UCX_NET_DEVICES=mlx5_0:1 # This limits StfSender to IB interface (used as of DD v1.3.0) + - UCX_TLS=sm,self,dc,rc # Force dc/rc connection (used as of DD v1.4.0) + - UCX_IB_SL=1 # Force IB SL1 with Adaptive Routing (AR) + - UCX_DC_MAX_NUM_EPS=512 # Allow 512 peers for DC transport + - UCX_RC_MAX_NUM_EPS=512 # Allow 512 peers for RC transport + - O2_PARTITION={{ environment_id }} + stdout: "{{ log_task_stdout }}" + stderr: "{{ log_task_stderr }}" + shell: true + user: "{{ user }}" + value: "{{ len(modulepath)>0 ? 
_module_cmdline : _plain_cmdline }}" + arguments: + # the first argument is the location of the YAML stfsender manifest to be loaded; the one to be used is located in this repo at control-operator/ecs-manifests/kubernetes-manifests/stfsender.yaml + - "/root/stfsender.yaml" + - "--session=default" + - "--shm-segment-id=2" + - "--shm-segment-size={{ stfs_shm_segment_size }}" + - "--dd-region-size={{ stfs_dd_region_size }}" + - "--dd-region-id=3536" + - "--transport=shmem" + - "--input-channel-name={{ stfs_input_channel_name }}" + - "--severity={{ fmq_severity }}" + - "--severity-infologger={{ infologger_severity }}" + - "--monitoring-backend='{{ monitoring_dd_url }}'" + - "--monitoring-process-interval='{{ monitoring_dd_interval }}'" + - "--discovery-partition={{ environment_id }}" + - "--discovery-id={{ dd_discovery_stfs_id }}" + - "--discovery-endpoint={{ dd_discovery_endpoint }}" + - "--discovery-net-if={{ dd_discovery_net_if }}" + - '{{ ddsched_enabled == "true" ? "" : "--stand-alone" }}' + - "--shm-monitor=false" + - "{{ stfs_enable_datasink == 'true' ? '--data-sink-enable' : ' ' }}" + - "{{ stfs_enable_datasink == 'true' ? '--data-sink-dir=' + stfs_datasink_dir : ' ' }}" + - "{{ stfs_enable_datasink == 'true' ? '--data-sink-max-stfs-per-file=0' : ' ' }}" + - "{{ stfs_enable_datasink == 'true' ? 
'--data-sink-max-file-size=' + stfs_datasink_filesize : ' ' }}" diff --git a/control-operator/ecs-manifests/control-workflows/stfsender-orig.yaml b/control-operator/ecs-manifests/control-workflows/stfsender-orig.yaml new file mode 100644 index 000000000..7bb9ecd23 --- /dev/null +++ b/control-operator/ecs-manifests/control-workflows/stfsender-orig.yaml @@ -0,0 +1,64 @@ +name: stfsender +control: + mode: fairmq +wants: + cpu: 0.15 + memory: 128 +defaults: + max_buffered_stfs: "-1" + dd_discovery_net_if: "ib0" #the loopback intf + dd_discovery_endpoint: "no-op://" # location of consul docker instance + user: flp + fmq_severity: info + log_task_stdout: none + log_task_stderr: none + stfs_dd_region_size: 4096 + stfs_shm_segment_size: 33554432 + stfs_enable_datasink: "false" + stfs_datasink_dir: "/tmp" + stfs_datasink_filesize: "2048" + monitoring_dd_interval: "5" + _module_cmdline: >- + source /etc/profile.d/modules.sh && MODULEPATH={{ modulepath }} module load DataDistribution Control-OCCPlugin && + numactl --cpunodebind=0 --preferred=0 -- StfSender + _plain_cmdline: "numactl --cpunodebind=0 --preferred=0 -- {{ o2_install_path }}/bin/StfSender" +command: + env: + - O2_INFOLOGGER_MODE={{infologger_mode}} + - http_proxy="" + - https_proxy="" + - no_proxy=-ib,.internal + - O2_DETECTOR={{ detector }} + - UCX_NET_DEVICES=mlx5_0:1 # This limits StfSender to IB interface (used as of DD v1.3.0) + - UCX_TLS=sm,self,dc,rc # Force dc/rc connection (used as of DD v1.4.0) + - UCX_IB_SL=1 # Force IB SL1 with Adaptive Routing (AR) + - UCX_DC_MAX_NUM_EPS=512 # Allow 512 peers for DC transport + - UCX_RC_MAX_NUM_EPS=512 # Allow 512 peers for RC transport + - O2_PARTITION={{ environment_id }} + stdout: "{{ log_task_stdout }}" + stderr: "{{ log_task_stderr }}" + shell: true + user: "{{ user }}" + value: "{{ len(modulepath)>0 ? 
_module_cmdline : _plain_cmdline }}" + arguments: + - "--session=default" + - "--shm-segment-id=2" + - "--shm-segment-size={{ stfs_shm_segment_size }}" + - "--dd-region-size={{ stfs_dd_region_size }}" + - "--dd-region-id=3536" + - "--transport=shmem" + - "--input-channel-name={{ stfs_input_channel_name }}" + - "--severity={{ fmq_severity }}" + - "--severity-infologger={{ infologger_severity }}" + - "--monitoring-backend='{{ monitoring_dd_url }}'" + - "--monitoring-process-interval='{{ monitoring_dd_interval }}'" + - "--discovery-partition={{ environment_id }}" + - "--discovery-id={{ dd_discovery_stfs_id }}" + - "--discovery-endpoint={{ dd_discovery_endpoint }}" + - "--discovery-net-if={{ dd_discovery_net_if }}" + - '{{ ddsched_enabled == "true" ? "" : "--stand-alone" }}' + - "--shm-monitor=false" + - "{{ stfs_enable_datasink == 'true' ? '--data-sink-enable' : ' ' }}" + - "{{ stfs_enable_datasink == 'true' ? '--data-sink-dir=' + stfs_datasink_dir : ' ' }}" + - "{{ stfs_enable_datasink == 'true' ? '--data-sink-max-stfs-per-file=0' : ' ' }}" + - "{{ stfs_enable_datasink == 'true' ? '--data-sink-max-file-size=' + stfs_datasink_filesize : ' ' }}" diff --git a/control-operator/ecs-manifests/kubernetes-ecs.md b/control-operator/ecs-manifests/kubernetes-ecs.md new file mode 100644 index 000000000..e5608750d --- /dev/null +++ b/control-operator/ecs-manifests/kubernetes-ecs.md @@ -0,0 +1,35 @@ +# ECS in Kubernetes + +For now we can run OCC tasks in Kubernetes (namely `readout`, `stfsender`, `stfbuilder-senderoutput`) using +the task controller inside the `control-operator` folder and `kubectltask`, created as a version of `controllabletask` +for the Mesos executor wrapping the `kubectl` tool. However, as `kubectl` requires manifests, we use `control-operator/ecs-manifests` +to store these test manifests. There are more test files apart from these manifests. 
+ +There are 3 subfolders inside `ecs-manifests`, namely `control-workflows`, `kubernetes-manifests` and `occ-configure-arguments`. + +### `control-workflows` + +In order to run a given task from ECS you need to provide a YAML template, normally contained inside the `ControlWorkflows` +repository, that is processed by the ECS core and sent to the Mesos framework that runs the given task on a given agent. +Inside the folder you can find files with suffixes `docker`, `kube`, `orig` appended to the name of the task they +represent. These files are to be put into `ControlWorkflows/tasks` so that ECS can find them and run the task +in the proper way. E.g. to run readout in Kubernetes, copy (or symlink) `readout-kube.yaml` into the `ControlWorkflows/tasks` +directory under the name `readout.yaml` (same for the other tasks). + +### `kubernetes-manifests` + +Kubectltask requires Kubernetes manifests to pass to `kubectl`; these manifests are located in the `kubernetes-manifests` directory. +There are two types of manifests: `task.yaml` and `task-test.yaml`. The first one is the actual manifest to be used by +executors and kubectltask, where environment variable substitution is used in the form of `${VAR}`. `task-test.yaml` +has the same form as the templated manifest, but with actual test values to test the container/binary inside Kubernetes. +Apply the manifest by invoking: + +```bash +kubectl apply -f task-test.yaml +``` + +### `occ-configure-arguments` + +YAML files containing data used by `peanut` to properly transition `readout`, `stfsender` and `stfbuilder-senderoutput` +to `CONFIGURED`. Use these files by either loading them into `peanut` with LoadConfiguration in TUI mode or +by using the `--config` flag in CLI mode. 
diff --git a/control-operator/ecs-manifests/kubernetes-manifests/readout-test.yaml b/control-operator/ecs-manifests/kubernetes-manifests/readout-test.yaml new file mode 100644 index 000000000..d9711feb3 --- /dev/null +++ b/control-operator/ecs-manifests/kubernetes-manifests/readout-test.yaml @@ -0,0 +1,179 @@ +apiVersion: aliecs.alice.cern/v1alpha1 +kind: Task +metadata: + name: readout +spec: + arguments: + chans.readout.0.address: ipc://@o2ipc-d7ef3du8ndmd7n8j7l2g + chans.readout.0.autoBind: "0" + chans.readout.0.method: bind + chans.readout.0.rateLogging: "0" + chans.readout.0.rcvBufSize: "1000" + chans.readout.0.rcvKernelSize: "0" + chans.readout.0.sndBufSize: "1000" + chans.readout.0.sndKernelSize: "0" + chans.readout.0.transport: shmem + chans.readout.0.type: push + chans.readout.numSockets: "1" + environment_id: 323ikoYJ88i + orbit-reset-time: "" + pod: + hostNetwork: true + hostIPC: true + securityContext: + fsGroup: 1100 + supplementalGroups: [10, 1105] + containers: + - name: readout + image: gitlab-registry.cern.ch/aliceo2group/dockerfiles/alma9-flp-node:2 + command: ["/opt/o2/bin/o2-readout-exe"] + args: ["/etc/readout/readout-cfg.ini"] + securityContext: + privileged: true + runAsUser: 1100 + runAsGroup: 1100 + env: + - name: O2_DETECTOR + value: "TST" + - name: O2_PARTITION + value: "323ikoYJ88i" + - name: OCC_CONTROL_PORT + value: "31000" + - name: O2_SYSTEM + value: "flp" + # it might be necessary to change the role for your use case + - name: O2_ROLE + value: "mtichak" + volumeMounts: + - name: group + mountPath: /etc/group + readOnly: true + - name: passwd + mountPath: /etc/passwd + readOnly: true + - name: tmp + mountPath: /tmp + - name: modules + mountPath: /lib/modules + - name: readout-config + mountPath: /etc/readout + readOnly: true + imagePullPolicy: IfNotPresent + volumes: + - name: group + hostPath: + path: /etc/group + - name: passwd + hostPath: + path: /etc/passwd + - name: tmp + hostPath: + path: /tmp + - name: modules + hostPath: + 
path: /lib/modules + - name: readout-config + configMap: + name: readout-config + imagePullSecrets: + - name: gitlab-registry-secret + control: + mode: "direct" + port: 31000 + bind: + - name: readout + type: push + rateLogging: "0" + addressing: ipc + transport: shmem + global: "" + target: "" + connect: + properties: + var1: value1 + var2: value2 + state: standby + +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: readout-config +data: + readout-cfg.ini: | + # Example configuration file for o2-readout-exe + # Standalone operation, with emulated input, and output to Data Distribution + # to run readout-stbf out of the box with data emulator + + [readout] + aggregatorSliceTimeout=0.5 + aggregatorStfTimeout=1 + + # dummy data source + [equipment-emulator-1] + enabled=1 + equipmentType=cruEmulator + memoryPoolNumberOfPages=1000 + memoryPoolPageSize=1M + numberOfLinks=1 + PayloadSize=8000 + rdhUseFirstInPageEnabled=1 + #systemId=19 + #feeId=0 + #cruId=0 + #dpwId=0 + id=1 + + # define a (disabled) CRU equipment for CRU end point #0 + [equipment-rorc-1] + enabled=0 + equipmentType=rorc + cardId=#0 + dataSource=Ddg + memoryPoolNumberOfPages=1800 + memoryPoolPageSize=1M + rdhUseFirstInPageEnabled=1 + + + # monitor counters + [consumer-stats] + consumerType=stats + monitoringEnabled=1 + monitoringUpdatePeriod=5 + processMonitoringInterval=15 + monitoringURI=influxdb-unix:///tmp/telegraf.sock + + + # record data to file (disabled) + [consumer-rec] + enabled=0 + consumerType=fileRecorder + fileName=/tmp/data.raw + + + # allow data sampling to take data + [consumer-data-sampling] + enabled=0 + consumerType=DataSampling + + + # send data to stfb + [consumer-StfBuilder] + enabled = 1 + consumerType = FairMQChannel + sessionName = default + fmq-transport = shmem + fmq-name = readout + fmq-type = push + fmq-address = ipc:///tmp/flp-readout-pipe-0 + unmanagedMemorySize = 2G + memoryPoolNumberOfPages = 200 + memoryPoolPageSize = 1M + disableSending=0 + + + # matching config 
for the test receiver + [receiver-fmq] + decodingMode=stfHbf + channelAddress=ipc:///tmp/flp-readout-pipe-0 + channelType=pull diff --git a/control-operator/ecs-manifests/kubernetes-manifests/readout.yaml b/control-operator/ecs-manifests/kubernetes-manifests/readout.yaml new file mode 100644 index 000000000..90d7600fe --- /dev/null +++ b/control-operator/ecs-manifests/kubernetes-manifests/readout.yaml @@ -0,0 +1,82 @@ +apiVersion: aliecs.alice.cern/v1alpha1 +kind: Task +metadata: + name: readout +spec: + pod: + hostNetwork: true + hostIPC: true + securityContext: + fsGroup: ${FLP_GID} + supplementalGroups: ${FLP_SUPPLEMENTAL_GROUPS} + containers: + - name: readout + image: gitlab-registry.cern.ch/aliceo2group/dockerfiles/alma9-flp-node/readout:1 + command: ["/bin/bash"] + args: + - "-c" + - "${KUBE_COMMAND} ${KUBE_ARGUMENTS}" + securityContext: + privileged: true + runAsUser: ${FLP_UID} + runAsGroup: ${FLP_GID} + env: + - name: O2_DETECTOR + value: "${O2_DETECTOR}" + - name: O2_PARTITION + value: "${O2_PARTITION}" + - name: OCC_CONTROL_PORT + value: "${OCC_CONTROL_PORT}" + - name: O2_SYSTEM + value: "${O2_SYSTEM}" + - name: O2_ROLE + value: "${O2_ROLE}" + volumeMounts: + - name: host-shm + mountPath: /dev/shm + - name: group + mountPath: /etc/group + readOnly: true + - name: passwd + mountPath: /etc/passwd + readOnly: true + - name: tmp + mountPath: /tmp + - name: modules + mountPath: /lib/modules + imagePullPolicy: IfNotPresent + volumes: + - name: host-shm + hostPath: + path: /dev/shm + type: Directory + - name: group + hostPath: + path: /etc/group + - name: passwd + hostPath: + path: /etc/passwd + - name: tmp + hostPath: + path: /tmp + - name: modules + hostPath: + path: /lib/modules + imagePullSecrets: + - name: gitlab-registry-secret + control: + mode: "direct" + port: ${OCC_CONTROL_PORT} + bind: + - name: readout + type: push + rateLogging: "0" + addressing: ipc + transport: shmem + global: "" + target: "" + connect: + properties: + var1: value1 + var2: 
value2 + state: standby diff --git a/control-operator/ecs-manifests/kubernetes-manifests/stfbuilder-senderoutput-test.yaml b/control-operator/ecs-manifests/kubernetes-manifests/stfbuilder-senderoutput-test.yaml new file mode 100644 index 000000000..2e625d13d --- /dev/null +++ b/control-operator/ecs-manifests/kubernetes-manifests/stfbuilder-senderoutput-test.yaml @@ -0,0 +1,114 @@ +apiVersion: aliecs.alice.cern/v1alpha1 +kind: Task +metadata: + name: stfbuilder-senderoutput +spec: + arguments: + chans.buildertosender.0.address: ipc://@o2ipc-d7ef3du8ndmd7n8j7l30 + chans.buildertosender.0.autoBind: "0" + chans.buildertosender.0.method: bind + chans.buildertosender.0.rateLogging: "0" + chans.buildertosender.0.rcvBufSize: "1000" + chans.buildertosender.0.rcvKernelSize: "0" + chans.buildertosender.0.sndBufSize: "4" + chans.buildertosender.0.sndKernelSize: "0" + chans.buildertosender.0.transport: shmem + chans.buildertosender.0.type: push + chans.buildertosender.numSockets: "1" + chans.readout.0.address: ipc://@o2ipc-d7ef3du8ndmd7n8j7l2g + chans.readout.0.method: connect + chans.readout.0.rateLogging: "0" + chans.readout.0.rcvBufSize: "1000" + chans.readout.0.rcvKernelSize: "0" + chans.readout.0.sndBufSize: "1000" + chans.readout.0.sndKernelSize: "0" + chans.readout.0.transport: shmem + chans.readout.0.type: pull + chans.readout.numSockets: "1" + environment_id: 323ikoYJ88i + orbit-reset-time: "" + pod: + hostNetwork: true + hostIPC: true + securityContext: + fsGroup: 1100 + supplementalGroups: [10, 1105] + containers: + - name: stfbuilder-senderoutput + image: gitlab-registry.cern.ch/aliceo2group/dockerfiles/alma9-flp-node/dd:1 + command: ["/bin/bash"] + args: + - "-lc" + - "/opt/o2/bin/StfBuilder --session=default --transport=shmem --shm-segment-id=2 --shm-segment-size=33554432 --monitoring-backend='influxdb-unix:///tmp/telegraf.sock' --monitoring-process-interval='5' --discovery-partition=323ikoYJ88i --discovery-endpoint=no-op:// --detector-rdh=6 
--detector-subspec=feeid --severity=info --severity-infologger=info --output-channel-name=buildertosender --shm-monitor=false --detector=TST --run-type=physics --id 323ikp9sptz -S $CONTROL_OCCPLUGIN_ROOT/lib/ -P OCClite --color false --control-port 31001" + securityContext: + privileged: true + runAsUser: 1100 + runAsGroup: 1100 + capabilities: + add: ["IPC_LOCK"] + env: + - name: O2_DETECTOR + value: "TST" + - name: O2_PARTITION + value: "323ikoYJ88i" + - name: OCC_CONTROL_PORT + value: "31001" + - name: O2_SYSTEM + value: "FLP" + - name: O2_ROLE + value: "mtichak-ost" + - name: O2_INFOLOGGER_MODE + value: "infoLoggerD" + - name: DATADIST_FEE_MASK + value: "0xffff" + - name: CONTROL_OCCPLUGIN_ROOT + value: "/opt/o2" + volumeMounts: + - name: host-shm + mountPath: /dev/shm + - name: group + mountPath: /etc/group + readOnly: true + - name: passwd + mountPath: /etc/passwd + readOnly: true + - name: tmp + mountPath: /tmp + - name: modules + mountPath: /lib/modules + imagePullPolicy: IfNotPresent + volumes: + - name: host-shm + hostPath: + path: /dev/shm + type: Directory + - name: group + hostPath: + path: /etc/group + - name: passwd + hostPath: + path: /etc/passwd + - name: tmp + hostPath: + path: /tmp + - name: modules + hostPath: + path: /lib/modules + imagePullSecrets: + - name: gitlab-registry-secret + control: + mode: "fairmq" + port: 31001 + bind: + - name: readout + type: push + rateLogging: "0" + addressing: ipc + transport: shmem + global: "" + target: "" + connect: + properties: + var1: value1 + var2: value2 + state: standby diff --git a/control-operator/ecs-manifests/kubernetes-manifests/stfbuilder-senderoutput.yaml b/control-operator/ecs-manifests/kubernetes-manifests/stfbuilder-senderoutput.yaml new file mode 100644 index 000000000..9195ed2f6 --- /dev/null +++ b/control-operator/ecs-manifests/kubernetes-manifests/stfbuilder-senderoutput.yaml @@ -0,0 +1,91 @@ +apiVersion: aliecs.alice.cern/v1alpha1 +kind: Task +metadata: + name: stfbuilder-senderoutput 
+spec: + pod: + hostNetwork: true + hostIPC: true + securityContext: + fsGroup: ${FLP_GID} + supplementalGroups: ${FLP_SUPPLEMENTAL_GROUPS} + containers: + - name: readout + image: gitlab-registry.cern.ch/aliceo2group/dockerfiles/alma9-flp-node/dd:1 + command: ["/bin/bash"] + args: + - "-lc" + - "${KUBE_COMMAND} ${KUBE_ARGUMENTS}" + securityContext: + privileged: true + runAsUser: ${FLP_UID} + runAsGroup: ${FLP_GID} + capabilities: + add: ["IPC_LOCK"] + + env: + - name: O2_DETECTOR + value: "${O2_DETECTOR}" + - name: O2_PARTITION + value: "${O2_PARTITION}" + - name: OCC_CONTROL_PORT + value: "${OCC_CONTROL_PORT}" + - name: O2_SYSTEM + value: "${O2_SYSTEM}" + - name: O2_ROLE + value: "${O2_ROLE}" + - name: O2_INFOLOGGER_MODE + value: "${O2_INFOLOGGER_MODE}" + - name: DATADIST_FEE_MASK + value: "${DATADIST_FEE_MASK}" + - name: CONTROL_OCCPLUGIN_ROOT + value: "/opt/o2" + volumeMounts: + - name: host-shm + mountPath: /dev/shm + - name: group + mountPath: /etc/group + readOnly: true + - name: passwd + mountPath: /etc/passwd + readOnly: true + - name: tmp + mountPath: /tmp + - name: modules + mountPath: /lib/modules + imagePullPolicy: IfNotPresent + volumes: + - name: host-shm + hostPath: + path: /dev/shm + type: Directory + - name: group + hostPath: + path: /etc/group + - name: passwd + hostPath: + path: /etc/passwd + - name: tmp + hostPath: + path: /tmp + - name: modules + hostPath: + path: /lib/modules + imagePullSecrets: + - name: gitlab-registry-secret + control: + mode: "fairmq" + port: ${OCC_CONTROL_PORT} + bind: + - name: readout + type: push + rateLogging: "0" + addressing: ipc + transport: shmem + global: "" + target: "" + connect: + properties: + var1: value1 + var2: value2 + state: standby diff --git a/control-operator/ecs-manifests/kubernetes-manifests/stfsender-test.yaml b/control-operator/ecs-manifests/kubernetes-manifests/stfsender-test.yaml new file mode 100644 index 000000000..7db060d53 --- /dev/null +++ 
b/control-operator/ecs-manifests/kubernetes-manifests/stfsender-test.yaml @@ -0,0 +1,92 @@ +apiVersion: aliecs.alice.cern/v1alpha1 +kind: Task +metadata: + name: stfsender +spec: + arguments: + chans.buildertosender.0.address: ipc://@o2ipc-d7ef3du8ndmd7n8j7l30 + chans.buildertosender.0.method: connect + chans.buildertosender.0.rateLogging: "0" + chans.buildertosender.0.rcvBufSize: "1000" + chans.buildertosender.0.rcvKernelSize: "0" + chans.buildertosender.0.sndBufSize: "1000" + chans.buildertosender.0.sndKernelSize: "0" + chans.buildertosender.0.transport: shmem + chans.buildertosender.0.type: pull + chans.buildertosender.numSockets: "1" + environment_id: 323ikoYJ88i + orbit-reset-time: "" + + pod: + hostNetwork: true + hostIPC: true + securityContext: + fsGroup: 1100 + supplementalGroups: [10, 1105] + containers: + - name: readout + image: gitlab-registry.cern.ch/aliceo2group/dockerfiles/alma9-flp-node/dd:1 + command: ["/bin/bash"] + args: + - "-c" + # you might need to change "mtichak" in the argument below to match your use case + - "numactl --cpunodebind=0 --preferred=0 -- /opt/o2/bin/StfSender --session=default --shm-segment-id=2 --shm-segment-size=33554432 --dd-region-size=4096 --dd-region-id=3536 --transport=shmem --input-channel-name=buildertosender --severity=info --severity-infologger=info --monitoring-backend='influxdb-unix:///tmp/telegraf.sock' --monitoring-process-interval='5' --discovery-partition=323ikoYJ88i --discovery-id=stfs-mtichak-ost-323ikoig378 --discovery-endpoint=no-op:// --discovery-net-if=ib0 --stand-alone --shm-monitor=false --id 323ikp9tANv -S $CONTROL_OCCPLUGIN_ROOT/lib/ -P OCClite --color false --control-port 31002" + securityContext: + privileged: true + runAsUser: 1100 + runAsGroup: 1100 + env: + - name: O2_DETECTOR + value: "TST" + - name: O2_PARTITION + value: "xxxxxxxx" + - name: OCC_CONTROL_PORT + value: "31002" + - name: O2_SYSTEM + value: "flp" + # it might be necessary to change the role for your use case + - name: O2_ROLE + 
value: "mtichak" + volumeMounts: + - name: group + mountPath: /etc/group + readOnly: true + - name: passwd + mountPath: /etc/passwd + readOnly: true + - name: tmp + mountPath: /tmp + - name: modules + mountPath: /lib/modules + imagePullPolicy: IfNotPresent + volumes: + - name: group + hostPath: + path: /etc/group + - name: passwd + hostPath: + path: /etc/passwd + - name: tmp + hostPath: + path: /tmp + - name: modules + hostPath: + path: /lib/modules + imagePullSecrets: + - name: gitlab-registry-secret + control: + mode: "fairmq" + port: 31002 + bind: + - name: readout + type: push + rateLogging: "0" + addressing: ipc + transport: shmem + global: "" + target: "" + connect: + properties: + var1: value1 + var2: value2 + state: standby diff --git a/control-operator/ecs-manifests/kubernetes-manifests/stfsender.yaml b/control-operator/ecs-manifests/kubernetes-manifests/stfsender.yaml new file mode 100644 index 000000000..a61b190ee --- /dev/null +++ b/control-operator/ecs-manifests/kubernetes-manifests/stfsender.yaml @@ -0,0 +1,105 @@ +apiVersion: aliecs.alice.cern/v1alpha1 +kind: Task +metadata: + name: stfsender +spec: + pod: + hostNetwork: true + hostIPC: true + securityContext: + fsGroup: ${FLP_GID} + supplementalGroups: ${FLP_SUPPLEMENTAL_GROUPS} + containers: + - name: readout + image: gitlab-registry.cern.ch/aliceo2group/dockerfiles/alma9-flp-node/dd:1 + command: ["/bin/bash"] + args: + - "-lc" + - "${KUBE_COMMAND} ${KUBE_ARGUMENTS}" + securityContext: + privileged: true + runAsUser: ${FLP_UID} + runAsGroup: ${FLP_GID} + capabilities: + add: ["IPC_LOCK"] + + env: + - name: O2_DETECTOR + value: "${O2_DETECTOR}" + - name: O2_PARTITION + value: "${O2_PARTITION}" + - name: OCC_CONTROL_PORT + value: "${OCC_CONTROL_PORT}" + - name: O2_SYSTEM + value: "${O2_SYSTEM}" + - name: O2_ROLE + value: "${O2_ROLE}" + - name: O2_INFOLOGGER_MODE + value: "${O2_INFOLOGGER_MODE}" + - name: http_proxy + value: "${http_proxy}" + - name: https_proxy + value: "${https_proxy}" + - name: 
no_proxy + value: "${no_proxy}" + - name: UCX_NET_DEVICES + value: "${UCX_NET_DEVICES}" + - name: UCX_TLS + value: "${UCX_TLS}" + - name: UCX_IB_SL + value: "${UCX_IB_SL}" + - name: UCX_DC_MAX_NUM_EPS + value: "${UCX_DC_MAX_NUM_EPS}" + - name: UCX_RC_MAX_NUM_EPS + value: "${UCX_RC_MAX_NUM_EPS}" + - name: CONTROL_OCCPLUGIN_ROOT + value: "/opt/o2" + volumeMounts: + - name: host-shm + mountPath: /dev/shm + - name: group + mountPath: /etc/group + readOnly: true + - name: passwd + mountPath: /etc/passwd + readOnly: true + - name: tmp + mountPath: /tmp + - name: modules + mountPath: /lib/modules + imagePullPolicy: IfNotPresent + volumes: + - name: host-shm + hostPath: + path: /dev/shm + type: Directory + - name: group + hostPath: + path: /etc/group + - name: passwd + hostPath: + path: /etc/passwd + - name: tmp + hostPath: + path: /tmp + - name: modules + hostPath: + path: /lib/modules + imagePullSecrets: + - name: gitlab-registry-secret + control: + mode: "fairmq" + port: ${OCC_CONTROL_PORT} + bind: + - name: readout + type: push + rateLogging: "0" + addressing: ipc + transport: shmem + global: "" + target: "" + connect: + properties: + var1: value1 + var2: value2 + state: standby diff --git a/control-operator/ecs-manifests/occ-configure-arguments/readout-configure-args.yaml b/control-operator/ecs-manifests/occ-configure-arguments/readout-configure-args.yaml new file mode 100644 index 000000000..2216e58b6 --- /dev/null +++ b/control-operator/ecs-manifests/occ-configure-arguments/readout-configure-args.yaml @@ -0,0 +1,16 @@ +chans: + readout: + numSockets: 1 + 0: + address: ipc://@o2ipc-d6qto8m8ndm409ltqngg + autoBind: 0 + method: bind + rateLogging: 0 + rcvBufSize: 1000 + rcvKernelSize: 0 + sndBufSize: 1000 + sndKernelSize: 0 + transport: shmem + type: push + +environment_id: 31UAUL4cwuk diff --git a/control-operator/ecs-manifests/occ-configure-arguments/stfbuilder-senderoutput-configure-args.yaml 
b/control-operator/ecs-manifests/occ-configure-arguments/stfbuilder-senderoutput-configure-args.yaml new file mode 100644 index 000000000..9d76ca13d --- /dev/null +++ b/control-operator/ecs-manifests/occ-configure-arguments/stfbuilder-senderoutput-configure-args.yaml @@ -0,0 +1,28 @@ +chans: + buildertosender: + numSockets: 1 + 0: + address: ipc://@o2ipc-d6qto8m8ndm409ltqnh0 + autoBind: 0 + method: bind + rateLogging: 0 + rcvBufSize: 1000 + rcvKernelSize: 0 + sndBufSize: 4 + sndKernelSize: 0 + transport: shmem + type: push + readout: + numSockets: 1 + 0: + address: ipc://@o2ipc-d6qto8m8ndm409ltqngg + method: connect + rateLogging: 0 + rcvBufSize: 1000 + rcvKernelSize: 0 + sndBufSize: 1000 + sndKernelSize: 0 + transport: shmem + type: pull + +environment_id: 31UAUL4cwuk diff --git a/control-operator/ecs-manifests/occ-configure-arguments/stfsender-configure-args.yaml b/control-operator/ecs-manifests/occ-configure-arguments/stfsender-configure-args.yaml new file mode 100644 index 000000000..fabc47d3f --- /dev/null +++ b/control-operator/ecs-manifests/occ-configure-arguments/stfsender-configure-args.yaml @@ -0,0 +1,15 @@ +chans: + buildertosender: + numSockets: 1 + 0: + address: ipc://@o2ipc-d6qto8m8ndm409ltqnh0 + method: connect + rateLogging: 0 + rcvBufSize: 1000 + rcvKernelSize: 0 + sndBufSize: 1000 + sndKernelSize: 0 + transport: shmem + type: pull + +environment_id: 31UAUL4cwuk diff --git a/control-operator/go.mod b/control-operator/go.mod new file mode 100644 index 000000000..2cbfefa21 --- /dev/null +++ b/control-operator/go.mod @@ -0,0 +1,86 @@ +module github.com/AliceO2Group/ControlOperator + +go 1.25.0 + +require ( + github.com/onsi/ginkgo/v2 v2.27.2 + github.com/onsi/gomega v1.38.2 + k8s.io/api v0.35.0 + k8s.io/apimachinery v0.35.0 + k8s.io/client-go v0.35.0 + sigs.k8s.io/controller-runtime v0.23.0 +) + +require ( + github.com/Masterminds/semver/v3 v3.4.0 // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // 
indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/emicklei/go-restful/v3 v3.13.0 // indirect + github.com/evanphx/json-patch/v5 v5.9.11 // indirect + github.com/fsnotify/fsnotify v1.9.0 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/zapr v1.3.0 // indirect + github.com/go-openapi/jsonpointer v0.22.4 // indirect + github.com/go-openapi/jsonreference v0.21.4 // indirect + github.com/go-openapi/swag v0.25.4 // indirect + github.com/go-openapi/swag/cmdutils v0.25.4 // indirect + github.com/go-openapi/swag/conv v0.25.4 // indirect + github.com/go-openapi/swag/fileutils v0.25.4 // indirect + github.com/go-openapi/swag/jsonname v0.25.4 // indirect + github.com/go-openapi/swag/jsonutils v0.25.4 // indirect + github.com/go-openapi/swag/loading v0.25.4 // indirect + github.com/go-openapi/swag/mangling v0.25.4 // indirect + github.com/go-openapi/swag/netutils v0.25.4 // indirect + github.com/go-openapi/swag/stringutils v0.25.4 // indirect + github.com/go-openapi/swag/typeutils v0.25.4 // indirect + github.com/go-openapi/swag/yamlutils v0.25.4 // indirect + github.com/go-task/slim-sprig/v3 v3.0.0 // indirect + github.com/google/btree v1.1.3 // indirect + github.com/google/gnostic-models v0.7.1 // indirect + github.com/google/go-cmp v0.7.0 // indirect + github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/prometheus/client_golang v1.23.2 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + 
github.com/prometheus/common v0.67.5 // indirect + github.com/prometheus/procfs v0.19.2 // indirect + github.com/spf13/pflag v1.0.10 // indirect + github.com/x448/float16 v0.8.4 // indirect + go.uber.org/multierr v1.11.0 // indirect + go.uber.org/zap v1.27.1 // indirect + go.yaml.in/yaml/v2 v2.4.3 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/mod v0.31.0 // indirect + golang.org/x/net v0.49.0 // indirect + golang.org/x/oauth2 v0.34.0 // indirect + golang.org/x/sync v0.19.0 // indirect + golang.org/x/sys v0.40.0 // indirect + golang.org/x/term v0.39.0 // indirect + golang.org/x/text v0.33.0 // indirect + golang.org/x/time v0.14.0 // indirect + golang.org/x/tools v0.40.0 // indirect + gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260122232226-8e98ce8d340d // indirect + google.golang.org/grpc v1.78.0 // indirect + google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.6.0 // indirect + google.golang.org/protobuf v1.36.11 // indirect + gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + k8s.io/apiextensions-apiserver v0.35.0 // indirect + k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/kube-openapi v0.0.0-20251125145642-4e65d59e963e // indirect + k8s.io/utils v0.0.0-20260108192941-914a6e750570 // indirect + sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect + sigs.k8s.io/randfill v1.0.0 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.1 // indirect + sigs.k8s.io/yaml v1.6.0 // indirect +) + +replace github.com/AliceO2Group/ControlOperator/internal/controller/protos/generated => ./internal/controller/protos/generated diff --git a/control-operator/go.sum b/control-operator/go.sum new file mode 100644 index 000000000..752dcbba0 --- /dev/null +++ b/control-operator/go.sum @@ -0,0 +1,213 @@ +github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= +github.com/Masterminds/semver/v3 v3.4.0/go.mod 
h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes= +github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k= +github.com/evanphx/json-patch v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ= +github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjTM0wiaDU= +github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM= +github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= +github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/gkampitakis/ciinfo v0.3.2 h1:JcuOPk8ZU7nZQjdUhctuhQofk7BGHuIy0c9Ez8BNhXs= +github.com/gkampitakis/ciinfo v0.3.2/go.mod h1:1NIwaOcFChN4fa/B0hEBdAb6npDlFL8Bwx4dfRLRqAo= +github.com/gkampitakis/go-diff v1.3.2 h1:Qyn0J9XJSDTgnsgHRdz9Zp24RaJeKMUHg2+PDZZdC4M= 
+github.com/gkampitakis/go-diff v1.3.2/go.mod h1:LLgOrpqleQe26cte8s36HTWcTmMEur6OPYerdAAS9tk= +github.com/gkampitakis/go-snaps v0.5.15 h1:amyJrvM1D33cPHwVrjo9jQxX8g/7E2wYdZ+01KS3zGE= +github.com/gkampitakis/go-snaps v0.5.15/go.mod h1:HNpx/9GoKisdhw9AFOBT1N7DBs9DiHo/hGheFGBZ+mc= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= +github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= +github.com/go-openapi/jsonpointer v0.22.4 h1:dZtK82WlNpVLDW2jlA1YCiVJFVqkED1MegOUy9kR5T4= +github.com/go-openapi/jsonpointer v0.22.4/go.mod h1:elX9+UgznpFhgBuaMQ7iu4lvvX1nvNsesQ3oxmYTw80= +github.com/go-openapi/jsonreference v0.21.4 h1:24qaE2y9bx/q3uRK/qN+TDwbok1NhbSmGjjySRCHtC8= +github.com/go-openapi/jsonreference v0.21.4/go.mod h1:rIENPTjDbLpzQmQWCj5kKj3ZlmEh+EFVbz3RTUh30/4= +github.com/go-openapi/swag v0.25.4 h1:OyUPUFYDPDBMkqyxOTkqDYFnrhuhi9NR6QVUvIochMU= +github.com/go-openapi/swag v0.25.4/go.mod h1:zNfJ9WZABGHCFg2RnY0S4IOkAcVTzJ6z2Bi+Q4i6qFQ= +github.com/go-openapi/swag/cmdutils v0.25.4 h1:8rYhB5n6WawR192/BfUu2iVlxqVR9aRgGJP6WaBoW+4= +github.com/go-openapi/swag/cmdutils v0.25.4/go.mod h1:pdae/AFo6WxLl5L0rq87eRzVPm/XRHM3MoYgRMvG4A0= +github.com/go-openapi/swag/conv v0.25.4 h1:/Dd7p0LZXczgUcC/Ikm1+YqVzkEeCc9LnOWjfkpkfe4= +github.com/go-openapi/swag/conv v0.25.4/go.mod h1:3LXfie/lwoAv0NHoEuY1hjoFAYkvlqI/Bn5EQDD3PPU= +github.com/go-openapi/swag/fileutils v0.25.4 h1:2oI0XNW5y6UWZTC7vAxC8hmsK/tOkWXHJQH4lKjqw+Y= +github.com/go-openapi/swag/fileutils v0.25.4/go.mod h1:cdOT/PKbwcysVQ9Tpr0q20lQKH7MGhOEb6EwmHOirUk= +github.com/go-openapi/swag/jsonname v0.25.4 h1:bZH0+MsS03MbnwBXYhuTttMOqk+5KcQ9869Vye1bNHI= +github.com/go-openapi/swag/jsonname v0.25.4/go.mod h1:GPVEk9CWVhNvWhZgrnvRA6utbAltopbKwDu8mXNUMag= +github.com/go-openapi/swag/jsonutils v0.25.4 
h1:VSchfbGhD4UTf4vCdR2F4TLBdLwHyUDTd1/q4i+jGZA= +github.com/go-openapi/swag/jsonutils v0.25.4/go.mod h1:7OYGXpvVFPn4PpaSdPHJBtF0iGnbEaTk8AvBkoWnaAY= +github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.4 h1:IACsSvBhiNJwlDix7wq39SS2Fh7lUOCJRmx/4SN4sVo= +github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.4/go.mod h1:Mt0Ost9l3cUzVv4OEZG+WSeoHwjWLnarzMePNDAOBiM= +github.com/go-openapi/swag/loading v0.25.4 h1:jN4MvLj0X6yhCDduRsxDDw1aHe+ZWoLjW+9ZQWIKn2s= +github.com/go-openapi/swag/loading v0.25.4/go.mod h1:rpUM1ZiyEP9+mNLIQUdMiD7dCETXvkkC30z53i+ftTE= +github.com/go-openapi/swag/mangling v0.25.4 h1:2b9kBJk9JvPgxr36V23FxJLdwBrpijI26Bx5JH4Hp48= +github.com/go-openapi/swag/mangling v0.25.4/go.mod h1:6dxwu6QyORHpIIApsdZgb6wBk/DPU15MdyYj/ikn0Hg= +github.com/go-openapi/swag/netutils v0.25.4 h1:Gqe6K71bGRb3ZQLusdI8p/y1KLgV4M/k+/HzVSqT8H0= +github.com/go-openapi/swag/netutils v0.25.4/go.mod h1:m2W8dtdaoX7oj9rEttLyTeEFFEBvnAx9qHd5nJEBzYg= +github.com/go-openapi/swag/stringutils v0.25.4 h1:O6dU1Rd8bej4HPA3/CLPciNBBDwZj9HiEpdVsb8B5A8= +github.com/go-openapi/swag/stringutils v0.25.4/go.mod h1:GTsRvhJW5xM5gkgiFe0fV3PUlFm0dr8vki6/VSRaZK0= +github.com/go-openapi/swag/typeutils v0.25.4 h1:1/fbZOUN472NTc39zpa+YGHn3jzHWhv42wAJSN91wRw= +github.com/go-openapi/swag/typeutils v0.25.4/go.mod h1:Ou7g//Wx8tTLS9vG0UmzfCsjZjKhpjxayRKTHXf2pTE= +github.com/go-openapi/swag/yamlutils v0.25.4 h1:6jdaeSItEUb7ioS9lFoCZ65Cne1/RZtPBZ9A56h92Sw= +github.com/go-openapi/swag/yamlutils v0.25.4/go.mod h1:MNzq1ulQu+yd8Kl7wPOut/YHAAU/H6hL91fF+E2RFwc= +github.com/go-openapi/testify/enable/yaml/v2 v2.0.2 h1:0+Y41Pz1NkbTHz8NngxTuAXxEodtNSI1WG1c/m5Akw4= +github.com/go-openapi/testify/enable/yaml/v2 v2.0.2/go.mod h1:kme83333GCtJQHXQ8UKX3IBZu6z8T5Dvy5+CW3NLUUg= +github.com/go-openapi/testify/v2 v2.0.2 h1:X999g3jeLcoY8qctY/c/Z8iBHTbwLz7R2WXd6Ub6wls= +github.com/go-openapi/testify/v2 v2.0.2/go.mod h1:HCPmvFFnheKK2BuwSA0TbbdxJ3I16pjwMkYkP4Ywn54= +github.com/go-task/slim-sprig/v3 v3.0.0 
h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw= +github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= +github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= +github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= +github.com/google/gnostic-models v0.7.1 h1:SisTfuFKJSKM5CPZkffwi6coztzzeYUhc3v4yxLWH8c= +github.com/google/gnostic-models v0.7.1/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/joshdk/go-junit v1.0.0 h1:S86cUKIdwBHWwA6xCmFlf3RTLfVXYQfvanM5Uh+K6GE= +github.com/joshdk/go-junit v1.0.0/go.mod h1:TiiV0PqkaNfFXjEiyjWM3XXrhVyCa1K4Zfga6W52ung= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod 
h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/maruel/natural v1.1.1 h1:Hja7XhhmvEFhcByqDoHz9QZbkWey+COd9xWfCfn1ioo= +github.com/maruel/natural v1.1.1/go.mod h1:v+Rfd79xlw1AgVBjbO0BEQmptqb5HvL/k9GRHB7ZKEg= +github.com/mfridman/tparse v0.18.0 h1:wh6dzOKaIwkUGyKgOntDW4liXSo37qg5AXbIhkMV3vE= +github.com/mfridman/tparse v0.18.0/go.mod h1:gEvqZTuCgEhPbYk/2lS3Kcxg1GmTxxU7kTC8DvP0i/A= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.27.2 h1:LzwLj0b89qtIy6SSASkzlNvX6WktqurSHwkk2ipF/Ns= +github.com/onsi/ginkgo/v2 v2.27.2/go.mod h1:ArE1D/XhNXBXCBkKOLkbsb2c81dQHCRcF5zwn/ykDRo= 
+github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A= +github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.67.5 h1:pIgK94WWlQt1WLwAC5j2ynLaBRDiinoAb86HZHTUGI4= +github.com/prometheus/common v0.67.5/go.mod h1:SjE/0MzDEEAyrdr5Gqc6G+sXI67maCxzaT3A2+HqjUw= +github.com/prometheus/procfs v0.19.2 h1:zUMhqEW66Ex7OXIiDkll3tl9a1ZdilUOd/F6ZXw4Vws= +github.com/prometheus/procfs v0.19.2/go.mod h1:M0aotyiemPhBCM0z5w87kL22CxfcH05ZpYlu+b4J7mw= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= +github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= +github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod 
h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= +github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= +github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= +github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.1 h1:08RqriUEv8+ArZRYSTXy1LeBScaMpVSTBhCeaZYfMYc= +go.uber.org/zap v1.27.1/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= +go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/mod v0.31.0 
h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI= +golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg= +golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= +golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= +golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= +golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= +golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.39.0 h1:RclSuaJf32jOqZz74CkPA9qFuVTX7vhLlpfj/IGWlqY= +golang.org/x/term v0.39.0/go.mod h1:yxzUCTP/U+FzoxfdKmLaA0RV1WgE0VY7hXBwKtY/4ww= +golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= +golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= +golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= +golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= +golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA= +golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc= +gomodules.xyz/jsonpatch/v2 v2.5.0 h1:JELs8RLM12qJGXU4u/TO3V25KW8GreMKl9pdkk14RM0= +gomodules.xyz/jsonpatch/v2 v2.5.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260122232226-8e98ce8d340d h1:xXzuihhT3gL/ntduUZwHECzAn57E8dA6l8SOtYWdD8Q= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260122232226-8e98ce8d340d/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ= +google.golang.org/grpc v1.78.0 h1:K1XZG/yGDJnzMdd/uZHAkVqJE+xIDOcmdSFZkBUicNc= +google.golang.org/grpc v1.78.0/go.mod h1:I47qjTo4OKbMkjA/aOOwxDIiPSBofUtQUI5EfpWvW7U= 
+google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.6.0 h1:6Al3kEFFP9VJhRz3DID6quisgPnTeZVr4lep9kkxdPA= +google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.6.0/go.mod h1:QLvsjh0OIR0TYBeiu2bkWGTJBUNQ64st52iWj/yA93I= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo= +gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.35.0 h1:iBAU5LTyBI9vw3L5glmat1njFK34srdLmktWwLTprlY= +k8s.io/api v0.35.0/go.mod h1:AQ0SNTzm4ZAczM03QH42c7l3bih1TbAXYo0DkF8ktnA= +k8s.io/apiextensions-apiserver v0.35.0 h1:3xHk2rTOdWXXJM+RDQZJvdx0yEOgC0FgQ1PlJatA5T4= +k8s.io/apiextensions-apiserver v0.35.0/go.mod h1:E1Ahk9SADaLQ4qtzYFkwUqusXTcaV2uw3l14aqpL2LU= +k8s.io/apimachinery v0.35.0 h1:Z2L3IHvPVv/MJ7xRxHEtk6GoJElaAqDCCU0S6ncYok8= +k8s.io/apimachinery v0.35.0/go.mod h1:jQCgFZFR1F4Ik7hvr2g84RTJSZegBc8yHgFWKn//hns= +k8s.io/client-go v0.35.0 h1:IAW0ifFbfQQwQmga0UdoH0yvdqrbwMdq9vIFEhRpxBE= +k8s.io/client-go v0.35.0/go.mod h1:q2E5AAyqcbeLGPdoRB+Nxe3KYTfPce1Dnu1myQdqz9o= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= 
+k8s.io/kube-openapi v0.0.0-20251125145642-4e65d59e963e h1:iW9ChlU0cU16w8MpVYjXk12dqQ4BPFBEgif+ap7/hqQ= +k8s.io/kube-openapi v0.0.0-20251125145642-4e65d59e963e/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ= +k8s.io/utils v0.0.0-20260108192941-914a6e750570 h1:JT4W8lsdrGENg9W+YwwdLJxklIuKWdRm+BC+xt33FOY= +k8s.io/utils v0.0.0-20260108192941-914a6e750570/go.mod h1:xDxuJ0whA3d0I4mf/C4ppKHxXynQ+fxnkmQH0vTHnuk= +sigs.k8s.io/controller-runtime v0.23.0 h1:Ubi7klJWiwEWqDY+odSVZiFA0aDSevOCXpa38yCSYu8= +sigs.k8s.io/controller-runtime v0.23.0/go.mod h1:DBOIr9NsprUqCZ1ZhsuJ0wAnQSIxY/C6VjZbmLgw0j0= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= +sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= +sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/structured-merge-diff/v6 v6.3.1 h1:JrhdFMqOd/+3ByqlP2I45kTOZmTRLBUm5pvRjeheg7E= +sigs.k8s.io/structured-merge-diff/v6 v6.3.1/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= +sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= +sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/control-operator/hack/boilerplate.go.txt b/control-operator/hack/boilerplate.go.txt new file mode 100644 index 000000000..100584a9f --- /dev/null +++ b/control-operator/hack/boilerplate.go.txt @@ -0,0 +1,23 @@ +/* + * === This file is part of ALICE O² === + * + * Copyright 2026 CERN and copyright holders of ALICE O². + * Author: author + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * In applying this license CERN does not waive the privileges and + * immunities granted to it by virtue of its status as an + * Intergovernmental Organization or submit itself to any jurisdiction. + */ diff --git a/control-operator/internal/controller/direct_transition.go b/control-operator/internal/controller/direct_transition.go new file mode 100644 index 000000000..4f73f1bc4 --- /dev/null +++ b/control-operator/internal/controller/direct_transition.go @@ -0,0 +1,145 @@ +/* + * === This file is part of ALICE O² === + * + * Copyright 2026 CERN and copyright holders of ALICE O². + * Author: Michal Tichak + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * In applying this license CERN does not waive the privileges and + * immunities granted to it by virtue of its status as an + * Intergovernmental Organization or submit itself to any jurisdiction.
// State is an OCC-level state of a controlled task.
type State int

// OCC-level states.
//
// The constants are explicitly typed as State. Left untyped they default to
// int whenever they are passed through an interface (fmt verbs, structured
// logging), which bypasses State.String and prints a bare number; typing them
// also makes the compiler reject a Transition constant where a State is
// expected.
const (
	CONFIGURED State = iota
	RUNNING
	STANDBY
	ERROR
)

// Transition is an OCC-level transition that moves a task from one State to
// another.
type Transition int

// OCC-level transitions, typed for the same reasons as the State constants.
const (
	GO_ERROR Transition = iota
	RECOVER
	CONFIGURE
	RESET
	STOP
	START
	EXIT
)

// FromTo is a (source, target) pair of states, used as the key of the
// transition lookup table.
type FromTo struct {
	from, to State
}

// fromStatesToTransition lists every state pair that is connected by exactly
// one transition.
//
// How to handle EXIT?
var fromStatesToTransition = map[FromTo]Transition{
	{from: CONFIGURED, to: ERROR}:   GO_ERROR,
	{from: CONFIGURED, to: RUNNING}: START,
	{from: CONFIGURED, to: STANDBY}: RESET,
	{from: ERROR, to: STANDBY}:      RECOVER,
	{from: RUNNING, to: CONFIGURED}: STOP,
	{from: RUNNING, to: ERROR}:      GO_ERROR,
	{from: STANDBY, to: CONFIGURED}: CONFIGURE,
}

// FromStatesToTransition returns the transition that leads directly from one
// state to another. It returns an error when the table has no entry for the
// pair; the Transition value is meaningless in that case.
func FromStatesToTransition(from, to State) (Transition, error) {
	transition, hasValue := fromStatesToTransition[FromTo{from: from, to: to}]
	if !hasValue {
		return 0, fmt.Errorf("failed to find transition from %s, to %s", from, to)
	}
	return transition, nil
}

// String returns the lowercase OCC name of the state, or "State(n)" for a
// value outside the declared constants.
func (s State) String() string {
	switch s {
	case CONFIGURED:
		return "configured"
	case RUNNING:
		return "running"
	case STANDBY:
		return "standby"
	case ERROR:
		return "error"
	default:
		return fmt.Sprintf("State(%d)", int(s))
	}
}

// StateFromString is the inverse of State.String for the declared constants.
// The match is exact (lowercase); any other input yields an error.
func StateFromString(s string) (State, error) {
	switch s {
	case "configured":
		return CONFIGURED, nil
	case "running":
		return RUNNING, nil
	case "standby":
		return STANDBY, nil
	case "error":
		return ERROR, nil
	default:
		return 0, fmt.Errorf("invalid State: %s", s)
	}
}

// String returns the lowercase name of the transition, or "Transition(n)" for
// a value outside the declared constants.
func (t Transition) String() string {
	switch t {
	case GO_ERROR:
		return "go_error"
	case RECOVER:
		return "recover"
	case CONFIGURE:
		return "configure"
	case RESET:
		return "reset"
	case STOP:
		return "stop"
	case START:
		return "start"
	case EXIT:
		return "exit"
	default:
		return fmt.Sprintf("Transition(%d)", int(t))
	}
}

// TransitionFromString is the inverse of Transition.String for the declared
// constants. The match is exact (lowercase); any other input yields an error.
func TransitionFromString(s string) (Transition, error) {
	switch s {
	case "go_error":
		return GO_ERROR, nil
	case "recover":
		return RECOVER, nil
	case "configure":
		return CONFIGURE, nil
	case "reset":
		return RESET, nil
	case "stop":
		return STOP, nil
	case "start":
		return START, nil
	case "exit":
		return EXIT, nil
	default:
		return 0, fmt.Errorf("invalid Transition: %s", s)
	}
}
// Names of the FairMQ device states, spelled the way the OCC plugin uses them.
const (
	fmqOK                 = "OK"
	fmqIdle               = "IDLE"
	fmqInitializingDevice = "INITIALIZING DEVICE"
	fmqInitialized        = "INITIALIZED"
	fmqBound              = "BOUND"
	fmqDeviceReady        = "DEVICE READY"
	fmqReady              = "READY"
	fmqRunning            = "RUNNING"
	fmqError              = "ERROR"
	fmqExiting            = "EXITING"
)

// Names of the FairMQ transition events, spelled the way the OCC plugin uses
// them.
const (
	fmqEvtInitDevice   = "INIT DEVICE"
	fmqEvtCompleteInit = "COMPLETE INIT"
	fmqEvtBind         = "BIND"
	fmqEvtConnect      = "CONNECT"
	fmqEvtInitTask     = "INIT TASK"
	fmqEvtRun          = "RUN"
	fmqEvtStop         = "STOP"
	fmqEvtResetTask    = "RESET TASK"
	fmqEvtResetDevice  = "RESET DEVICE"
	fmqEvtEnd          = "END"
	fmqEvtAuto         = "Auto"
)

// fmqToOCCState translates the FairMQ states that have an OCC counterpart into
// the lowercase OCC state name. Intermediate FairMQ states are deliberately
// absent.
var fmqToOCCState = map[string]string{
	fmqIdle:    "standby",
	fmqReady:   "configured",
	fmqRunning: "running",
	fmqError:   "error",
	fmqExiting: "done",
}

// occStateForFmqState converts a FairMQ state name (compared
// case-insensitively) into its OCC name. States without an entry in
// fmqToOCCState, including the empty string, are reported as "not-mapped".
func occStateForFmqState(fmqState string) string {
	occState, known := fmqToOCCState[strings.ToUpper(fmqState)]
	if !known {
		// TODO: it might be better to do proper error handling for doFairMQStep when intermediate step fail
		return "not-mapped"
	}
	return occState
}

// doFairMQStep sends a single FairMQ-level gRPC Transition and returns the
// resulting FairMQ state as reported by the OCC plugin.
+func (c *OccClient) doFairMQStep(ctx context.Context, srcFmqState, fmqEvent string, args map[string]string) (string, error) { + var configEntries []*pb.ConfigEntry + for k, v := range args { + configEntries = append(configEntries, &pb.ConfigEntry{Key: k, Value: v}) + } + + request := &pb.TransitionRequest{ + SrcState: srcFmqState, + TransitionEvent: fmqEvent, + Arguments: configEntries, + } + c.log.V(1).Info("FairMQ step", "event", fmqEvent, "src", srcFmqState) + + reply, err := c.client.Transition(ctx, request) + if err != nil { + return "", fmt.Errorf("FairMQ gRPC transition %q failed: %w", fmqEvent, err) + } + + resultState := reply.GetState() + if !reply.GetOk() { + return resultState, fmt.Errorf("FairMQ transition %q not ok, state: %s", fmqEvent, resultState) + } + c.log.V(1).Info("FairMQ step done", "event", fmqEvent, "result", resultState) + return resultState, nil +} + +// fairMQConfigure drives the FairMQ device through the 5-step CONFIGURE sequence: +// +// IDLE → (INIT DEVICE + args) → INITIALIZING DEVICE +// → (COMPLETE INIT) → INITIALIZED +// → (BIND) → BOUND +// → (CONNECT) → DEVICE READY +// → (INIT TASK) → READY (= CONFIGURED in OCC terms) +// +// On failure in any intermediate step, a RESET DEVICE rollback is attempted. 
+func (c *OccClient) fairMQConfigure(ctx context.Context, args map[string]string) (string, error) { + // Step 1: INIT DEVICE — channel config args go here + state, err := c.doFairMQStep(ctx, fmqIdle, fmqEvtInitDevice, args) + if state != fmqInitializingDevice { + return occStateForFmqState(state), fmt.Errorf("INIT DEVICE: expected %s, got %s: %w", fmqInitializingDevice, state, err) + } + + // Step 2: COMPLETE INIT + state, err = c.doFairMQStep(ctx, fmqInitializingDevice, fmqEvtCompleteInit, nil) + if state != fmqInitialized { + return occStateForFmqState(state), fmt.Errorf("COMPLETE INIT: expected %s, got %s: %w", fmqInitialized, state, err) + } + + // Step 3: BIND + state, err = c.doFairMQStep(ctx, fmqInitialized, fmqEvtBind, nil) + if state == fmqInitialized { + // Stuck — roll back to IDLE + rollback, _ := c.doFairMQStep(ctx, fmqInitialized, fmqEvtResetDevice, nil) + return occStateForFmqState(rollback), fmt.Errorf("BIND: stuck in %s, rolled back to %s", fmqInitialized, rollback) + } else if state != fmqBound { + return occStateForFmqState(state), fmt.Errorf("BIND: expected %s, got %s: %w", fmqBound, state, err) + } + + // Step 4: CONNECT + state, err = c.doFairMQStep(ctx, fmqBound, fmqEvtConnect, nil) + if state == fmqBound { + // Stuck — roll back to IDLE + rollback, _ := c.doFairMQStep(ctx, fmqBound, fmqEvtResetDevice, nil) + return occStateForFmqState(rollback), fmt.Errorf("CONNECT: stuck in %s, rolled back to %s", fmqBound, rollback) + } else if state != fmqDeviceReady { + return occStateForFmqState(state), fmt.Errorf("CONNECT: expected %s, got %s: %w", fmqDeviceReady, state, err) + } + + // Step 5: INIT TASK + state, err = c.doFairMQStep(ctx, fmqDeviceReady, fmqEvtInitTask, nil) + if state == fmqDeviceReady { + // Stuck — roll back to IDLE + rollback, _ := c.doFairMQStep(ctx, fmqDeviceReady, fmqEvtResetDevice, nil) + return occStateForFmqState(rollback), fmt.Errorf("INIT TASK: stuck in %s, rolled back to %s", fmqDeviceReady, rollback) + } + + return 
occStateForFmqState(state), err +} + +// fairMQReset drives the FairMQ device through the 2-step RESET sequence: +// +// READY → (RESET TASK) → DEVICE READY +// → (RESET DEVICE + args) → IDLE (= STANDBY in OCC terms) +func (c *OccClient) fairMQReset(ctx context.Context, args map[string]string) (string, error) { + // Step 1: RESET TASK + state, err := c.doFairMQStep(ctx, fmqReady, fmqEvtResetTask, nil) + if state != fmqDeviceReady { + return occStateForFmqState(state), fmt.Errorf("RESET TASK: expected %s, got %s: %w", fmqDeviceReady, state, err) + } + + // Step 2: RESET DEVICE — args go here, matching executor doReset behaviour + state, err = c.doFairMQStep(ctx, fmqDeviceReady, fmqEvtResetDevice, args) + if state == fmqDeviceReady { + // Stuck — roll back to READY + rollback, _ := c.doFairMQStep(ctx, fmqDeviceReady, fmqEvtInitTask, nil) + return occStateForFmqState(rollback), fmt.Errorf("RESET DEVICE: stuck in %s, rolled back to %s", fmqDeviceReady, rollback) + } + + return occStateForFmqState(state), err +} + +// FairMQTransitionRequest drives a FairMQ OCC device through the multi-step +// sequence that corresponds to a single OCC-level transition (CONFIGURE, RESET, +// START, STOP). Returns the resulting OCC state in lowercase ("configured", +// "running", "standby", "error"). 
+func (c *OccClient) FairMQTransitionRequest(ctx context.Context, fromState, toState string, args map[string]string) (string, error) {
+	// Any failure before a FairMQ step is attempted hands fromState back unchanged.
+	if c == nil || c.client == nil {
+		return fromState, fmt.Errorf("nil client for FairMQTransitionRequest")
+	}
+
+	src, err := StateFromString(fromState)
+	if err != nil {
+		return fromState, err
+	}
+	dst, err := StateFromString(toState)
+	if err != nil {
+		return fromState, err
+	}
+	event, err := FromStatesToTransition(src, dst)
+	if err != nil {
+		return fromState, err
+	}
+
+	c.log.Info("FairMQTransitionRequest", "from", fromState, "to", toState, "transition", event.String(), "args", args)
+
+	switch event {
+	case CONFIGURE:
+		return c.fairMQConfigure(ctx, args)
+	case RESET:
+		return c.fairMQReset(ctx, args)
+	case START:
+		// START maps onto a single FairMQ step.
+		reached, stepErr := c.doFairMQStep(ctx, fmqReady, fmqEvtRun, args)
+		return occStateForFmqState(reached), stepErr
+	case STOP:
+		// STOP maps onto a single FairMQ step.
+		reached, stepErr := c.doFairMQStep(ctx, fmqRunning, fmqEvtStop, args)
+		return occStateForFmqState(reached), stepErr
+	}
+
+	// Every other OCC transition has no FairMQ sequence here.
+	return fromState, fmt.Errorf("FairMQ transition %s not implemented", event.String())
+}
diff --git a/control-operator/internal/controller/grpc_client.go b/control-operator/internal/controller/grpc_client.go
new file mode 100644
index 000000000..5c2c13333
--- /dev/null
+++ b/control-operator/internal/controller/grpc_client.go
@@ -0,0 +1,305 @@
+/*
+ * === This file is part of ALICE O² ===
+ *
+ * Copyright 2026 CERN and copyright holders of ALICE O².
+ * Author: Michal Tichak
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * In applying this license CERN does not waive the privileges and
+ * immunities granted to it by virtue of its status as an
+ * Intergovernmental Organization or submit itself to any jurisdiction.
+ */
+
+package controller
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/go-logr/logr"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/connectivity"
+	"google.golang.org/grpc/credentials/insecure"
+	"google.golang.org/grpc/status"
+	"k8s.io/apimachinery/pkg/types"
+
+	aliecsv1alpha1 "github.com/AliceO2Group/ControlOperator/api/v1alpha1"
+	pb "github.com/AliceO2Group/ControlOperator/internal/controller/protos/generated"
+)
+
+// jsonOccClient mirrors executor/executorcmd/nopb/occclient.go: uses short method
+// names and JSON content subtype for OCC lite / FairMQ processes.
+type jsonOccClient struct{ conn *grpc.ClientConn }
+
+// jsonEventStreamClient is the receive side of an EventStream call opened by
+// jsonOccClient.
+type jsonEventStreamClient struct{ grpc.ClientStream }
+
+// Recv reads the next EventStreamReply from the stream.
+func (x *jsonEventStreamClient) Recv() (*pb.EventStreamReply, error) {
+	m := new(pb.EventStreamReply)
+	if err := x.ClientStream.RecvMsg(m); err != nil {
+		return nil, err
+	}
+	return m, nil
+}
+
+// jsonStateStreamClient is the receive side of a StateStream call opened by
+// jsonOccClient.
+type jsonStateStreamClient struct{ grpc.ClientStream }
+
+// Recv reads the next StateStreamReply from the stream.
+func (x *jsonStateStreamClient) Recv() (*pb.StateStreamReply, error) {
+	m := new(pb.StateStreamReply)
+	if err := x.ClientStream.RecvMsg(m); err != nil {
+		return nil, err
+	}
+	return m, nil
+}
+
+// EventStream opens the server-streaming "EventStream" call with the JSON
+// content subtype, sends the single request, closes the send side and returns
+// the stream for receiving.
+func (c *jsonOccClient) EventStream(ctx context.Context, in *pb.EventStreamRequest, opts ...grpc.CallOption) (pb.Occ_EventStreamClient, error) {
+	// NOTE(review): append may write into the caller's backing array when opts
+	// was passed as a slice with spare capacity — confirm callers never do that.
+	opts = append(opts, grpc.CallContentSubtype("json"))
+	stream, err := c.conn.NewStream(ctx, &grpc.StreamDesc{StreamName: "EventStream", ServerStreams: true}, "EventStream", opts...)
+	if err != nil {
+		return nil, err
+	}
+	x := &jsonEventStreamClient{stream}
+	if err := x.SendMsg(in); err != nil {
+		return nil, err
+	}
+	if err := x.CloseSend(); err != nil {
+		return nil, err
+	}
+	return x, nil
+}
+
+// StateStream opens the server-streaming "StateStream" call with the JSON
+// content subtype, sends the single request, closes the send side and returns
+// the stream for receiving.
+func (c *jsonOccClient) StateStream(ctx context.Context, in *pb.StateStreamRequest, opts ...grpc.CallOption) (pb.Occ_StateStreamClient, error) {
+	opts = append(opts, grpc.CallContentSubtype("json"))
+	stream, err := c.conn.NewStream(ctx, &grpc.StreamDesc{StreamName: "StateStream", ServerStreams: true}, "StateStream", opts...)
+	if err != nil {
+		return nil, err
+	}
+	x := &jsonStateStreamClient{stream}
+	if err := x.SendMsg(in); err != nil {
+		return nil, err
+	}
+	if err := x.CloseSend(); err != nil {
+		return nil, err
+	}
+	return x, nil
+}
+
+// GetState performs the unary "GetState" call with the JSON content subtype.
+func (c *jsonOccClient) GetState(ctx context.Context, in *pb.GetStateRequest, opts ...grpc.CallOption) (*pb.GetStateReply, error) {
+	out := new(pb.GetStateReply)
+	opts = append(opts, grpc.CallContentSubtype("json"))
+	if err := c.conn.Invoke(ctx, "GetState", in, out, opts...); err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// Transition performs the unary "Transition" call with the JSON content subtype.
+func (c *jsonOccClient) Transition(ctx context.Context, in *pb.TransitionRequest, opts ...grpc.CallOption) (*pb.TransitionReply, error) {
+	out := new(pb.TransitionReply)
+	opts = append(opts, grpc.CallContentSubtype("json"))
+	if err := c.conn.Invoke(ctx, "Transition", in, out, opts...); err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// OccClient bundles the gRPC connection to one task with what is needed to
+// write the states it reports back to the corresponding Task resource.
+type OccClient struct {
+	client      pb.OccClient            // JSON client in "fairmq" mode, generated protobuf client otherwise
+	conn        *grpc.ClientConn        // underlying connection, closed by Close
+	crdName     types.NamespacedName    // Task whose status ConsumeStateStream updates
+	reconciler  *TaskReconciler         // used to read and update that Task
+	cancel      *context.CancelFunc     // nil until ConsumeIfReady has started the stream consumers
+	log         logr.Logger
+	controlMode string                  // "fairmq" selects FairMQ state translation
+}
+
+// fromDeviceState translates a raw device state to an OCC state name,
+// mirroring executor/executorcmd/transitioner FairMQ and Direct FromDeviceState logic.
+func fromDeviceState(controlMode string, state string) string {
+	// Only "fairmq" devices report FairMQ state names; any other control mode
+	// just gets its state lower-cased.
+	if controlMode == "fairmq" {
+		return occStateForFmqState(state)
+	}
+	return strings.ToLower(state)
+}
+
+// NewOccClient creates the client for the task listening on address and starts
+// connecting in the background. It does not wait for the connection: use
+// WaitUntilConnected or ConsumeIfReady to find out when it is usable.
+// ctx is currently unused.
+func NewOccClient(ctx context.Context, address string, controlMode string, reconciler *TaskReconciler, crdName types.NamespacedName, log logr.Logger) (*OccClient, error) {
+	// grpc.NewClient performs no I/O, so an error here means the target or the
+	// options are invalid, not that the server is unreachable.
+	conn, err := grpc.NewClient(address, grpc.WithTransportCredentials(insecure.NewCredentials()))
+	if err != nil {
+		return nil, err
+	}
+
+	var occClient pb.OccClient
+	if controlMode == "fairmq" {
+		occClient = &jsonOccClient{conn}
+	} else {
+		occClient = pb.NewOccClient(conn)
+	}
+
+	client := &OccClient{client: occClient, conn: conn, crdName: crdName, reconciler: reconciler, log: log, controlMode: controlMode}
+
+	// Leave the idle state right away instead of waiting for the first RPC.
+	// Connect does not block.
+	client.conn.Connect()
+
+	return client, nil
+}
+
+// ConsumeIfReady starts the event and state stream consumers once the
+// connection is ready. It reports whether the consumers have been started, now
+// or by an earlier call.
+//
+// NOTE(review): c.cancel is never reset, so consumers that stop on a stream
+// error are not started again by later calls.
+func (c *OccClient) ConsumeIfReady(ctx context.Context) bool {
+	if c.cancel != nil {
+		return true
+	}
+
+	if connState := c.conn.GetState(); connState != connectivity.Ready {
+		c.log.V(1).Info("connection is in different state than ready", "conn state", connState.String())
+		return false
+	}
+	// The consumers get their own context rather than ctx, so they run until
+	// Close cancels them.
+	clientCtx, clientCancel := context.WithCancel(context.Background())
+	c.cancel = &clientCancel
+
+	go c.ConsumeEventStream(clientCtx)
+	go c.ConsumeStateStream(clientCtx)
+	return true
+}
+
+// WaitUntilConnected blocks until the connection is ready or ctx is done, in
+// which case the context error is returned wrapped.
+func (c *OccClient) WaitUntilConnected(ctx context.Context) error {
+	for {
+		state := c.conn.GetState()
+		if state == connectivity.Ready {
+			return nil
+		}
+
+		if !c.conn.WaitForStateChange(ctx, state) {
+			return fmt.Errorf("connection failed: %w", ctx.Err())
+		}
+	}
+}
+
+// Close stops the stream consumers, if they were started, and closes the
+// connection.
+func (c *OccClient) Close() error {
+	if c.cancel != nil {
+		(*c.cancel)()
+	}
+	if c.conn != nil {
+		return c.conn.Close()
+	}
+	return nil
+}
+
+// GetState asks the task for its current state and returns the reply with the
+// state already translated to its OCC name.
+func (c *OccClient) GetState(ctx context.Context) (*pb.GetStateReply, error) {
+	if c == nil || c.client == nil {
+		return nil, errors.New("nil client for GetState")
+	}
+	c.log.V(1).Info("GetState")
+	result, err := c.client.GetState(ctx, &pb.GetStateRequest{})
+	if err == nil {
+		result.State = fromDeviceState(c.controlMode, result.State)
+	}
+	return result, err
+}
+
+// TransitionRequest asks the task to perform the single OCC transition that
+// leads from fromState to toState, passing args as configuration entries.
+// It fails without contacting the task when either state name is unknown or
+// no transition connects the two.
+func (c *OccClient) TransitionRequest(ctx context.Context, fromState string, toState string, args map[string]string) (*pb.TransitionReply, error) {
+	if c == nil || c.client == nil {
+		return nil, errors.New("nil client for TransitionRequest")
+	}
+
+	from, err := StateFromString(fromState)
+	if err != nil {
+		return nil, err
+	}
+	to, err := StateFromString(toState)
+	if err != nil {
+		return nil, err
+	}
+
+	transition, err := FromStatesToTransition(from, to)
+	if err != nil {
+		return nil, err
+	}
+
+	var configEntries []*pb.ConfigEntry
+	for k, v := range args {
+		configEntries = append(configEntries, &pb.ConfigEntry{Key: k, Value: v})
+	}
+
+	// The request carries state and event names in upper case.
+	request := &pb.TransitionRequest{
+		SrcState:        strings.ToUpper(fromState),
+		TransitionEvent: strings.ToUpper(transition.String()),
+		Arguments:       configEntries,
+	}
+	c.log.V(1).Info("TransitionRequest", "req", request)
+
+	return c.client.Transition(ctx, request)
+}
+
+// ConsumeEventStream logs every device event received until the stream ends or
+// ctx is cancelled.
+func (c *OccClient) ConsumeEventStream(ctx context.Context) {
+	c.log.Info("starting to consume EventStream")
+	stream, err := c.client.EventStream(ctx, &pb.EventStreamRequest{})
+	if err != nil {
+		c.log.Error(err, "failed to start event stream")
+		return
+	}
+	for {
+		resp, err := stream.Recv()
+		if err != nil {
+			// Cancellation is the regular way to stop (see Close), so it is not
+			// reported as an error.
+			if st, ok := status.FromError(err); ok && st.Code() == codes.Canceled {
+				c.log.Info("EventStream stopped: context cancelled")
+				return
+			}
+			c.log.Error(err, "EventStream stopped with error")
+			return
+		}
+		c.log.Info("received event", "event", resp.GetEvent())
+	}
+}
+
+// ConsumeStateStream writes every state reported by the task into the status
+// of the Task resource, until the stream ends or ctx is cancelled.
+func (c *OccClient) ConsumeStateStream(ctx context.Context) {
+	c.log.Info("starting to consume StateStream")
+	stream, err := c.client.StateStream(ctx, &pb.StateStreamRequest{})
+	if err != nil {
+		c.log.Error(err, "failed to start state stream")
+		return
+	}
+
+	for {
+		resp, err := stream.Recv()
+		if err != nil {
+			if st, ok := status.FromError(err); ok && st.Code() == codes.Canceled {
+				c.log.Info("StateStream stopped: context cancelled")
+				return
+			}
+			c.log.Error(err, "StateStream stopped with error")
+			return
+		}
+		c.log.V(1).Info("received state update", "type", resp.GetType(), "state", resp.GetState())
+
+		task := &aliecsv1alpha1.Task{}
+		if err := c.reconciler.Get(ctx, c.crdName, task); err != nil {
+			c.log.V(1).Error(err, "state event could not find task")
+			continue
+		}
+		// TODO: add some checks??
+		task.Status.State = fromDeviceState(c.controlMode, resp.GetState())
+		// The update gets its own short-lived context instead of the stream's.
+		updateCtx, cancel := context.WithTimeout(context.Background(), time.Second)
+		if err := c.reconciler.Status().Update(updateCtx, task); err != nil {
+			c.log.Error(err, "state event did not apply state change to task", "state", task.Status.State)
+		}
+		cancel()
+	}
+}
diff --git a/control-operator/internal/controller/jsoncodec.go b/control-operator/internal/controller/jsoncodec.go
new file mode 100644
index 000000000..a4070f8e6
--- /dev/null
+++ b/control-operator/internal/controller/jsoncodec.go
@@ -0,0 +1,49 @@
+/*
+ * === This file is part of ALICE O² ===
+ *
+ * Copyright 2026 CERN and copyright holders of ALICE O².
+ * Author: Michal Tichak
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * In applying this license CERN does not waive the privileges and + * immunities granted to it by virtue of its status as an + * Intergovernmental Organization or submit itself to any jurisdiction. + */ + +package controller + +import ( + "encoding/json" + + "google.golang.org/grpc/encoding" +) + +func init() { + encoding.RegisterCodec(&jsonCodec{}) +} + +type jsonCodec struct{} + +func (*jsonCodec) Marshal(v interface{}) ([]byte, error) { + return json.Marshal(v) +} + +func (*jsonCodec) Unmarshal(data []byte, v interface{}) error { + return json.Unmarshal(data, v) +} + +func (*jsonCodec) Name() string { + return "json" +} diff --git a/control-operator/internal/controller/protos/occ.proto b/control-operator/internal/controller/protos/occ.proto new file mode 100644 index 000000000..ca00369c1 --- /dev/null +++ b/control-operator/internal/controller/protos/occ.proto @@ -0,0 +1,99 @@ +/* + * === This file is part of ALICE O² === + * + * Copyright 2018 CERN and copyright holders of ALICE O². + * Author: Teo Mrnjavac + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ * + * In applying this license CERN does not waive the privileges and + * immunities granted to it by virtue of its status as an + * Intergovernmental Organization or submit itself to any jurisdiction. + */ + +syntax = "proto3"; + +package occ_pb; +option go_package = "github.com/AliceO2Group/ControlOperator/internal/controller/protos/generated;pb"; + +////////////////////////////////////////////////////// + +service Occ { + // We have to have a notification stream because the FairMQDevice might transition + // on its own for whatever reason. + rpc EventStream (EventStreamRequest) returns (stream EventStreamReply) {} + rpc StateStream (StateStreamRequest) returns (stream StateStreamReply) {} + rpc GetState (GetStateRequest) returns (GetStateReply) {} + rpc Transition (TransitionRequest) returns (TransitionReply) {} +} + +enum StateChangeTrigger { + EXECUTOR = 0; + DEVICE_INTENTIONAL = 1; + DEVICE_ERROR = 2; +} + +enum StateType { + STATE_STABLE = 0; + STATE_INTERMEDIATE = 1; +} + +enum DeviceEventType { + NULL_DEVICE_EVENT = 0; + END_OF_STREAM = 1; + BASIC_TASK_TERMINATED = 2; + TASK_INTERNAL_ERROR = 3; +} + +message StateStreamRequest {} + +message StateStreamReply { + StateType type = 1; + string state = 2; +} + +message EventStreamRequest {} + +message DeviceEvent { + DeviceEventType type = 1; +} + +message EventStreamReply { + DeviceEvent event = 1; +} + +message GetStateRequest {} + +message GetStateReply { + string state = 1; + int32 pid = 2; +} + +message ConfigEntry { + string key = 1; + string value = 2; +} + +message TransitionRequest { + string srcState = 1; + string transitionEvent = 2; + repeated ConfigEntry arguments = 3; +} + +message TransitionReply { + StateChangeTrigger trigger = 1; + string state = 2; + string transitionEvent = 3; + bool ok = 4; +} diff --git a/control-operator/internal/controller/suite_test.go b/control-operator/internal/controller/suite_test.go new file mode 100644 index 000000000..7be153c8f --- /dev/null +++ 
b/control-operator/internal/controller/suite_test.go @@ -0,0 +1,89 @@ +/* + * === This file is part of ALICE O² === + * + * Copyright 2026 CERN and copyright holders of ALICE O². + * Author: Michal Tichak + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * In applying this license CERN does not waive the privileges and + * immunities granted to it by virtue of its status as an + * Intergovernmental Organization or submit itself to any jurisdiction. + */ + +package controller + +import ( + "path/filepath" + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + aliecsv1alpha1 "github.com/AliceO2Group/ControlOperator/api/v1alpha1" + //+kubebuilder:scaffold:imports +) + +// These tests use Ginkgo (BDD-style Go testing framework). Refer to +// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. 
+ +var ( + cfg *rest.Config + k8sClient client.Client + testEnv *envtest.Environment +) + +func TestControllers(t *testing.T) { + RegisterFailHandler(Fail) + + RunSpecs(t, "Controller Suite") +} + +var _ = BeforeSuite(func() { + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) + + By("bootstrapping test environment") + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")}, + ErrorIfCRDPathMissing: true, + } + + var err error + // cfg is defined in this file globally. + cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + err = aliecsv1alpha1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + //+kubebuilder:scaffold:scheme + + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) +}) + +var _ = AfterSuite(func() { + By("tearing down the test environment") + err := testEnv.Stop() + Expect(err).NotTo(HaveOccurred()) +}) diff --git a/control-operator/internal/controller/task_controller.go b/control-operator/internal/controller/task_controller.go new file mode 100644 index 000000000..93ec00b96 --- /dev/null +++ b/control-operator/internal/controller/task_controller.go @@ -0,0 +1,384 @@ +/* + * === This file is part of ALICE O² === + * + * Copyright 2026 CERN and copyright holders of ALICE O². + * Author: Michal Tichak + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * In applying this license CERN does not waive the privileges and + * immunities granted to it by virtue of its status as an + * Intergovernmental Organization or submit itself to any jurisdiction. + */ + +package controller + +import ( + "context" + "encoding/json" + "fmt" + "reflect" + "strings" + "time" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/log" + + aliecsv1alpha1 "github.com/AliceO2Group/ControlOperator/api/v1alpha1" + "github.com/go-logr/logr" +) + +// TaskReconciler reconciles a Task object +type TaskReconciler struct { + client.Client + Scheme *runtime.Scheme + Recorder record.EventRecorder +} + +var clientsForContainers map[string]*OccClient = make(map[string]*OccClient) + +const taskFinalizer string = "aliecs.alice.cern/finalizer" + +//+kubebuilder:rbac:groups=aliecs.alice.cern,resources=tasks,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=aliecs.alice.cern,resources=tasks/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=aliecs.alice.cern,resources=tasks/finalizers,verbs=update +//+kubebuilder:rbac:groups=core,resources=events,verbs=create;patch +//+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;create;update;patch;delete + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. 
+// TODO(user): Modify the Reconcile function to compare the state specified by
+// the Task object against the actual cluster state, and then
+// perform operations to make the cluster state reflect the state specified by
+// the user.
+//
+// For more details, check Reconcile and its Result here:
+// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.15.0/pkg/reconcile
+// TODO: right now if POD fails and stops sooner, reconciliation creates a new one...
+func (r *TaskReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	log := log.FromContext(ctx)
+
+	t := &aliecsv1alpha1.Task{}
+	if err := r.Get(ctx, req.NamespacedName, t); err != nil {
+		return ctrl.Result{}, client.IgnoreNotFound(err)
+	}
+
+	log.V(1).Info("new reconcile request on existing Task Kind", "request", req)
+
+	// Handle finalizers for clean deletion
+	if res, stop, err := r.handleFinalizer(ctx, t, log); err != nil || stop {
+		return res, err
+	}
+
+	// TODO: add this check to direct_transition.go
+	if t.Status.State == "error" && t.Spec.State != "standby" {
+		return ctrl.Result{}, nil
+	}
+
+	// Get existing pod or create new one, Reconciler owns the pod, so all changes to the pod and task will trigger
+	// reconciliation
+	// TODO: make this function
+	existingPod := &v1.Pod{}
+	err := r.Get(ctx, types.NamespacedName{
+		Name:      podNameFromTask(t.Name),
+		Namespace: t.Namespace,
+	}, existingPod)
+	if err != nil {
+		if errors.IsNotFound(err) {
+			pod := r.podForTask(t)
+			log.Info("Creating a new Pod", "Pod.Namespace", pod.Namespace, "Pod.Name", pod.Name)
+			if err = r.Create(ctx, pod); err != nil {
+				return ctrl.Result{}, err
+			}
+			return ctrl.Result{}, nil
+		}
+		return ctrl.Result{}, err
+	}
+
+	if failed, reason := isPodFailed(existingPod); failed {
+		if t.Status.State != "error" {
+			log.Info("Pod failure detected, setting Task state to error", "reason", reason)
+			t.Status.State = "error"
+			t.Status.Error = reason
+			t.Status.Pod = *existingPod.Status.DeepCopy()
+			if err := r.Status().Update(ctx, t); err != nil {
+				return ctrl.Result{}, err
+			}
+		}
+		// Always stop reconciliation if the Pod is in a failed state
+		return ctrl.Result{}, nil
+	}
+
+	// Clients are looked up by namespace and name: the name alone is not
+	// unique across namespaces.
+	key := clientKey(t)
+
+	if _, exists := clientsForContainers[key]; !exists {
+		if existingPod.Status.PodIP == "" {
+			log.Info("pod doesn't have IP yet, we wait for different event")
+			return ctrl.Result{}, nil
+		}
+		res, err := r.createGRPCConsumer(ctx, t, existingPod, req.NamespacedName, log)
+		if err != nil || !res.IsZero() {
+			return res, err
+		}
+	}
+
+	// when this successfully returns we should be able to communicate via gRPC
+	if res := r.consumeGRPCConsumerIfReady(ctx, t, log); !res.IsZero() {
+		return res, nil
+	}
+
+	// As far as I understand this is necessary because even though we have gRPC streams, we cannot rely
+	// on them being implemented
+	if t.Status.State == "" {
+		log.V(1).Info("Status.State is empty, querying container")
+		client, exists := clientsForContainers[key]
+		if !exists {
+			return ctrl.Result{Requeue: true}, nil
+		}
+
+		stateReply, err := client.GetState(ctx)
+		if err != nil {
+			log.Error(err, "Failed to GetState")
+			return ctrl.Result{}, nil
+		}
+
+		log.V(1).Info("Current State inside POD is: ", "state", stateReply.State)
+		t.Status.State = stateReply.State
+		log.Info("Updating empty Task status ", "state", t.Status.State)
+		if err := r.Status().Update(ctx, t); err != nil {
+			return ctrl.Result{}, err
+		}
+		return ctrl.Result{}, nil
+	}
+
+	// Snapshot taken before any change below, so the final update can be
+	// skipped when nothing changed.
+	oldStatus := t.Status.DeepCopy()
+
+	t.Status.Pod = *existingPod.Status.DeepCopy()
+
+	// Handle Spec -> gRPC State Sync
+	if t.Status.State != t.Spec.State {
+		client, exists := clientsForContainers[key]
+		if !exists {
+			return ctrl.Result{Requeue: true}, nil
+		}
+
+		stateReply, err := client.GetState(ctx)
+		if err != nil {
+			// Drop the client so that the next reconcile builds a fresh one.
+			log.Info("Failed to get state for sync, retrying in 5s", "error", err.Error())
+			client.Close()
+			delete(clientsForContainers, key)
+			return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
+		}
+
+		if stateReply.GetState() != t.Spec.State {
+			var (
+				newState string
+				transErr error
+			)
+
+			if t.Spec.Control.Mode == "fairmq" {
+				newState, transErr = client.FairMQTransitionRequest(ctx, stateReply.GetState(), t.Spec.State, t.Spec.Arguments)
+			} else {
+				reply, err := client.TransitionRequest(ctx, stateReply.GetState(), t.Spec.State, t.Spec.Arguments)
+				transErr = err
+				if err == nil && reply.GetOk() {
+					newState = strings.ToLower(reply.GetState())
+				}
+			}
+
+			if transErr != nil {
+				log.Error(transErr, "failed to Transition")
+				t.Status.Error = transErr.Error()
+				log.Info("Updating Task status")
+				if err := r.Status().Update(ctx, t); err != nil {
+					return ctrl.Result{}, err
+				}
+				return ctrl.Result{}, nil
+			}
+			if newState != "" {
+				log.V(1).Info("succeeded in transition", "new state", newState)
+				t.Status.State = newState
+			}
+		}
+
+	}
+
+	// Status Update: Only call the API if the status actually changed
+	// TODO: should I also add some query for GetState in case some Update or whatever fails?
+	// TODO: is this the best way? shouldn't we do the Update the moment something changes?
+	if !reflect.DeepEqual(oldStatus, &t.Status) {
+		log.Info("Updating Task status")
+		if err := r.Status().Update(ctx, t); err != nil {
+			return ctrl.Result{}, err
+		}
+	}
+
+	return ctrl.Result{}, nil
+}
+
+// clientKey returns the key under which the gRPC client of t is stored in
+// clientsForContainers: "<namespace>/<name>".
+func clientKey(t *aliecsv1alpha1.Task) string {
+	return types.NamespacedName{Namespace: t.Namespace, Name: t.Name}.String()
+}
+
+// createGRPCConsumer creates the gRPC client for the task running in found and
+// registers it in clientsForContainers. A failure to create the client is
+// logged and turned into a requeue after 5 seconds, not returned as an error.
+func (r *TaskReconciler) createGRPCConsumer(ctx context.Context, t *aliecsv1alpha1.Task, found *v1.Pod, taskName types.NamespacedName, log logr.Logger) (ctrl.Result, error) {
+	addr := fmt.Sprintf("%s:%d", found.Status.PodIP, t.Spec.Control.Port)
+	ctx, cancel := context.WithTimeout(ctx, 3*time.Second)
+	defer cancel()
+
+	log.Info("creating gRPC client for task to task: ", "address", addr)
+	client, err := NewOccClient(ctx, addr, t.Spec.Control.Mode, r, taskName, log)
+	if err != nil {
+		log.Error(err, "failed to create gRPC client, retrying in 5 seconds")
+		// TODO: what would be proper error handling here?
+		return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
+	}
+
+	clientsForContainers[clientKey(t)] = client
+
+	return ctrl.Result{}, nil
+}
+
+// consumeGRPCConsumerIfReady makes sure the stream consumers of the task's
+// client are running. A zero Result means they are; anything else asks for a
+// requeue.
+func (r *TaskReconciler) consumeGRPCConsumerIfReady(ctx context.Context, t *aliecsv1alpha1.Task, log logr.Logger) ctrl.Result {
+	client, exists := clientsForContainers[clientKey(t)]
+
+	if !exists {
+		log.Info("didn't found existing client, retrying ", "task", t.Name)
+		return ctrl.Result{RequeueAfter: time.Second}
+	}
+
+	if !client.ConsumeIfReady(ctx) {
+		log.Info("gRPC client is not ready, retrying in 5 seconds", "name", t.Name)
+		return ctrl.Result{RequeueAfter: 5 * time.Second}
+	}
+	return ctrl.Result{}
+}
+
+// handleFinalizer adds the finalizer to a live Task and, for a Task being
+// deleted, closes its gRPC client before removing the finalizer. The boolean
+// result tells Reconcile to stop and return the accompanying Result and error.
+//
+// Note: if you add finalizer to a task you cannot delete it unless you remove the finalizer, run:
+// kubectl patch task --type=json -p='[{"op": "remove", "path": "/metadata/finalizers"}]'
+func (r *TaskReconciler) handleFinalizer(ctx context.Context, t *aliecsv1alpha1.Task, log logr.Logger) (ctrl.Result, bool, error) {
+	if t.DeletionTimestamp.IsZero() {
+		if !controllerutil.ContainsFinalizer(t, taskFinalizer) {
+			controllerutil.AddFinalizer(t, taskFinalizer)
+			if err := r.Update(ctx, t); err != nil {
+				return ctrl.Result{}, true, err
+			}
+			return ctrl.Result{}, true, nil
+		}
+	} else {
+		if controllerutil.ContainsFinalizer(t, taskFinalizer) {
+			log.Info("Cleaning up gRPC connection before deletion")
+			key := clientKey(t)
+			if client, exists := clientsForContainers[key]; exists {
+				if err := client.Close(); err != nil {
+					log.Error(err, "Failed to close gRPC client during deletion")
+				}
+				delete(clientsForContainers, key)
+			}
+
+			controllerutil.RemoveFinalizer(t, taskFinalizer)
+			if err := r.Update(ctx, t); err != nil {
+				return ctrl.Result{}, true, err
+			}
+		}
+		return ctrl.Result{}, true, nil
+	}
+	return ctrl.Result{}, false, nil
+}
+
+// podNameFromTask returns the name of the Pod that runs the Task called name.
+func podNameFromTask(name string) string {
+	return fmt.Sprintf("aliecs-task-pod-%s", name)
+}
+
+// podForTask builds the Pod for t from the Pod spec embedded in the Task,
+// forcing RestartPolicyNever and making t the controlling owner.
+func (r *TaskReconciler) podForTask(t *aliecsv1alpha1.Task) *v1.Pod {
+	lbls := labelsForTask(t)
+
+	pod := &v1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      podNameFromTask(t.Name),
+			Namespace: t.Namespace,
+			Labels:    lbls,
+		},
+		Spec: t.Spec.Pod,
+	}
+	pod.Spec.RestartPolicy = v1.RestartPolicyNever
+
+	// NOTE(review): the error is dropped; if setting the owner fails the Pod is
+	// created without one and its changes no longer trigger reconciliation.
+	_ = controllerutil.SetControllerReference(t, pod, r.Scheme)
+	return pod
+}
+
+// labelsForTask returns the labels put on the Pod of t.
+func labelsForTask(t *aliecsv1alpha1.Task) map[string]string {
+	return map[string]string{
+		"task_name":   t.Name,
+		"application": "ControlOperator",
+	}
+}
+
+// isPodFailed reports whether pod should be treated as failed, with a
+// human-readable reason. Besides the Failed phase it looks at the statuses of
+// the main and init containers.
+func isPodFailed(pod *v1.Pod) (bool, string) {
+	if pod.Status.Phase == v1.PodFailed {
+		return true, fmt.Sprintf("Pod failed: %s", pod.Status.Reason)
+	}
+
+	// Helper to check container statuses
+	checkContainerStatus := func(statuses []v1.ContainerStatus) (bool, string) {
+		for _, cs := range statuses {
+			if cs.State.Waiting != nil {
+				reason := cs.State.Waiting.Reason
+				switch reason {
+				case "CrashLoopBackOff", "ImagePullBackOff", "ErrImagePull", "CreateContainerConfigError", "InvalidImageName", "CreateContainerError":
+					return true, fmt.Sprintf("Container %s is in %s: %s", cs.Name, reason, cs.State.Waiting.Message)
+				}
+			}
+			if cs.State.Terminated != nil && cs.State.Terminated.ExitCode != 0 {
+				return true, fmt.Sprintf("Container %s failed with ExitCode %d: %s", cs.Name, cs.State.Terminated.ExitCode, cs.State.Terminated.Message)
+			}
+			// If it has restarted multiple times and is currently not ready, it's likely failing
+			if cs.RestartCount > 3 && !cs.Ready {
+				return true, fmt.Sprintf("Container %s has crashed %d times", cs.Name, cs.RestartCount)
+			}
+		}
+		return false, ""
+	}
+
+	// Check main containers
+	if failed, reason := checkContainerStatus(pod.Status.ContainerStatuses); failed {
+		return failed, reason
+	}
+
+	// Check init containers
+	if failed, reason := checkContainerStatus(pod.Status.InitContainerStatuses); failed {
+		return failed, reason
+	}
+
+	return false, ""
+}
+
+// SetupWithManager sets up the controller with the Manager.
+func (r *TaskReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&aliecsv1alpha1.Task{}). + Owns(&v1.Pod{}). + WithOptions(controller.Options{MaxConcurrentReconciles: 1}). + Complete(r) +} + +func prettyPrint(i any) string { + s, err := json.MarshalIndent(i, "", " ") + if err != nil { + // If marshalling fails, return a simple error string + return "failed to pretty-print object" + } + return string(s) +} From 21dd064312cab0402f448671ec538da40f03bde9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Tich=C3=A1k?= <53997499+justonedev1@users.noreply.github.com> Date: Thu, 30 Apr 2026 16:40:35 +0200 Subject: [PATCH 5/6] [OCTRL-1092] Add metrics to TryTransition (#814) --- common/monitoring/metric.go | 8 ++++++++ core/environment/environment.go | 5 +++++ core/environment/transition.go | 5 +---- core/environment/utils.go | 9 +++++++++ 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/common/monitoring/metric.go b/common/monitoring/metric.go index fe0ab8c9a..b0f4f98a7 100644 --- a/common/monitoring/metric.go +++ b/common/monitoring/metric.go @@ -78,6 +78,14 @@ const ( TIMEOUT = "timeout" ) +func (metric *Metric) AddError(err error) { + if err == nil { + metric.AddResult(SUCCESS) + } else { + metric.AddResult(ERROR) + } +} + func (metric *Metric) AddResult(result string) { metric.AddTag("result", result) } diff --git a/core/environment/environment.go b/core/environment/environment.go index fcdac227e..176b80a2d 100644 --- a/core/environment/environment.go +++ b/core/environment/environment.go @@ -1007,6 +1007,9 @@ func (env *Environment) TryTransition(t Transition) (err error) { WorkflowTemplateInfo: env.GetWorkflowInfo(), }) + metric := transitionMetric(env, t.eventName(), "try") + defer monitoring.TimerSendSingle(&metric, monitoring.Millisecond)() + err = t.check() if err != nil { the.EventWriterWithTopic(topic.Environment).WriteEvent(&pb.Ev_EnvironmentEvent{ @@ -1020,6 +1023,7 @@ func (env *Environment) 
TryTransition(t Transition) (err error) { LastRequestUser: env.GetLastRequestUser(), WorkflowTemplateInfo: env.GetWorkflowInfo(), }) + metric.AddError(err) return } err = env.Sm.Event(context.Background(), t.eventName(), t) @@ -1048,6 +1052,7 @@ func (env *Environment) TryTransition(t Transition) (err error) { WorkflowTemplateInfo: env.GetWorkflowInfo(), }) } + metric.AddError(err) return } diff --git a/core/environment/transition.go b/core/environment/transition.go index ecb563bf8..0cf342635 100644 --- a/core/environment/transition.go +++ b/core/environment/transition.go @@ -77,8 +77,5 @@ func (t baseTransition) eventName() string { } func (t baseTransition) transitionDoMetric(env *Environment) monitoring.Metric { - metric := monitoring.NewMetric("transition_do") - metric.AddTag("transition", t.name) - metric.AddTag("envId", env.Id().String()) - return metric + return transitionMetric(env, t.eventName(), "do") } diff --git a/core/environment/utils.go b/core/environment/utils.go index 7bc15c4f7..e705dcaff 100644 --- a/core/environment/utils.go +++ b/core/environment/utils.go @@ -33,6 +33,7 @@ import ( "sort" "github.com/AliceO2Group/Control/common/logger/infologger" + "github.com/AliceO2Group/Control/common/monitoring" pb "github.com/AliceO2Group/Control/common/protos" "github.com/AliceO2Group/Control/core/task" "github.com/AliceO2Group/Control/core/task/sm" @@ -166,3 +167,11 @@ func HandleFailedGoError(err error, env *Environment) { env.setState("ERROR") } } + +func transitionMetric(env *Environment, transition string, funName string) monitoring.Metric { + metric := monitoring.NewMetric("transition") + metric.AddTag("function", funName) + metric.AddTag("transition", transition) + metric.AddTag("envId", env.Id().String()) + return metric +} From e0a7f09706312a568c3470dacfb09058310094fc Mon Sep 17 00:00:00 2001 From: Piotr Konopka Date: Mon, 4 May 2026 08:50:53 +0200 Subject: [PATCH 6/6] Bump to v1.50.0 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/VERSION b/VERSION index 00cbec6bd..3530926cc 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ # GNU Make syntax VERSION_MAJOR := 1 -VERSION_MINOR := 49 +VERSION_MINOR := 50 VERSION_PATCH := 0