From ce9ff1e714e753e0a976c23b3f417d1530a09542 Mon Sep 17 00:00:00 2001 From: Lukas Fischer Date: Fri, 9 Jun 2023 17:24:00 +0200 Subject: [PATCH 1/4] #911 Create a download cache for trivy database This uses the trivy client/server functionality to download the vulnerability DB only once for multiple scans to avoid GitHub's rate limit. An additional container running trivy in server mode is started, and all scans connect to it. This integrates the functionality previously documented [in the SCB docs for trivy][1], now that trivy supports remote scanning for more than just container image scans. [1]: https://www.securecodebox.io/docs/scanners/trivy/#scanning-many-targets Signed-off-by: Lukas Fischer --- .../trivy/templates/trivy-database-cache.yaml | 51 +++++++++++++++++++ scanners/trivy/templates/trivy-scan-type.yaml | 20 ++++++++ scanners/trivy/values.yaml | 4 ++ 3 files changed, 75 insertions(+) create mode 100644 scanners/trivy/templates/trivy-database-cache.yaml diff --git a/scanners/trivy/templates/trivy-database-cache.yaml b/scanners/trivy/templates/trivy-database-cache.yaml new file mode 100644 index 0000000000..6301075c73 --- /dev/null +++ b/scanners/trivy/templates/trivy-database-cache.yaml @@ -0,0 +1,51 @@ +# SPDX-FileCopyrightText: the secureCodeBox authors +# +# SPDX-License-Identifier: Apache-2.0 + +{{ if .Values.trivyDatabaseCache.enabled }} +# We use trivy in client/server mode for central storing of the trivy vulnerability database to prevent downloading it for each scan again, enabled by default. 
+# First declare a service that will serve requests to the database cache pod +kind: Service +apiVersion: v1 +metadata: + name: trivy-database + labels: + app: trivy-database +spec: + selector: + app: trivy-database + ports: + - port: 8080 + protocol: TCP + targetPort: 8080 + type: ClusterIP +--- +# Now declare the actual deployment of the database cache server +apiVersion: apps/v1 +kind: Deployment +metadata: + name: trivy-database + labels: + app: trivy-database +spec: + replicas: 1 + selector: + matchLabels: + app: trivy-database + template: + metadata: + labels: + app: trivy-database + spec: + containers: + - name: trivy-database + image: "{{ .Values.scanner.image.repository }}:{{ .Values.scanner.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: IfNotPresent + args: + - "server" + - "--listen" + - "0.0.0.0:8080" + ports: + - containerPort: 8080 + protocol: TCP +{{- end }} diff --git a/scanners/trivy/templates/trivy-scan-type.yaml b/scanners/trivy/templates/trivy-scan-type.yaml index c4a7ee054a..0206d60712 100644 --- a/scanners/trivy/templates/trivy-scan-type.yaml +++ b/scanners/trivy/templates/trivy-scan-type.yaml @@ -41,6 +41,11 @@ spec: - "image" # Suppress progress bar, as it pollutes non interactive terminal logs - "--no-progress" + {{- if .Values.trivyDatabaseCache.enabled }} + # Connect to trivy database cache to avoid downloading the vulnerability DB for each scan + - "--server" + - "http://trivy-database.{{ .Release.Namespace }}.svc:8080" + {{- end }} - "--format" - "json" - "--output" @@ -101,6 +106,11 @@ spec: - "image" # Suppress progress bar, as it pollutes non interactive terminal logs - "--no-progress" + {{- if .Values.trivyDatabaseCache.enabled }} + # Connect to trivy database cache to avoid downloading the vulnerability DB for each scan + - "--server" + - "http://trivy-database.{{ .Release.Namespace }}.svc:8080" + {{- end }} - "--format" - "json" - "--output" @@ -154,6 +164,11 @@ spec: - "filesystem" # Suppress progress bar, as it 
pollutes non interactive terminal logs - "--no-progress" + {{- if .Values.trivyDatabaseCache.enabled }} + # Connect to trivy database cache to avoid downloading the vulnerability DB for each scan + - "--server" + - "http://trivy-database.{{ .Release.Namespace }}.svc:8080" + {{- end }} - "--format" - "json" - "--output" @@ -206,6 +221,11 @@ spec: - "repo" # Suppress progress bar, as it pollutes non interactive terminal logs - "--no-progress" + {{- if .Values.trivyDatabaseCache.enabled }} + # Connect to trivy database cache to avoid downloading the vulnerability DB for each scan + - "--server" + - "http://trivy-database.{{ .Release.Namespace }}.svc:8080" + {{- end }} - "--format" - "json" - "--output" diff --git a/scanners/trivy/values.yaml b/scanners/trivy/values.yaml index 17a52d19e2..dc3a7667fb 100644 --- a/scanners/trivy/values.yaml +++ b/scanners/trivy/values.yaml @@ -103,6 +103,10 @@ scanner: # -- if set to true the scan job will be suspended after creation. You can then resume the job using `kubectl resume ` or using a job scheduler like kueue suspend: false +trivyDatabaseCache: + # -- Enables or disables the use of trivy server in another pod to cache the vulnerability database for all scans. + enabled: true + cascadingRules: # cascadingRules.enabled -- Enables or disables the installation of the default cascading rules for this scanner enabled: false From de44be571428f2b95212978dfa9734bbf72ee316 Mon Sep 17 00:00:00 2001 From: Lukas Fischer Date: Mon, 26 Jun 2023 15:39:17 +0200 Subject: [PATCH 2/4] #911 Make number of trivy DB replicas configurable Instead of hardcoding only one replica, allow any number of replicas for the trivy download cache to be configured by using the trivyDatabaseCache.replicas value. 
Signed-off-by: Lukas Fischer --- scanners/trivy/templates/trivy-database-cache.yaml | 2 +- scanners/trivy/values.yaml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/scanners/trivy/templates/trivy-database-cache.yaml b/scanners/trivy/templates/trivy-database-cache.yaml index 6301075c73..e45ffbc166 100644 --- a/scanners/trivy/templates/trivy-database-cache.yaml +++ b/scanners/trivy/templates/trivy-database-cache.yaml @@ -28,7 +28,7 @@ metadata: labels: app: trivy-database spec: - replicas: 1 + replicas: {{ .Values.trivyDatabaseCache.replicas }} selector: matchLabels: app: trivy-database diff --git a/scanners/trivy/values.yaml b/scanners/trivy/values.yaml index dc3a7667fb..2ca111eebf 100644 --- a/scanners/trivy/values.yaml +++ b/scanners/trivy/values.yaml @@ -106,6 +106,8 @@ scanner: trivyDatabaseCache: # -- Enables or disables the use of trivy server in another pod to cache the vulnerability database for all scans. enabled: true + # -- amount of replicas to configure for the Deployment + replicas: 1 cascadingRules: # cascadingRules.enabled -- Enables or disables the installation of the default cascading rules for this scanner From ec87d5086029d162c2cc95ecac732dd8b4068c10 Mon Sep 17 00:00:00 2001 From: Lukas Fischer Date: Wed, 14 Jun 2023 13:54:49 +0200 Subject: [PATCH 3/4] #911 Add liveness/readiness checks for trivy cache Trivy server provides a /healthz endpoint that always returns 200 (see aquasecurity/trivy#534), and according to the logs/stdout trivy only starts listening after downloading the database. The helm chart provided by trivy (added in aquasecurity/trivy#751) uses this endpoint for liveness and readiness checks as well. This change therefore integrates the same checks. 
Signed-off-by: Lukas Fischer --- .../trivy/templates/trivy-database-cache.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/scanners/trivy/templates/trivy-database-cache.yaml b/scanners/trivy/templates/trivy-database-cache.yaml index e45ffbc166..e0e8c10204 100644 --- a/scanners/trivy/templates/trivy-database-cache.yaml +++ b/scanners/trivy/templates/trivy-database-cache.yaml @@ -48,4 +48,23 @@ spec: ports: - containerPort: 8080 protocol: TCP + name: trivy-http + livenessProbe: + httpGet: + scheme: HTTP + path: /healthz + port: trivy-http + initialDelaySeconds: 5 + periodSeconds: 10 + successThreshold: 1 + failureThreshold: 10 + readinessProbe: + httpGet: + scheme: HTTP + path: /healthz + port: trivy-http + initialDelaySeconds: 5 + periodSeconds: 10 + successThreshold: 1 + failureThreshold: 3 {{- end }} From 82397413218bf1910c8ea0159760ca4fd4cd0f46 Mon Sep 17 00:00:00 2001 From: Lukas Fischer Date: Tue, 27 Jun 2023 15:37:42 +0200 Subject: [PATCH 4/4] #911 Document trivy vulnerability database cache Update the documentation to reflect that the trivy vulnerability DB is now by default served by a trivy server container. This shortens this section of the documentation quite a bit, because most of it was example code to set up what is now integrated. Signed-off-by: Lukas Fischer --- scanners/trivy/.helm-docs.gotmpl | 91 ++--------------------- scanners/trivy/README.md | 91 ++--------------------- scanners/trivy/docs/README.ArtifactHub.md | 91 ++--------------------- 3 files changed, 15 insertions(+), 258 deletions(-) diff --git a/scanners/trivy/.helm-docs.gotmpl b/scanners/trivy/.helm-docs.gotmpl index 5f6ae5c0f7..d8f3156364 100644 --- a/scanners/trivy/.helm-docs.gotmpl +++ b/scanners/trivy/.helm-docs.gotmpl @@ -68,95 +68,14 @@ spec: ``` ### Scanning Many Targets -By default, the docker container of trivy will download new rulesets when starting the process. 
+By default, the docker container of trivy will download the vulnerability database when starting the process. As this download is performed directly from GitHub, you will run into API rate limiting issues after roughly 50 requests. -Trivy [supports a client-server mode](https://aquasecurity.github.io/trivy/latest/advanced/modes/client-server/) where one process downloads a copy of the rule database and provides it to the others. -Due to [limitations in trivy](https://github.com/aquasecurity/trivy/issues/634), this mode currently only supports scanning container images. -If this fits your use case, you can deploy a rule service with the following template: -```yaml -# First declare a service that will serve requests to the rule pod -kind: Service -apiVersion: v1 -metadata: - name: trivy-rules - # Update the namespace here if you are using a different one - namespace: default - labels: - app: trivy-rules -spec: - selector: - app: trivy-rules - ports: - - port: 8080 - protocol: TCP - targetPort: 8080 - type: ClusterIP ---- -# Now declare the actual deployment of the rule server -apiVersion: apps/v1 -kind: Deployment -metadata: - name: trivy-rules - # Again, update the namespace here - namespace: default - labels: - app: trivy-rules -spec: - replicas: 1 - selector: - matchLabels: - app: trivy-rules - template: - metadata: - labels: - app: trivy-rules - spec: - containers: - - name: trivy-rules - # Don't forget to set this to a version matching that used in secureCodeBox - image: aquasec/trivy:0.20.2 - imagePullPolicy: Always - args: - - "server" - - "--listen" - - "0.0.0.0:8080" - ports: - - containerPort: 8080 - protocol: TCP -``` - -You can then start scans of images using the client mode. For example: +Trivy [supports a client-server mode](https://aquasecurity.github.io/trivy/latest/docs/references/modes/client-server/) where one process downloads a copy of the vulnerability database and provides it to the others. 
-```yaml -apiVersion: "execution.securecodebox.io/v1" -kind: Scan -metadata: - name: "test-trivy" - # Don't forget to update the namespace if necessary - namespace: default -spec: - scanType: "trivy-image" - parameters: - - "client" - # Again, add the extra parameters here (required to make the parser work) - # But don't add the --no-progress switch. - - "--format" - - "json" - - "--output" - - "/home/securecodebox/trivy-results.json" - # Specify the rule service internal DNS name here. - # (Substitute a different namespace if you changed it) - - "--remote" - - "http://trivy-rules.default.svc:8080" - # Finally, specify the image you want to scan - - "securecodebox/operator:3.0.0" -``` +This mode is implemented and active by default. +A separate Deployment for the trivy server will be created during the installation and the trivy scanTypes are automatically configured to run in client mode and connect to the server. -If you want to scan anything other than docker images, you currently [cannot use the client-server mode](https://github.com/aquasecurity/trivy/issues/634) described above. -Instead, you have to [manually download the ruleset and provide it to trivy](https://aquasecurity.github.io/trivy/latest/advanced/air-gap/). -In practice, this is a difficult problem because the most natural method for providing these files in kubernetes, ConfigMaps, has a size limit of 1 MB, while the vulnerability database is over 200 MB in size (28 MB after compression). -Your best bet would thus be to serve the files from your own servers and load them into the scanner [using an initContainer](https://www.securecodebox.io/docs/api/crds/scan#initcontainers-optional), taking care to keep the databases on your server up to date. -Consult the [trivy documentation](https://aquasecurity.github.io/trivy/latest/advanced/air-gap/) for additional details on the required steps. 
+In case only a single scan or very few are run, and you want to avoid the small performance overhead, client/server mode can be disabled by setting `--set="trivyDatabaseCache.enabled=false"` during helm install. {{- end }} {{- define "extra.chartConfigurationSection" -}} diff --git a/scanners/trivy/README.md b/scanners/trivy/README.md index a437256887..8efbb0fee3 100644 --- a/scanners/trivy/README.md +++ b/scanners/trivy/README.md @@ -85,95 +85,14 @@ spec: ``` ### Scanning Many Targets -By default, the docker container of trivy will download new rulesets when starting the process. +By default, the docker container of trivy will download the vulnerability database when starting the process. As this download is performed directly from GitHub, you will run into API rate limiting issues after roughly 50 requests. -Trivy [supports a client-server mode](https://aquasecurity.github.io/trivy/latest/advanced/modes/client-server/) where one process downloads a copy of the rule database and provides it to the others. -Due to [limitations in trivy](https://github.com/aquasecurity/trivy/issues/634), this mode currently only supports scanning container images. 
-If this fits your use case, you can deploy a rule service with the following template: -```yaml -# First declare a service that will serve requests to the rule pod -kind: Service -apiVersion: v1 -metadata: - name: trivy-rules - # Update the namespace here if you are using a different one - namespace: default - labels: - app: trivy-rules -spec: - selector: - app: trivy-rules - ports: - - port: 8080 - protocol: TCP - targetPort: 8080 - type: ClusterIP ---- -# Now declare the actual deployment of the rule server -apiVersion: apps/v1 -kind: Deployment -metadata: - name: trivy-rules - # Again, update the namespace here - namespace: default - labels: - app: trivy-rules -spec: - replicas: 1 - selector: - matchLabels: - app: trivy-rules - template: - metadata: - labels: - app: trivy-rules - spec: - containers: - - name: trivy-rules - # Don't forget to set this to a version matching that used in secureCodeBox - image: aquasec/trivy:0.20.2 - imagePullPolicy: Always - args: - - "server" - - "--listen" - - "0.0.0.0:8080" - ports: - - containerPort: 8080 - protocol: TCP -``` - -You can then start scans of images using the client mode. For example: +Trivy [supports a client-server mode](https://aquasecurity.github.io/trivy/latest/docs/references/modes/client-server/) where one process downloads a copy of the vulnerability database and provides it to the others. -```yaml -apiVersion: "execution.securecodebox.io/v1" -kind: Scan -metadata: - name: "test-trivy" - # Don't forget to update the namespace if necessary - namespace: default -spec: - scanType: "trivy-image" - parameters: - - "client" - # Again, add the extra parameters here (required to make the parser work) - # But don't add the --no-progress switch. - - "--format" - - "json" - - "--output" - - "/home/securecodebox/trivy-results.json" - # Specify the rule service internal DNS name here. 
- # (Substitute a different namespace if you changed it) - - "--remote" - - "http://trivy-rules.default.svc:8080" - # Finally, specify the image you want to scan - - "securecodebox/operator:3.0.0" -``` +This mode is implemented and active by default. +A separate Deployment for the trivy server will be created during the installation and the trivy scanTypes are automatically configured to run in client mode and connect to the server. -If you want to scan anything other than docker images, you currently [cannot use the client-server mode](https://github.com/aquasecurity/trivy/issues/634) described above. -Instead, you have to [manually download the ruleset and provide it to trivy](https://aquasecurity.github.io/trivy/latest/advanced/air-gap/). -In practice, this is a difficult problem because the most natural method for providing these files in kubernetes, ConfigMaps, has a size limit of 1 MB, while the vulnerability database is over 200 MB in size (28 MB after compression). -Your best bet would thus be to serve the files from your own servers and load them into the scanner [using an initContainer](https://www.securecodebox.io/docs/api/crds/scan#initcontainers-optional), taking care to keep the databases on your server up to date. -Consult the [trivy documentation](https://aquasecurity.github.io/trivy/latest/advanced/air-gap/) for additional details on the required steps. +In case only a single scan or very few are run, and you want to avoid the small performance overhead, client/server mode can be disabled by setting `--set="trivyDatabaseCache.enabled=false"` during helm install. 
## Requirements diff --git a/scanners/trivy/docs/README.ArtifactHub.md b/scanners/trivy/docs/README.ArtifactHub.md index c20777e743..56e443c7a2 100644 --- a/scanners/trivy/docs/README.ArtifactHub.md +++ b/scanners/trivy/docs/README.ArtifactHub.md @@ -92,95 +92,14 @@ spec: ``` ### Scanning Many Targets -By default, the docker container of trivy will download new rulesets when starting the process. +By default, the docker container of trivy will download the vulnerability database when starting the process. As this download is performed directly from GitHub, you will run into API rate limiting issues after roughly 50 requests. -Trivy [supports a client-server mode](https://aquasecurity.github.io/trivy/latest/advanced/modes/client-server/) where one process downloads a copy of the rule database and provides it to the others. -Due to [limitations in trivy](https://github.com/aquasecurity/trivy/issues/634), this mode currently only supports scanning container images. -If this fits your use case, you can deploy a rule service with the following template: -```yaml -# First declare a service that will serve requests to the rule pod -kind: Service -apiVersion: v1 -metadata: - name: trivy-rules - # Update the namespace here if you are using a different one - namespace: default - labels: - app: trivy-rules -spec: - selector: - app: trivy-rules - ports: - - port: 8080 - protocol: TCP - targetPort: 8080 - type: ClusterIP ---- -# Now declare the actual deployment of the rule server -apiVersion: apps/v1 -kind: Deployment -metadata: - name: trivy-rules - # Again, update the namespace here - namespace: default - labels: - app: trivy-rules -spec: - replicas: 1 - selector: - matchLabels: - app: trivy-rules - template: - metadata: - labels: - app: trivy-rules - spec: - containers: - - name: trivy-rules - # Don't forget to set this to a version matching that used in secureCodeBox - image: aquasec/trivy:0.20.2 - imagePullPolicy: Always - args: - - "server" - - "--listen" - - 
"0.0.0.0:8080" - ports: - - containerPort: 8080 - protocol: TCP -``` +Trivy [supports a client-server mode](https://aquasecurity.github.io/trivy/latest/docs/references/modes/client-server/) where one process downloads a copy of the vulnerability database and provides it to the others. -You can then start scans of images using the client mode. For example: - -```yaml -apiVersion: "execution.securecodebox.io/v1" -kind: Scan -metadata: - name: "test-trivy" - # Don't forget to update the namespace if necessary - namespace: default -spec: - scanType: "trivy-image" - parameters: - - "client" - # Again, add the extra parameters here (required to make the parser work) - # But don't add the --no-progress switch. - - "--format" - - "json" - - "--output" - - "/home/securecodebox/trivy-results.json" - # Specify the rule service internal DNS name here. - # (Substitute a different namespace if you changed it) - - "--remote" - - "http://trivy-rules.default.svc:8080" - # Finally, specify the image you want to scan - - "securecodebox/operator:3.0.0" -``` +This mode is implemented and active by default. +A separate Deployment for the trivy server will be created during the installation and the trivy scanTypes are automatically configured to run in client mode and connect to the server. -If you want to scan anything other than docker images, you currently [cannot use the client-server mode](https://github.com/aquasecurity/trivy/issues/634) described above. -Instead, you have to [manually download the ruleset and provide it to trivy](https://aquasecurity.github.io/trivy/latest/advanced/air-gap/). -In practice, this is a difficult problem because the most natural method for providing these files in kubernetes, ConfigMaps, has a size limit of 1 MB, while the vulnerability database is over 200 MB in size (28 MB after compression). 
-Your best bet would thus be to serve the files from your own servers and load them into the scanner [using an initContainer](https://www.securecodebox.io/docs/api/crds/scan#initcontainers-optional), taking care to keep the databases on your server up to date. -Consult the [trivy documentation](https://aquasecurity.github.io/trivy/latest/advanced/air-gap/) for additional details on the required steps. +In case only a single scan or very few are run, and you want to avoid the small performance overhead, client/server mode can be disabled by setting `--set="trivyDatabaseCache.enabled=false"` during helm install. ## Requirements