From 978710a95668654f9c7be692178bbdf8f414a480 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Fri, 12 Jun 2026 22:18:37 +0000 Subject: [PATCH 1/2] feat: add nats_ca crypto key feature for NATS cluster mTLS CA Adds cryptokeys support for a CA that will sign the ephemeral leaf certificates replicas use for NATS cluster mTLS: - New crypto_key_feature enum value nats_ca; the rotator generates a self-signed ECDSA P-256 CA (PEM cert+key bundle in the secret column) and rotates it on the shared key duration with a 30 day token duration matching the maximum leaf lifetime. - FetchNATSCA accessor returns the active CA (parsed cert and signer) plus a trust bundle of all valid CA rows and the active row sequence so callers can detect rotation. It creates the CA on first fetch under an advisory lock since the NATS pubsub is constructed before the rotator starts. - The CA bundle contains a private key and stays excluded from the wsproxy crypto-keys endpoint allowlist and the signing/encryption keycaches. The feature is dormant: nothing fetches the CA until cli/server.go is wired up in a follow-up branch. --- coderd/cryptokeys/ca.go | 272 ++++++++++++++++++ coderd/cryptokeys/ca_internal_test.go | 182 ++++++++++++ coderd/cryptokeys/rotate.go | 13 +- coderd/cryptokeys/rotate_internal_test.go | 63 +++- coderd/database/dbgen/dbgen.go | 40 +++ coderd/database/dump.sql | 3 +- coderd/database/lock.go | 1 + ...000520_nats_ca_crypto_key_feature.down.sql | 16 ++ .../000520_nats_ca_crypto_key_feature.up.sql | 1 + coderd/database/models.go | 5 +- enterprise/coderd/workspaceproxy_test.go | 6 + 11 files changed, 597 insertions(+), 5 deletions(-) create mode 100644 coderd/cryptokeys/ca.go create mode 100644 coderd/cryptokeys/ca_internal_test.go create mode 100644 coderd/database/migrations/000520_nats_ca_crypto_key_feature.down.sql create mode 100644 coderd/database/migrations/000520_nats_ca_crypto_key_feature.up.sql diff --git a/coderd/cryptokeys/ca.go b/coderd/cryptokeys/ca.go new file mode 100644 index 0000000000000..05f89477c50e9 --- /dev/null +++ b/coderd/cryptokeys/ca.go @@ -0,0 +1,272 @@ +package cryptokeys + +import ( + "context" + "crypto" + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/x509" + "crypto/x509/pkix" + "database/sql" + "encoding/pem" + "math/big" + "time" + + "golang.org/x/xerrors" + + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbauthz" + "github.com/coder/coder/v2/coderd/database/dbtime" +) + +const ( + caCertPEMBlockType = "CERTIFICATE" + caKeyPEMBlockType = "EC PRIVATE KEY" +) + +// NATSCA is the parsed state of the nats_ca crypto key feature at one point in +// time. The CA signs the ephemeral leaf certificates that replicas use for +// NATS cluster mTLS. +// +// Callers that need to react to CA rotation (re-minting leaves and reloading +// the NATS server config) should poll FetchNATSCA and compare Sequence to +// detect when the active CA has changed. +type NATSCA struct { + // Sequence is the crypto_keys sequence of the active row. + Sequence int32 + // Cert is the active CA certificate used to sign leaf certificates. + Cert *x509.Certificate + // Key is the active CA private key. + Key crypto.Signer + // TrustBundle contains the certificates of all currently valid CA rows, + // including Cert. During a rotation overlap window it has two entries; + // installing the full bundle as the trust root lets replicas on either + // side of a rotation verify each other. + TrustBundle []*x509.Certificate +} + +// FetchNATSCA returns the current NATS cluster CA, creating it if no valid CA +// exists. The NATS pubsub is constructed before the key rotator starts, so on +// fresh deployments the CA row will not exist at first fetch; creation here is +// guarded by an advisory lock and is idempotent under concurrent callers. +// After creation the rotator owns the key's lifecycle. +func FetchNATSCA(ctx context.Context, db database.Store) (*NATSCA, error) { + //nolint:gocritic // The CA accessor requires the same crypto key access as the rotator. + ctx = dbauthz.AsKeyRotator(ctx) + + now := dbtime.Now() + + keys, err := db.GetCryptoKeysByFeature(ctx, database.CryptoKeyFeatureNatsCa) + if err != nil { + return nil, xerrors.Errorf("get crypto keys by feature: %w", err) + } + + ca, ok, err := parseNATSCAKeys(keys, now) + if err != nil { + return nil, err + } + if ok { + return ca, nil + } + + // No active CA exists. Create one inside a transaction under an advisory + // lock, re-checking after the lock is acquired so that concurrent callers + // insert exactly one row. This mirrors rotator.rotateKeys. + err = db.InTx(func(tx database.Store) error { + err := tx.AcquireLock(ctx, database.LockIDNATSCACreate) + if err != nil { + return xerrors.Errorf("acquire lock: %w", err) + } + + keys, err = tx.GetCryptoKeysByFeature(ctx, database.CryptoKeyFeatureNatsCa) + if err != nil { + return xerrors.Errorf("get crypto keys by feature: %w", err) + } + + // Recompute now after acquiring the lock: a concurrent creator may + // have committed a row with a StartsAt later than the time captured + // before we blocked on the lock. + now = dbtime.Now() + var ok bool + ca, ok, err = parseNATSCAKeys(keys, now) + if err != nil { + return err + } + if ok { + return nil + } + + secret, err := generateCASecret(now) + if err != nil { + return xerrors.Errorf("generate CA secret: %w", err) + } + + latestKey, err := tx.GetLatestCryptoKeyByFeature(ctx, database.CryptoKeyFeatureNatsCa) + if err != nil && !xerrors.Is(err, sql.ErrNoRows) { + return xerrors.Errorf("get latest key: %w", err) + } + + newKey, err := tx.InsertCryptoKey(ctx, database.InsertCryptoKeyParams{ + Feature: database.CryptoKeyFeatureNatsCa, + Sequence: latestKey.Sequence + 1, + Secret: sql.NullString{ + String: secret, + Valid: true, + }, + // Set by dbcrypt if it's required. + SecretKeyID: sql.NullString{}, + StartsAt: now, + }) + if err != nil { + return xerrors.Errorf("insert crypto key: %w", err) + } + + ca, ok, err = parseNATSCAKeys([]database.CryptoKey{newKey}, now) + if err != nil { + return err + } + if !ok { + return xerrors.New("inserted NATS CA is not usable for signing") + } + return nil + }, &database.TxOptions{ + // Read committed (the default) is required here: with repeatable + // read, the snapshot is taken before the advisory lock is granted, + // so the post-lock re-check would not see a row committed by a + // concurrent creator and we would insert a duplicate. + TxIdentifier: "fetch_nats_ca", + }) + if err != nil { + return nil, err + } + return ca, nil +} + +// parseNATSCAKeys builds a NATSCA from the database rows for the nats_ca +// feature. Rows must be ordered by sequence descending (the order returned by +// GetCryptoKeysByFeature). The active CA is the newest row that is usable for +// signing; the trust bundle contains the certificates of every row that is +// still valid for verification. The boolean reports whether a row could act +// as the active CA. +func parseNATSCAKeys(keys []database.CryptoKey, now time.Time) (*NATSCA, bool, error) { + ca := &NATSCA{} + for _, key := range keys { + if !key.CanVerify(now) { + continue + } + cert, signer, err := parseCASecret(key.Secret.String) + if err != nil { + return nil, false, xerrors.Errorf("parse CA secret for sequence %d: %w", key.Sequence, err) + } + ca.TrustBundle = append(ca.TrustBundle, cert) + if ca.Cert == nil && key.CanSign(now) { + ca.Sequence = key.Sequence + ca.Cert = cert + ca.Key = signer + } + } + if ca.Cert == nil { + return nil, false, nil + } + return ca, true, nil +} + +// generateCASecret generates a new self-signed CA certificate and private key +// for signing NATS cluster leaf certificates, PEM-encoded into a single +// bundle for storage in the crypto_keys secret column. +// +// The certificate outlives the key row on purpose: a row is rotated after +// DefaultKeyDuration but remains a valid trust root until its deletes_at +// (an hour plus NATSCATokenDuration after rotation), and leaves minted just +// before rotation live for up to NATSCATokenDuration. +func generateCASecret(now time.Time) (string, error) { + key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + return "", xerrors.Errorf("generate key: %w", err) + } + + // 128-bit random serial per CA/Browser Forum conventions. + serial, err := rand.Int(rand.Reader, new(big.Int).Lsh(big.NewInt(1), 128)) + if err != nil { + return "", xerrors.Errorf("generate serial: %w", err) + } + + template := &x509.Certificate{ + SerialNumber: serial, + Subject: pkix.Name{ + CommonName: "coder-nats-ca", + }, + // Backdate NotBefore to tolerate clock skew between replicas. + NotBefore: now.Add(-time.Hour), + NotAfter: now.Add(DefaultKeyDuration + NATSCATokenDuration + time.Hour), + KeyUsage: x509.KeyUsageCertSign, + BasicConstraintsValid: true, + IsCA: true, + MaxPathLenZero: true, + } + + der, err := x509.CreateCertificate(rand.Reader, template, template, key.Public(), key) + if err != nil { + return "", xerrors.Errorf("create certificate: %w", err) + } + + keyDER, err := x509.MarshalECPrivateKey(key) + if err != nil { + return "", xerrors.Errorf("marshal private key: %w", err) + } + + var secret []byte + secret = append(secret, pem.EncodeToMemory(&pem.Block{Type: caCertPEMBlockType, Bytes: der})...) + secret = append(secret, pem.EncodeToMemory(&pem.Block{Type: caKeyPEMBlockType, Bytes: keyDER})...) + return string(secret), nil +} + +// parseCASecret parses a PEM bundle produced by generateCASecret back into +// the CA certificate and private key. +func parseCASecret(secret string) (*x509.Certificate, crypto.Signer, error) { + var ( + cert *x509.Certificate + key *ecdsa.PrivateKey + ) + rest := []byte(secret) + for { + var block *pem.Block + block, rest = pem.Decode(rest) + if block == nil { + break + } + switch block.Type { + case caCertPEMBlockType: + if cert != nil { + return nil, nil, xerrors.New("multiple certificates in CA secret") + } + var err error + cert, err = x509.ParseCertificate(block.Bytes) + if err != nil { + return nil, nil, xerrors.Errorf("parse certificate: %w", err) + } + case caKeyPEMBlockType: + if key != nil { + return nil, nil, xerrors.New("multiple private keys in CA secret") + } + var err error + key, err = x509.ParseECPrivateKey(block.Bytes) + if err != nil { + return nil, nil, xerrors.Errorf("parse private key: %w", err) + } + default: + return nil, nil, xerrors.Errorf("unexpected PEM block type: %q", block.Type) + } + } + if cert == nil { + return nil, nil, xerrors.New("no certificate in CA secret") + } + if key == nil { + return nil, nil, xerrors.New("no private key in CA secret") + } + if !key.PublicKey.Equal(cert.PublicKey) { + return nil, nil, xerrors.New("private key does not match certificate") + } + return cert, key, nil +} diff --git a/coderd/cryptokeys/ca_internal_test.go b/coderd/cryptokeys/ca_internal_test.go new file mode 100644 index 0000000000000..bdd28eb49d4e8 --- /dev/null +++ b/coderd/cryptokeys/ca_internal_test.go @@ -0,0 +1,182 @@ +package cryptokeys + +import ( + "crypto/x509" + "database/sql" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/coder/coder/v2/coderd/database" + "github.com/coder/coder/v2/coderd/database/dbgen" + "github.com/coder/coder/v2/coderd/database/dbtestutil" + "github.com/coder/coder/v2/testutil" +) + +func TestCASecretRoundTrip(t *testing.T) { + t.Parallel() + + now := time.Now().UTC().Truncate(time.Second) + secret, err := generateCASecret(now) + require.NoError(t, err) + + cert, signer, err := parseCASecret(secret) + require.NoError(t, err) + + require.True(t, cert.IsCA) + require.True(t, cert.BasicConstraintsValid) + require.True(t, cert.MaxPathLenZero) + require.Equal(t, x509.KeyUsageCertSign, cert.KeyUsage) + require.Equal(t, now.Add(-time.Hour), cert.NotBefore) + require.Equal(t, now.Add(DefaultKeyDuration+NATSCATokenDuration+time.Hour), cert.NotAfter) + require.Equal(t, cert.PublicKey, signer.Public()) + + // The cert must be able to verify itself as a trust root. + pool := x509.NewCertPool() + pool.AddCert(cert) + _, err = cert.Verify(x509.VerifyOptions{Roots: pool}) + require.NoError(t, err) +} + +func TestParseCASecretErrors(t *testing.T) { + t.Parallel() + + _, _, err := parseCASecret("") + require.ErrorContains(t, err, "no certificate") + + _, _, err = parseCASecret("not pem at all") + require.ErrorContains(t, err, "no certificate") +} + +func TestFetchNATSCA(t *testing.T) { + t.Parallel() + + t.Run("CreatesWhenMissing", func(t *testing.T) { + t.Parallel() + + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + + ca, err := FetchNATSCA(ctx, db) + require.NoError(t, err) + require.NotNil(t, ca.Cert) + require.NotNil(t, ca.Key) + require.Len(t, ca.TrustBundle, 1) + require.Equal(t, ca.Cert, ca.TrustBundle[0]) + + // A second fetch returns the same CA without inserting another row. + again, err := FetchNATSCA(ctx, db) + require.NoError(t, err) + require.Equal(t, ca.Sequence, again.Sequence) + require.Equal(t, ca.Cert.Raw, again.Cert.Raw) + + keys, err := db.GetCryptoKeysByFeature(ctx, database.CryptoKeyFeatureNatsCa) + require.NoError(t, err) + require.Len(t, keys, 1) + }) + + t.Run("ConcurrentCreate", func(t *testing.T) { + t.Parallel() + + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitLong) + + const fetchers = 8 + cas := make([]*NATSCA, fetchers) + errs := make([]error, fetchers) + var wg sync.WaitGroup + for i := range fetchers { + wg.Add(1) + go func() { + defer wg.Done() + cas[i], errs[i] = FetchNATSCA(ctx, db) + }() + } + wg.Wait() + + for i := range fetchers { + require.NoError(t, errs[i]) + require.Equal(t, cas[0].Sequence, cas[i].Sequence) + require.Equal(t, cas[0].Cert.Raw, cas[i].Cert.Raw) + } + + keys, err := db.GetCryptoKeysByFeature(ctx, database.CryptoKeyFeatureNatsCa) + require.NoError(t, err) + require.Len(t, keys, 1) + }) + + t.Run("RotationOverlap", func(t *testing.T) { + t.Parallel() + + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + now := time.Now().UTC() + + // Old CA scheduled for deletion in the future: still a trust root, + // no longer the active signer. + oldKey := dbgen.CryptoKey(t, db, database.CryptoKey{ + Feature: database.CryptoKeyFeatureNatsCa, + Sequence: 1, + StartsAt: now.Add(-2 * time.Hour), + DeletesAt: sql.NullTime{Time: now.Add(time.Hour), Valid: true}, + }) + newKey := dbgen.CryptoKey(t, db, database.CryptoKey{ + Feature: database.CryptoKeyFeatureNatsCa, + Sequence: 2, + StartsAt: now.Add(-time.Hour), + }) + // Deleted key: excluded entirely. + deletedKey := dbgen.CryptoKey(t, db, database.CryptoKey{ + Feature: database.CryptoKeyFeatureNatsCa, + Sequence: 3, + StartsAt: now.Add(-3 * time.Hour), + DeletesAt: sql.NullTime{Time: now.Add(-time.Hour), Valid: true}, + }) + + ca, err := FetchNATSCA(ctx, db) + require.NoError(t, err) + require.Equal(t, newKey.Sequence, ca.Sequence) + + newCert, _, err := parseCASecret(newKey.Secret.String) + require.NoError(t, err) + oldCert, _, err := parseCASecret(oldKey.Secret.String) + require.NoError(t, err) + deletedCert, _, err := parseCASecret(deletedKey.Secret.String) + require.NoError(t, err) + + require.Equal(t, newCert.Raw, ca.Cert.Raw) + require.Len(t, ca.TrustBundle, 2) + bundle := [][]byte{ca.TrustBundle[0].Raw, ca.TrustBundle[1].Raw} + require.Contains(t, bundle, newCert.Raw) + require.Contains(t, bundle, oldCert.Raw) + require.NotContains(t, bundle, deletedCert.Raw) + }) + + t.Run("FutureKeyNotActive", func(t *testing.T) { + t.Parallel() + + db, _ := dbtestutil.NewDB(t) + ctx := testutil.Context(t, testutil.WaitShort) + now := time.Now().UTC() + + current := dbgen.CryptoKey(t, db, database.CryptoKey{ + Feature: database.CryptoKeyFeatureNatsCa, + Sequence: 1, + StartsAt: now.Add(-time.Hour), + }) + // A rotated-in key that hasn't started yet must not be the active + // signer, but its cert belongs in the trust bundle. + _ = dbgen.CryptoKey(t, db, database.CryptoKey{ + Feature: database.CryptoKeyFeatureNatsCa, + Sequence: 2, + StartsAt: now.Add(time.Hour), + }) + + ca, err := FetchNATSCA(ctx, db) + require.NoError(t, err) + require.Equal(t, current.Sequence, ca.Sequence) + require.Len(t, ca.TrustBundle, 2) + }) +} diff --git a/coderd/cryptokeys/rotate.go b/coderd/cryptokeys/rotate.go index e768d53273dd3..f6848c2b2b835 100644 --- a/coderd/cryptokeys/rotate.go +++ b/coderd/cryptokeys/rotate.go @@ -20,6 +20,11 @@ const ( WorkspaceAppsTokenDuration = time.Minute OIDCConvertTokenDuration = time.Minute * 5 TailnetResumeTokenDuration = time.Hour * 24 + // NATSCATokenDuration is the maximum lifetime of a leaf certificate + // minted under the NATS cluster CA. Old CA rows must remain valid trust + // roots for this long after rotation so that replicas holding leaves + // signed by the old CA can still be verified. + NATSCATokenDuration = time.Hour * 24 * 30 // defaultRotationInterval is the default interval at which keys are checked for rotation. defaultRotationInterval = time.Minute * 10 @@ -170,7 +175,7 @@ func (k *rotator) rotateKeys(ctx context.Context) error { } func (k *rotator) insertNewKey(ctx context.Context, tx database.Store, feature database.CryptoKeyFeature, startsAt time.Time) (database.CryptoKey, error) { - secret, err := generateNewSecret(feature) + secret, err := generateNewSecret(feature, startsAt) if err != nil { return database.CryptoKey{}, xerrors.Errorf("generate new secret: %w", err) } @@ -227,7 +232,7 @@ func (k *rotator) rotateKey(ctx context.Context, tx database.Store, key database return []database.CryptoKey{updatedKey, newKey}, nil } -func generateNewSecret(feature database.CryptoKeyFeature) (string, error) { +func generateNewSecret(feature database.CryptoKeyFeature, startsAt time.Time) (string, error) { switch feature { case database.CryptoKeyFeatureWorkspaceAppsAPIKey: return generateKey(32) @@ -237,6 +242,8 @@ func generateNewSecret(feature database.CryptoKeyFeature) (string, error) { return generateKey(64) case database.CryptoKeyFeatureTailnetResume: return generateKey(64) + case database.CryptoKeyFeatureNatsCa: + return generateCASecret(startsAt) } return "", xerrors.Errorf("unknown feature: %s", feature) } @@ -260,6 +267,8 @@ func tokenDuration(feature database.CryptoKeyFeature) time.Duration { return OIDCConvertTokenDuration case database.CryptoKeyFeatureTailnetResume: return TailnetResumeTokenDuration + case database.CryptoKeyFeatureNatsCa: + return NATSCATokenDuration default: return 0 } diff --git a/coderd/cryptokeys/rotate_internal_test.go b/coderd/cryptokeys/rotate_internal_test.go index a8202320aea09..862a5d9554a09 100644 --- a/coderd/cryptokeys/rotate_internal_test.go +++ b/coderd/cryptokeys/rotate_internal_test.go @@ -104,6 +104,58 @@ func Test_rotateKeys(t *testing.T) { require.Equal(t, newKey, keys[0]) }) + t.Run("RotatesNATSCA", func(t *testing.T) { + t.Parallel() + + var ( + db, _ = dbtestutil.NewDB(t) + clock = quartz.NewMock(t) + keyDuration = time.Hour * 24 * 7 + logger = testutil.Logger(t) + ctx = testutil.Context(t, testutil.WaitShort) + ) + + kr := &rotator{ + db: db, + keyDuration: keyDuration, + clock: clock, + logger: logger, + features: []database.CryptoKeyFeature{ + database.CryptoKeyFeatureNatsCa, + }, + } + + now := dbnow(clock) + + oldKey := dbgen.CryptoKey(t, db, database.CryptoKey{ + Feature: database.CryptoKeyFeatureNatsCa, + StartsAt: now, + Sequence: 4, + }) + + // Advance the window to just inside rotation time. + _ = clock.Advance(keyDuration - time.Minute*59) + err := kr.rotateKeys(ctx) + require.NoError(t, err) + + // The old CA must remain a valid trust root for the maximum leaf + // lifetime after rotation. + expectedDeletesAt := oldKey.ExpiresAt(keyDuration).Add(NATSCATokenDuration + time.Hour) + oldKey, err = db.GetCryptoKeyByFeatureAndSequence(ctx, database.GetCryptoKeyByFeatureAndSequenceParams{ + Feature: oldKey.Feature, + Sequence: oldKey.Sequence, + }) + require.NoError(t, err) + require.Equal(t, expectedDeletesAt, oldKey.DeletesAt.Time.UTC()) + + newKey, err := db.GetCryptoKeyByFeatureAndSequence(ctx, database.GetCryptoKeyByFeatureAndSequenceParams{ + Feature: database.CryptoKeyFeatureNatsCa, + Sequence: oldKey.Sequence + 1, + }) + require.NoError(t, err) + requireKey(t, newKey, database.CryptoKeyFeatureNatsCa, oldKey.ExpiresAt(keyDuration), nullTime, oldKey.Sequence+1) + }) + t.Run("DoesNotRotateValidKeys", func(t *testing.T) { t.Parallel() @@ -407,7 +459,7 @@ func Test_rotateKeys(t *testing.T) { keys, err := db.GetCryptoKeys(ctx) require.NoError(t, err) - require.Len(t, keys, 5) + require.Len(t, keys, 6) kbf, err := keysByFeature(keys, database.AllCryptoKeyFeatureValues()) require.NoError(t, err) @@ -420,6 +472,7 @@ func Test_rotateKeys(t *testing.T) { // caused a key to be inserted. require.Len(t, kbf[database.CryptoKeyFeatureTailnetResume], 1) require.Len(t, kbf[database.CryptoKeyFeatureWorkspaceAppsToken], 1) + require.Len(t, kbf[database.CryptoKeyFeatureNatsCa], 1) oidcKey := kbf[database.CryptoKeyFeatureOIDCConvert][0] tailnetKey := kbf[database.CryptoKeyFeatureTailnetResume][0] @@ -586,6 +639,14 @@ func requireKey(t *testing.T, key database.CryptoKey, feature database.CryptoKey require.Equal(t, deletesAt.Time.UTC(), key.DeletesAt.Time.UTC()) require.Equal(t, sequence, key.Sequence) + // The NATS CA secret is a PEM bundle rather than hex-encoded bytes. + if key.Feature == database.CryptoKeyFeatureNatsCa { + cert, _, err := parseCASecret(key.Secret.String) + require.NoError(t, err) + require.True(t, cert.IsCA) + return + } + secret, err := hex.DecodeString(key.Secret.String) require.NoError(t, err) diff --git a/coderd/database/dbgen/dbgen.go b/coderd/database/dbgen/dbgen.go index 3955220efeeda..6ed85c122d487 100644 --- a/coderd/database/dbgen/dbgen.go +++ b/coderd/database/dbgen/dbgen.go @@ -2,13 +2,19 @@ package dbgen import ( "context" + "crypto/ecdsa" + "crypto/elliptic" "crypto/rand" + "crypto/x509" + "crypto/x509/pkix" "database/sql" "encoding/hex" "encoding/json" + "encoding/pem" "errors" "fmt" "maps" + "math/big" "net" "strings" "testing" @@ -2208,10 +2214,44 @@ func newCryptoKeySecret(feature database.CryptoKeyFeature) (string, error) { return generateCryptoKey(64) case database.CryptoKeyFeatureTailnetResume: return generateCryptoKey(64) + case database.CryptoKeyFeatureNatsCa: + return generateCACryptoKeySecret() } return "", xerrors.Errorf("unknown feature: %s", feature) } +// generateCACryptoKeySecret generates a self-signed CA certificate and +// private key as a PEM bundle, matching the secret format that +// coderd/cryptokeys produces for the nats_ca feature. +func generateCACryptoKeySecret() (string, error) { + key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + return "", xerrors.Errorf("generate key: %w", err) + } + template := &x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{CommonName: "dbgen-ca"}, + NotBefore: time.Now().Add(-time.Hour), + NotAfter: time.Now().Add(24 * time.Hour), + KeyUsage: x509.KeyUsageCertSign, + BasicConstraintsValid: true, + IsCA: true, + MaxPathLenZero: true, + } + der, err := x509.CreateCertificate(rand.Reader, template, template, key.Public(), key) + if err != nil { + return "", xerrors.Errorf("create certificate: %w", err) + } + keyDER, err := x509.MarshalECPrivateKey(key) + if err != nil { + return "", xerrors.Errorf("marshal private key: %w", err) + } + var secret []byte + secret = append(secret, pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der})...) + secret = append(secret, pem.EncodeToMemory(&pem.Block{Type: "EC PRIVATE KEY", Bytes: keyDER})...) + return string(secret), nil +} + func generateCryptoKey(length int) (string, error) { b := make([]byte, length) _, err := rand.Read(b) diff --git a/coderd/database/dump.sql b/coderd/database/dump.sql index 6f4d588d67f7a..384f1adf9e1bf 100644 --- a/coderd/database/dump.sql +++ b/coderd/database/dump.sql @@ -373,7 +373,8 @@ CREATE TYPE crypto_key_feature AS ENUM ( 'workspace_apps_token', 'workspace_apps_api_key', 'oidc_convert', - 'tailnet_resume' + 'tailnet_resume', + 'nats_ca' ); CREATE TYPE display_app AS ENUM ( diff --git a/coderd/database/lock.go b/coderd/database/lock.go index 8d0894abc8756..fa3a4ac20b763 100644 --- a/coderd/database/lock.go +++ b/coderd/database/lock.go @@ -16,6 +16,7 @@ const ( LockIDReconcileSystemRoles LockIDBoundaryUsageStats LockIDAIProvidersEnvSeed + LockIDNATSCACreate ) // GenLockID generates a unique and consistent lock ID from a given string. diff --git a/coderd/database/migrations/000520_nats_ca_crypto_key_feature.down.sql b/coderd/database/migrations/000520_nats_ca_crypto_key_feature.down.sql new file mode 100644 index 0000000000000..ec36128a5146a --- /dev/null +++ b/coderd/database/migrations/000520_nats_ca_crypto_key_feature.down.sql @@ -0,0 +1,16 @@ +DELETE FROM crypto_keys WHERE feature = 'nats_ca'; + +CREATE TYPE old_crypto_key_feature AS ENUM ( + 'workspace_apps_token', + 'workspace_apps_api_key', + 'oidc_convert', + 'tailnet_resume' +); + +ALTER TABLE crypto_keys + ALTER COLUMN feature TYPE old_crypto_key_feature + USING (feature::text::old_crypto_key_feature); + +DROP TYPE crypto_key_feature; + +ALTER TYPE old_crypto_key_feature RENAME TO crypto_key_feature; diff --git a/coderd/database/migrations/000520_nats_ca_crypto_key_feature.up.sql b/coderd/database/migrations/000520_nats_ca_crypto_key_feature.up.sql new file mode 100644 index 0000000000000..c37227451d263 --- /dev/null +++ b/coderd/database/migrations/000520_nats_ca_crypto_key_feature.up.sql @@ -0,0 +1 @@ +ALTER TYPE crypto_key_feature ADD VALUE IF NOT EXISTS 'nats_ca'; diff --git a/coderd/database/models.go b/coderd/database/models.go index e52d5f6c4a96d..89ee4e41ce994 100644 --- a/coderd/database/models.go +++ b/coderd/database/models.go @@ -1884,6 +1884,7 @@ const ( CryptoKeyFeatureWorkspaceAppsAPIKey CryptoKeyFeature = "workspace_apps_api_key" CryptoKeyFeatureOIDCConvert CryptoKeyFeature = "oidc_convert" CryptoKeyFeatureTailnetResume CryptoKeyFeature = "tailnet_resume" + CryptoKeyFeatureNatsCa CryptoKeyFeature = "nats_ca" ) func (e *CryptoKeyFeature) Scan(src interface{}) error { @@ -1926,7 +1927,8 @@ func (e CryptoKeyFeature) Valid() bool { case CryptoKeyFeatureWorkspaceAppsToken, CryptoKeyFeatureWorkspaceAppsAPIKey, CryptoKeyFeatureOIDCConvert, - CryptoKeyFeatureTailnetResume: + CryptoKeyFeatureTailnetResume, + CryptoKeyFeatureNatsCa: return true } return false @@ -1938,6 +1940,7 @@ func AllCryptoKeyFeatureValues() []CryptoKeyFeature { CryptoKeyFeatureWorkspaceAppsAPIKey, CryptoKeyFeatureOIDCConvert, CryptoKeyFeatureTailnetResume, + CryptoKeyFeatureNatsCa, } } diff --git a/enterprise/coderd/workspaceproxy_test.go b/enterprise/coderd/workspaceproxy_test.go index 41956485521b8..a9cf27fe71b47 100644 --- a/enterprise/coderd/workspaceproxy_test.go +++ b/enterprise/coderd/workspaceproxy_test.go @@ -1092,6 +1092,12 @@ func TestGetCryptoKeys(t *testing.T) { require.Error(t, err) require.ErrorAs(t, err, &sdkErr) require.Equal(t, http.StatusBadRequest, sdkErr.StatusCode()) + // The NATS cluster CA bundle contains a private key and must never be + // served to workspace proxies. + _, err = proxy.SDKClient.CryptoKeys(ctx, codersdk.CryptoKeyFeature(database.CryptoKeyFeatureNatsCa)) + require.Error(t, err) + require.ErrorAs(t, err, &sdkErr) + require.Equal(t, http.StatusBadRequest, sdkErr.StatusCode()) _, err = proxy.SDKClient.CryptoKeys(ctx, "invalid") require.Error(t, err) require.ErrorAs(t, err, &sdkErr) From 774ad4b3aa92f717d6c683e7f4e6928fa71dfb43 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Mon, 22 Jun 2026 21:40:49 +0000 Subject: [PATCH 2/2] refactor(coderd/cryptokeys): reuse rotator lock for NATS CA bootstrap FetchNATSCA bootstrapped the nats_ca crypto key under a dedicated LockIDNATSCACreate advisory lock, separate from the rotator's LockIDCryptoKeyRotation. This was safe in practice: the server only starts the rotator (coderd.New) after FetchNATSCA returns during pubsub setup, so the two writers never overlap and never contend on the crypto_keys (feature, sequence) primary key. This is a defensive consistency change, not a bug fix. Reusing the rotator's lock makes the bootstrap path and the rotator mutually exclusive regardless of call ordering, so a future caller that invokes FetchNATSCA concurrently with the rotator can't race the primary key. It also keeps nats_ca consistent with how the other crypto key features are created (single advisory lock). The now-unused LockIDNATSCACreate is removed. --- coderd/cryptokeys/ca.go | 10 ++++++++-- coderd/database/lock.go | 1 - 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/coderd/cryptokeys/ca.go b/coderd/cryptokeys/ca.go index 05f89477c50e9..cfff41843e872 100644 --- a/coderd/cryptokeys/ca.go +++ b/coderd/cryptokeys/ca.go @@ -72,9 +72,15 @@ func FetchNATSCA(ctx context.Context, db database.Store) (*NATSCA, error) { // No active CA exists. Create one inside a transaction under an advisory // lock, re-checking after the lock is acquired so that concurrent callers - // insert exactly one row. This mirrors rotator.rotateKeys. + // insert exactly one row. We reuse the rotator's lock + // (LockIDCryptoKeyRotation) rather than a dedicated one so that this + // bootstrap path and the rotator are always serialized against each + // other on the (feature, sequence) primary key. Today the server starts + // the rotator only after this fetch returns, so they never actually + // overlap; sharing the lock keeps that safe even if a future caller + // invokes FetchNATSCA concurrently with the rotator. err = db.InTx(func(tx database.Store) error { - err := tx.AcquireLock(ctx, database.LockIDNATSCACreate) + err := tx.AcquireLock(ctx, database.LockIDCryptoKeyRotation) if err != nil { return xerrors.Errorf("acquire lock: %w", err) } diff --git a/coderd/database/lock.go b/coderd/database/lock.go index fa3a4ac20b763..8d0894abc8756 100644 --- a/coderd/database/lock.go +++ b/coderd/database/lock.go @@ -16,7 +16,6 @@ const ( LockIDReconcileSystemRoles LockIDBoundaryUsageStats LockIDAIProvidersEnvSeed - LockIDNATSCACreate ) // GenLockID generates a unique and consistent lock ID from a given string.