Skip to content

Commit 2ed8d23

Browse files
committed
Debug previous version test failure in CI/CD
Signed-off-by: jyejare <jyejare@redhat.com>
1 parent af57b86 commit 2ed8d23

File tree

1 file changed

+119
-10
lines changed

1 file changed

+119
-10
lines changed

infra/feast-operator/test/utils/test_util.go

Lines changed: 119 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -179,17 +179,37 @@ func isFeatureStoreHavingRemoteRegistry(namespace, featureStoreName string) (boo
179179
startTime := time.Now()
180180

181181
for time.Since(startTime) < timeout {
182+
// First check if the resource exists
183+
checkCmd := exec.Command("kubectl", "get", "featurestore", featureStoreName, "-n", namespace)
184+
if err := checkCmd.Run(); err != nil {
185+
// Resource doesn't exist yet, retry
186+
fmt.Printf("FeatureStore %s/%s does not exist yet, waiting...\n", namespace, featureStoreName)
187+
time.Sleep(interval)
188+
continue
189+
}
190+
182191
cmd := exec.Command("kubectl", "get", "featurestore", featureStoreName, "-n", namespace,
183192
"-o=jsonpath='{.status.applied.services.registry}'")
184193

185194
output, err := cmd.Output()
186195
if err != nil {
187-
// Retry only on transient errors
188-
if _, ok := err.(*exec.ExitError); ok {
189-
time.Sleep(interval)
190-
continue
196+
// Check if it's a "not found" error in the status path (resource exists but status not ready)
197+
if exitErr, ok := err.(*exec.ExitError); ok {
198+
stderr := string(exitErr.Stderr)
199+
if strings.Contains(stderr, "not found") || strings.Contains(stderr, "NotFound") {
200+
// Status not ready yet, retry
201+
fmt.Printf("FeatureStore %s/%s status not ready yet, waiting...\n", namespace, featureStoreName)
202+
time.Sleep(interval)
203+
continue
204+
}
205+
}
206+
fmt.Printf("Error getting featurestore registry status: %v\n", err)
207+
if output != nil {
208+
fmt.Printf("Output: %s\n", string(output))
191209
}
192-
return false, err // Return immediately on non-transient errors
210+
// For other errors, retry as they might be transient
211+
time.Sleep(interval)
212+
continue
193213
}
194214

195215
// Convert output to string and trim any extra spaces
@@ -429,17 +449,74 @@ func DeleteOperatorDeployment(testDir string) {
429449
func DeployPreviousVersionOperator() {
430450
var err error
431451

452+
// Clean up any existing test namespace and resources from previous test runs
453+
// This is important because E2E tests run before previous version tests and use the same namespace
454+
By("Cleaning up any existing test namespace and resources")
455+
testNamespace := "test-ns-feast"
456+
457+
// Delete the test namespace if it exists (this will also delete all resources in it)
458+
cmd := exec.Command("kubectl", "delete", "ns", testNamespace, "--ignore-not-found=true", "--timeout=60s")
459+
_, err = Run(cmd, "/test/upgrade")
460+
ExpectWithOffset(1, err).NotTo(HaveOccurred())
461+
462+
// Wait for namespace deletion to complete
463+
By("Waiting for test namespace deletion to complete")
464+
nsWaitTimeout := 2 * time.Minute
465+
nsWaitInterval := 2 * time.Second
466+
nsWaitStartTime := time.Now()
467+
nsDeleted := false
468+
for time.Since(nsWaitStartTime) < nsWaitTimeout {
469+
cmd = exec.Command("kubectl", "get", "ns", testNamespace)
470+
err = cmd.Run()
471+
if err != nil {
472+
// Namespace doesn't exist, deletion complete
473+
nsDeleted = true
474+
break
475+
}
476+
time.Sleep(nsWaitInterval)
477+
}
478+
if !nsDeleted {
479+
fmt.Printf("Warning: Namespace %s deletion did not complete within timeout, continuing anyway\n", testNamespace)
480+
}
481+
432482
// Delete existing CRD first to avoid version conflicts when downgrading
433483
// The old operator version may not have v1, but the cluster might have v1 in status.storedVersions
434484
By("Deleting existing CRD to allow downgrade to previous version")
435-
cmd := exec.Command("kubectl", "delete", "crd", "featurestores.feast.dev", "--ignore-not-found=true")
485+
486+
// First, delete any remaining FeatureStore resources across all namespaces to avoid finalizer issues
487+
cmd = exec.Command("kubectl", "delete", "featurestore", "--all", "--all-namespaces", "--ignore-not-found=true")
488+
_, err = Run(cmd, "/test/upgrade")
489+
ExpectWithOffset(1, err).NotTo(HaveOccurred())
490+
491+
// Wait for resources to be deleted
492+
time.Sleep(5 * time.Second)
493+
494+
// Delete the CRD
495+
cmd = exec.Command("kubectl", "delete", "crd", "featurestores.feast.dev", "--ignore-not-found=true")
436496
_, err = Run(cmd, "/test/upgrade")
437497
ExpectWithOffset(1, err).NotTo(HaveOccurred())
438498

439-
// Wait a bit for CRD deletion to complete
440-
time.Sleep(2 * time.Second)
499+
// Wait for CRD deletion to complete by polling
500+
By("Waiting for CRD deletion to complete")
501+
waitTimeout := 2 * time.Minute
502+
waitInterval := 2 * time.Second
503+
waitStartTime := time.Now()
504+
crdDeleted := false
505+
for time.Since(waitStartTime) < waitTimeout {
506+
cmd = exec.Command("kubectl", "get", "crd", "featurestores.feast.dev")
507+
err = cmd.Run()
508+
if err != nil {
509+
// CRD doesn't exist, deletion complete
510+
crdDeleted = true
511+
break
512+
}
513+
time.Sleep(waitInterval)
514+
}
515+
if !crdDeleted {
516+
ExpectWithOffset(1, errors.New("CRD deletion did not complete within timeout")).NotTo(HaveOccurred())
517+
}
441518

442-
cmd = exec.Command("kubectl", "apply", "--server-side", "--force-conflicts", "-f", fmt.Sprintf("https://raw.githubusercontent.com/feast-dev/feast/refs/tags/v%s/infra/feast-operator/dist/install.yaml", feastversion.FeastVersion))
519+
cmd = exec.Command("kubectl", "apply", "-f", fmt.Sprintf("https://raw.githubusercontent.com/feast-dev/feast/refs/tags/v%s/infra/feast-operator/dist/install.yaml", feastversion.FeastVersion))
443520
_, err = Run(cmd, "/test/upgrade")
444521
ExpectWithOffset(1, err).NotTo(HaveOccurred())
445522

@@ -463,9 +540,41 @@ func GetRemoteRegistryPreviousVerCR() string {
463540

464541
// CreateNamespace - create the namespace for tests
465542
func CreateNamespace(namespace string, testDir string) error {
466-
cmd := exec.Command("kubectl", "create", "ns", namespace)
543+
// First check if namespace exists and wait for it to be fully deleted if it's terminating
544+
cmd := exec.Command("kubectl", "get", "ns", namespace)
545+
err := cmd.Run()
546+
if err == nil {
547+
// Namespace exists, check if it's terminating
548+
cmd = exec.Command("kubectl", "get", "ns", namespace, "-o=jsonpath={.status.phase}")
549+
output, err := cmd.Output()
550+
if err == nil && strings.TrimSpace(string(output)) == "Terminating" {
551+
// Wait for termination to complete
552+
timeout := 2 * time.Minute
553+
interval := 2 * time.Second
554+
startTime := time.Now()
555+
for time.Since(startTime) < timeout {
556+
cmd = exec.Command("kubectl", "get", "ns", namespace)
557+
if err := cmd.Run(); err != nil {
558+
// Namespace deleted
559+
break
560+
}
561+
time.Sleep(interval)
562+
}
563+
} else if err == nil && strings.TrimSpace(string(output)) == "Active" {
564+
// Namespace already exists and is active, that's fine
565+
return nil
566+
}
567+
}
568+
569+
// Create the namespace. An existing Active namespace already returned above; an "already exists" failure from the create itself is tolerated in the error check below.
570+
cmd = exec.Command("kubectl", "create", "ns", namespace)
467571
output, err := Run(cmd, testDir)
468572
if err != nil {
573+
// Check if error is because namespace already exists
574+
if strings.Contains(string(output), "AlreadyExists") || strings.Contains(string(output), "already exists") {
575+
// Namespace already exists, that's fine
576+
return nil
577+
}
469578
return fmt.Errorf("failed to create namespace %s: %v\nOutput: %s", namespace, err, output)
470579
}
471580
return nil

0 commit comments

Comments
 (0)