From 1b8b8a5b285e77b8c177943b7589081dfd223bc7 Mon Sep 17 00:00:00 2001 From: pnguyen44 Date: Tue, 16 Jun 2026 11:25:29 -0400 Subject: [PATCH 1/2] HYPERFLEET-1185 - feat: add performance tests and tooling --- .env.example | 2 + .gitignore | 3 + AGENTS.md | 2 +- README.md | 6 + e2e/cluster/perf_cascade_delete_latency.go | 71 +++++++ e2e/cluster/perf_create_latency.go | 47 +++++ e2e/cluster/perf_delete_latency.go | 55 +++++ e2e/cluster/perf_list_filtered_latency.go | 73 +++++++ e2e/cluster/perf_list_latency.go | 43 ++++ e2e/cluster/perf_read_entity_size_latency.go | 54 +++++ e2e/cluster/perf_read_latency.go | 43 ++++ e2e/cluster/perf_update_latency.go | 54 +++++ e2e/nodepool/perf_create_latency.go | 57 ++++++ e2e/nodepool/perf_delete_latency.go | 66 ++++++ perf/README.md | 85 ++++++++ perf/parse-report.sh | 76 +++++++ perf/run-in-cluster.sh | 67 ++++++ perf/seed-clusters.sh | 191 ++++++++++++++++++ pkg/client/cluster.go | 7 +- .../clusters/cluster-request-large.json | 53 +++++ .../clusters/cluster-request-small.json | 31 +++ 21 files changed, 1084 insertions(+), 2 deletions(-) create mode 100644 .env.example create mode 100644 e2e/cluster/perf_cascade_delete_latency.go create mode 100644 e2e/cluster/perf_create_latency.go create mode 100644 e2e/cluster/perf_delete_latency.go create mode 100644 e2e/cluster/perf_list_filtered_latency.go create mode 100644 e2e/cluster/perf_list_latency.go create mode 100644 e2e/cluster/perf_read_entity_size_latency.go create mode 100644 e2e/cluster/perf_read_latency.go create mode 100644 e2e/cluster/perf_update_latency.go create mode 100644 e2e/nodepool/perf_create_latency.go create mode 100644 e2e/nodepool/perf_delete_latency.go create mode 100644 perf/README.md create mode 100755 perf/parse-report.sh create mode 100755 perf/run-in-cluster.sh create mode 100755 perf/seed-clusters.sh create mode 100644 testdata/payloads/clusters/cluster-request-large.json create mode 100644 testdata/payloads/clusters/cluster-request-small.json diff --git 
a/.env.example b/.env.example new file mode 100644 index 0000000..ce447bd --- /dev/null +++ b/.env.example @@ -0,0 +1,2 @@ +QUAY_USER= +HYPERFLEET_API_URL= diff --git a/.gitignore b/.gitignore index c07f462..d3c12a4 100644 --- a/.gitignore +++ b/.gitignore @@ -48,6 +48,9 @@ vendor/ .env.*.local deploy-scripts/.env +# Perf test results +perf/results/ + # Claude Code files .claude/ diff --git a/AGENTS.md b/AGENTS.md index c43b0a3..e171c0e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -51,7 +51,7 @@ Pre-flight order: `make check` then `make build`. - **IMPORTANT:** Test files use `.go` extension, NOT `_test.go`. E2E tests are compiled into the binary, not run via `go test`. - Location: `e2e/{suite}/descriptive-name.go` (package matches directory name) -- Test name format: `[Suite: component][category] Description` (e.g., `[Suite: cluster][baseline] Cluster Resource Type Lifecycle`). Known categories: `baseline`, `update`, `delete`, `concurrent`, `negative`. +- Test name format: `[Suite: component][category] Description` (e.g., `[Suite: cluster][baseline] Cluster Resource Type Lifecycle`). Known categories: `baseline`, `update`, `delete`, `concurrent`, `negative`, `perf`. - Test suites auto-register via blank import in `e2e/e2e.go` ### Labels diff --git a/README.md b/README.md index 0215df0..78681f5 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,12 @@ export HYPERFLEET_API_URL=https://api.hyperfleet.example.com Run `./bin/hyperfleet-e2e test --help` for all options. +### Performance Tests + +Performance tests are labeled `perf` and measure baseline latencies for core operations. They run inside the cluster for production-representative numbers. + +See [perf/README.md](perf/README.md). 
+ ## Configuration Configuration priority (highest to lowest): diff --git a/e2e/cluster/perf_cascade_delete_latency.go b/e2e/cluster/perf_cascade_delete_latency.go new file mode 100644 index 0000000..51c2b7e --- /dev/null +++ b/e2e/cluster/perf_cascade_delete_latency.go @@ -0,0 +1,71 @@ +package cluster + +import ( + "context" + "net/http" + "time" + + "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: cluster][perf] Cascade delete-to-hard-delete latency", + ginkgo.Label(labels.Tier1, labels.Performance), + func() { + var h *helper.Helper + var clusterID string + var nodepoolID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + cluster, err := h.Client.CreateClusterFromPayload(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred()) + clusterID = *cluster.Id + + ginkgo.DeferCleanup(func(ctx context.Context) { + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: failed to cleanup cluster %s: %v\n", clusterID, err) + } + }) + + ginkgo.By("waiting for cluster to reach Reconciled") + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). 
+ Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + ginkgo.By("creating a nodepool on the cluster") + nodepool, err := h.Client.CreateNodePoolFromPayload(ctx, clusterID, h.TestDataPath("payloads/nodepools/nodepool-request.json")) + Expect(err).NotTo(HaveOccurred()) + nodepoolID = *nodepool.Id + + ginkgo.DeferCleanup(func(ctx context.Context) { + if err := h.CleanupTestNodePool(ctx, clusterID, nodepoolID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: failed to cleanup nodepool %s: %v\n", nodepoolID, err) + } + }) + + ginkgo.By("waiting for nodepool to reach Reconciled") + Eventually(h.PollNodePool(ctx, clusterID, nodepoolID), h.Cfg.Timeouts.NodePool.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should cascade-delete a cluster with nodepools and reach hard-delete within acceptable latency", func(ctx context.Context) { + ginkgo.By("deleting cluster (with attached nodepool) and timing until hard-delete (404)") + start := time.Now() + + _, err := h.Client.DeleteCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + + Eventually(h.PollClusterHTTPStatus(ctx, clusterID), h.Cfg.Timeouts.Cluster.Deleted, h.Cfg.Polling.Interval). + Should(Equal(http.StatusNotFound)) + + elapsed := time.Since(start) + ginkgo.GinkgoWriter.Printf("[PERF] Cluster cascade delete-to-hard-delete latency: %v\n", elapsed) + }) + }, +) diff --git a/e2e/cluster/perf_create_latency.go b/e2e/cluster/perf_create_latency.go new file mode 100644 index 0000000..8676fd7 --- /dev/null +++ b/e2e/cluster/perf_create_latency.go @@ -0,0 +1,47 @@ +package cluster + +import ( + "context" + "time" + + "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: cluster][perf] Create-to-reconciled latency", + ginkgo.Label(labels.Tier1, labels.Performance), + func() { + var h *helper.Helper + var clusterID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + }) + + ginkgo.It("should create a cluster and reach Reconciled within acceptable latency", func(ctx context.Context) { + ginkgo.By("creating a cluster and timing until Reconciled") + start := time.Now() + + cluster, err := h.Client.CreateClusterFromPayload(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred()) + clusterID = *cluster.Id + + ginkgo.DeferCleanup(func(ctx context.Context) { + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: failed to cleanup cluster %s: %v\n", clusterID, err) + } + }) + + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + elapsed := time.Since(start) + ginkgo.GinkgoWriter.Printf("[PERF] Cluster create-to-reconciled latency: %v\n", elapsed) + }) + }, +) diff --git a/e2e/cluster/perf_delete_latency.go b/e2e/cluster/perf_delete_latency.go new file mode 100644 index 0000000..d85132a --- /dev/null +++ b/e2e/cluster/perf_delete_latency.go @@ -0,0 +1,55 @@ +package cluster + +import ( + "context" + "net/http" + "time" + + "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: cluster][perf] Delete-to-hard-delete latency", + ginkgo.Label(labels.Tier1, labels.Performance), + func() { + var h *helper.Helper + var clusterID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + cluster, err := h.Client.CreateClusterFromPayload(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred()) + clusterID = *cluster.Id + + ginkgo.DeferCleanup(func(ctx context.Context) { + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: failed to cleanup cluster %s: %v\n", clusterID, err) + } + }) + + ginkgo.By("waiting for cluster to reach Reconciled before delete") + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should delete a cluster and reach hard-delete within acceptable latency", func(ctx context.Context) { + ginkgo.By("deleting cluster and timing until hard-delete (404)") + start := time.Now() + + _, err := h.Client.DeleteCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + + Eventually(h.PollClusterHTTPStatus(ctx, clusterID), h.Cfg.Timeouts.Cluster.Deleted, h.Cfg.Polling.Interval). 
+ Should(Equal(http.StatusNotFound)) + + elapsed := time.Since(start) + ginkgo.GinkgoWriter.Printf("[PERF] Cluster delete-to-hard-delete latency: %v\n", elapsed) + }) + }, +) diff --git a/e2e/cluster/perf_list_filtered_latency.go b/e2e/cluster/perf_list_filtered_latency.go new file mode 100644 index 0000000..3f58dc9 --- /dev/null +++ b/e2e/cluster/perf_list_filtered_latency.go @@ -0,0 +1,73 @@ +package cluster + +import ( + "context" + "time" + + "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: cluster][perf] API list latency with filters and pagination", + ginkgo.Label(labels.Tier1, labels.Performance), + func() { + var h *helper.Helper + var clusterID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + cluster, err := h.Client.CreateClusterFromPayload(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred()) + clusterID = *cluster.Id + + ginkgo.DeferCleanup(func(ctx context.Context) { + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: failed to cleanup cluster %s: %v\n", clusterID, err) + } + }) + }) + + ginkgo.It("should list clusters with search filter within acceptable latency", func(ctx context.Context) { + ginkgo.By("measuring GET /clusters?search=... 
response time") + filter := openapi.SearchParams("labels.environment='test'") + start := time.Now() + _, err := h.Client.ListClustersWithParams(ctx, &openapi.GetClustersParams{ + Search: &filter, + }) + Expect(err).NotTo(HaveOccurred()) + elapsed := time.Since(start) + ginkgo.GinkgoWriter.Printf("[PERF] GET /clusters (search filter) latency: %v\n", elapsed) + }) + + ginkgo.It("should list clusters with page size limit within acceptable latency", func(ctx context.Context) { + ginkgo.By("measuring GET /clusters?pageSize=10 response time") + pageSize := openapi.QueryParamsPageSize(10) + start := time.Now() + _, err := h.Client.ListClustersWithParams(ctx, &openapi.GetClustersParams{ + PageSize: &pageSize, + }) + Expect(err).NotTo(HaveOccurred()) + elapsed := time.Since(start) + ginkgo.GinkgoWriter.Printf("[PERF] GET /clusters (pageSize=10) latency: %v\n", elapsed) + }) + + ginkgo.It("should list clusters with pagination within acceptable latency", func(ctx context.Context) { + ginkgo.By("measuring GET /clusters?page=1&pageSize=10 response time") + page := openapi.QueryParamsPage(1) + pageSize := openapi.QueryParamsPageSize(10) + start := time.Now() + _, err := h.Client.ListClustersWithParams(ctx, &openapi.GetClustersParams{ + Page: &page, + PageSize: &pageSize, + }) + Expect(err).NotTo(HaveOccurred()) + elapsed := time.Since(start) + ginkgo.GinkgoWriter.Printf("[PERF] GET /clusters (page=1, pageSize=10) latency: %v\n", elapsed) + }) + }, +) diff --git a/e2e/cluster/perf_list_latency.go b/e2e/cluster/perf_list_latency.go new file mode 100644 index 0000000..4920f94 --- /dev/null +++ b/e2e/cluster/perf_list_latency.go @@ -0,0 +1,43 @@ +package cluster + +import ( + "context" + "time" + + "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: cluster][perf] API list latency", + ginkgo.Label(labels.Tier1, labels.Performance), + func() { + var h *helper.Helper + var clusterID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + cluster, err := h.Client.CreateClusterFromPayload(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred()) + clusterID = *cluster.Id + + ginkgo.DeferCleanup(func(ctx context.Context) { + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: failed to cleanup cluster %s: %v\n", clusterID, err) + } + }) + }) + + ginkgo.It("should list clusters within acceptable latency", func(ctx context.Context) { + ginkgo.By("measuring GET /clusters response time") + start := time.Now() + _, err := h.Client.ListClusters(ctx) + Expect(err).NotTo(HaveOccurred()) + elapsed := time.Since(start) + ginkgo.GinkgoWriter.Printf("[PERF] GET /clusters latency: %v\n", elapsed) + }) + }, +) diff --git a/e2e/cluster/perf_read_entity_size_latency.go b/e2e/cluster/perf_read_entity_size_latency.go new file mode 100644 index 0000000..ef79df3 --- /dev/null +++ b/e2e/cluster/perf_read_entity_size_latency.go @@ -0,0 +1,54 @@ +package cluster + +import ( + "context" + "time" + + "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: cluster][perf] API read latency by entity size", + ginkgo.Label(labels.Tier1, labels.Performance), + func() { + var h *helper.Helper + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + }) + + sizes := []struct { + name string + payload string + }{ + {"small", "payloads/clusters/cluster-request-small.json"}, + {"medium", "payloads/clusters/cluster-request.json"}, + {"large", "payloads/clusters/cluster-request-large.json"}, + } + + for _, size := range sizes { + ginkgo.It("should read a "+size.name+" cluster within acceptable latency", func(ctx context.Context) { + ginkgo.By("creating a " + size.name + " cluster") + cluster, err := h.Client.CreateClusterFromPayload(ctx, h.TestDataPath(size.payload)) + Expect(err).NotTo(HaveOccurred()) + clusterID := *cluster.Id + + ginkgo.DeferCleanup(func(ctx context.Context) { + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: failed to cleanup cluster %s: %v\n", clusterID, err) + } + }) + + ginkgo.By("measuring GET /clusters/{id} response time for " + size.name + " entity") + start := time.Now() + _, err = h.Client.GetCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + elapsed := time.Since(start) + ginkgo.GinkgoWriter.Printf("[PERF] GET /clusters/%s (%s entity) latency: %v\n", clusterID, size.name, elapsed) + }) + } + }, +) diff --git a/e2e/cluster/perf_read_latency.go b/e2e/cluster/perf_read_latency.go new file mode 100644 index 0000000..0851ee0 --- /dev/null +++ b/e2e/cluster/perf_read_latency.go @@ -0,0 +1,43 @@ +package cluster + +import ( + "context" + "time" + + "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: cluster][perf] API read latency", + ginkgo.Label(labels.Tier1, labels.Performance), + func() { + var h *helper.Helper + var clusterID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + cluster, err := h.Client.CreateClusterFromPayload(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred()) + clusterID = *cluster.Id + + ginkgo.DeferCleanup(func(ctx context.Context) { + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: failed to cleanup cluster %s: %v\n", clusterID, err) + } + }) + }) + + ginkgo.It("should read a cluster within acceptable latency", func(ctx context.Context) { + ginkgo.By("measuring GET /clusters/{id} response time") + start := time.Now() + _, err := h.Client.GetCluster(ctx, clusterID) + Expect(err).NotTo(HaveOccurred()) + elapsed := time.Since(start) + ginkgo.GinkgoWriter.Printf("[PERF] GET /clusters/%s latency: %v\n", clusterID, elapsed) + }) + }, +) diff --git a/e2e/cluster/perf_update_latency.go b/e2e/cluster/perf_update_latency.go new file mode 100644 index 0000000..33f4337 --- /dev/null +++ b/e2e/cluster/perf_update_latency.go @@ -0,0 +1,54 @@ +package cluster + +import ( + "context" + "time" + + "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: cluster][perf] Update-to-re-reconciled latency", + ginkgo.Label(labels.Tier1, labels.Performance), + func() { + var h *helper.Helper + var clusterID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + cluster, err := h.Client.CreateClusterFromPayload(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred()) + clusterID = *cluster.Id + + ginkgo.DeferCleanup(func(ctx context.Context) { + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: failed to cleanup cluster %s: %v\n", clusterID, err) + } + }) + + ginkgo.By("waiting for cluster to reach Reconciled before update") + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should update a cluster and reach Reconciled within acceptable latency", func(ctx context.Context) { + ginkgo.By("patching cluster and timing until re-reconciled") + start := time.Now() + + _, err := h.Client.PatchClusterFromPayload(ctx, clusterID, h.TestDataPath("payloads/clusters/cluster-patch.json")) + Expect(err).NotTo(HaveOccurred()) + + Eventually(h.PollCluster(ctx, clusterID), h.Cfg.Timeouts.Cluster.Reconciled, h.Cfg.Polling.Interval). 
+ Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + elapsed := time.Since(start) + ginkgo.GinkgoWriter.Printf("[PERF] Cluster update-to-re-reconciled latency: %v\n", elapsed) + }) + }, +) diff --git a/e2e/nodepool/perf_create_latency.go b/e2e/nodepool/perf_create_latency.go new file mode 100644 index 0000000..d0b92b6 --- /dev/null +++ b/e2e/nodepool/perf_create_latency.go @@ -0,0 +1,57 @@ +package nodepool + +import ( + "context" + "time" + + "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: nodepool][perf] Create-to-reconciled latency", + ginkgo.Label(labels.Tier1, labels.Performance), + func() { + var h *helper.Helper + var clusterID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + var err error + clusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred()) + + ginkgo.DeferCleanup(func(ctx context.Context) { + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: failed to cleanup cluster %s: %v\n", clusterID, err) + } + }) + }) + + ginkgo.It("should create a nodepool and reach Reconciled within acceptable latency", func(ctx context.Context) { + ginkgo.By("creating a nodepool and timing until Reconciled") + start := time.Now() + + nodepool, err := h.Client.CreateNodePoolFromPayload(ctx, clusterID, h.TestDataPath("payloads/nodepools/nodepool-request.json")) + Expect(err).NotTo(HaveOccurred()) + nodepoolID := *nodepool.Id + + ginkgo.DeferCleanup(func(ctx context.Context) { + if err := 
h.CleanupTestNodePool(ctx, clusterID, nodepoolID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: failed to cleanup nodepool %s: %v\n", nodepoolID, err) + } + }) + + Eventually(h.PollNodePool(ctx, clusterID, nodepoolID), h.Cfg.Timeouts.NodePool.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + + elapsed := time.Since(start) + ginkgo.GinkgoWriter.Printf("[PERF] NodePool create-to-reconciled latency: %v\n", elapsed) + }) + }, +) diff --git a/e2e/nodepool/perf_delete_latency.go b/e2e/nodepool/perf_delete_latency.go new file mode 100644 index 0000000..42a5fcb --- /dev/null +++ b/e2e/nodepool/perf_delete_latency.go @@ -0,0 +1,66 @@ +package nodepool + +import ( + "context" + "net/http" + "time" + + "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" //nolint:staticcheck // dot import for test readability + + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/api/openapi" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/client" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/helper" + "github.com/openshift-hyperfleet/hyperfleet-e2e/pkg/labels" +) + +var _ = ginkgo.Describe("[Suite: nodepool][perf] Delete-to-hard-delete latency", + ginkgo.Label(labels.Tier1, labels.Performance), + func() { + var h *helper.Helper + var clusterID string + var nodepoolID string + + ginkgo.BeforeEach(func(ctx context.Context) { + h = helper.New() + + var err error + clusterID, err = h.GetTestCluster(ctx, h.TestDataPath("payloads/clusters/cluster-request.json")) + Expect(err).NotTo(HaveOccurred()) + + ginkgo.DeferCleanup(func(ctx context.Context) { + if err := h.CleanupTestCluster(ctx, clusterID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: failed to cleanup cluster %s: %v\n", clusterID, err) + } + }) + + nodepool, err := h.Client.CreateNodePoolFromPayload(ctx, clusterID, h.TestDataPath("payloads/nodepools/nodepool-request.json")) + Expect(err).NotTo(HaveOccurred()) + 
nodepoolID = *nodepool.Id + + ginkgo.DeferCleanup(func(ctx context.Context) { + if err := h.CleanupTestNodePool(ctx, clusterID, nodepoolID); err != nil { + ginkgo.GinkgoWriter.Printf("Warning: failed to cleanup nodepool %s: %v\n", nodepoolID, err) + } + }) + + ginkgo.By("waiting for nodepool to reach Reconciled before delete") + Eventually(h.PollNodePool(ctx, clusterID, nodepoolID), h.Cfg.Timeouts.NodePool.Reconciled, h.Cfg.Polling.Interval). + Should(helper.HaveResourceCondition(client.ConditionTypeReconciled, openapi.ResourceConditionStatusTrue)) + }) + + ginkgo.It("should delete a nodepool and reach hard-delete within acceptable latency", func(ctx context.Context) { + ginkgo.By("deleting nodepool and timing until hard-delete (404)") + start := time.Now() + + _, err := h.Client.DeleteNodePool(ctx, clusterID, nodepoolID) + Expect(err).NotTo(HaveOccurred()) + + Eventually(h.PollNodePoolHTTPStatus(ctx, clusterID, nodepoolID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). + Should(Equal(http.StatusNotFound)) + + elapsed := time.Since(start) + ginkgo.GinkgoWriter.Printf("[PERF] NodePool delete-to-hard-delete latency: %v\n", elapsed) + }) + }, +) diff --git a/perf/README.md b/perf/README.md new file mode 100644 index 0000000..7532daf --- /dev/null +++ b/perf/README.md @@ -0,0 +1,85 @@ +# Performance Tests + +Lightweight performance tests that measure baseline latencies for core HyperFleet operations. Tests run inside the cluster for production-representative numbers. + +## Table of contents + +- [Prerequisites](#prerequisites) + - [Pub/Sub broker setup](#pubsub-broker-setup) + - [Verify the stack](#verify-the-stack) +- [Seeding data](#seeding-data) +- [Running tests](#running-tests) +- [Parsing results](#parsing-results) + +## Prerequisites + +The full HyperFleet stack (API, Sentinel, adapters, broker) must be deployed and healthy. Use the [hyperfleet-infra](https://github.com/openshift-hyperfleet/hyperfleet-infra) project to set up the environment. 
+ +```bash +cp .env.example .env +``` + +### Pub/Sub broker setup + +Reconciliation tests require adapters to be connected to the correct Pub/Sub topics. In the `hyperfleet-infra` project: + +1. Set `use_pubsub = true` in `terraform/envs/gke/dev.tfvars` +2. Uncomment `pubsub_topic_configs` in `dev.tfvars` to define the topic/subscription topology +3. Run `make install-terraform` to create Pub/Sub resources and generate broker config files in `generated-values-from-terraform/` +4. Run `make install-adapters` to redeploy adapters with the correct broker config + +### Verify the stack + +Confirm the adapters are subscribed to the correct topic (not `placeholder`): + +```bash +kubectl exec -n hyperfleet deploy/adapter1-hyperfleet-adapter -- env | grep BROKER_TOPIC +``` + +Confirm Sentinel is publishing events: + +```bash +kubectl logs -n hyperfleet -l app.kubernetes.io/name=hyperfleet-sentinel --tail=20 +``` + +Look for `"Publishing event"` and `"Published event"` log lines. + +## Seeding data + +For realistic baselines, seed the database with clusters before running tests. The seeded clusters add realistic table size so query planner behavior and index performance reflect production conditions. + +```bash +# Port-forward the API +kubectl port-forward -n hyperfleet svc/hyperfleet-api 8000:8000 + +# In another terminal, seed 1000 clusters +export HYPERFLEET_API_URL=http://localhost:8000 +./perf/seed-clusters.sh 1000 + +# Check what's in the database +./perf/seed-clusters.sh status + +# Clean up seeded clusters only +./perf/seed-clusters.sh cleanup + +# Or delete ALL clusters (clean slate) +./perf/seed-clusters.sh reset +``` + +## Running tests + +Make sure you have [seeded the database](#seeding-data) first for realistic baselines. + +```bash +./perf/run-in-cluster.sh +``` + +This builds the image, pushes it, and runs the tests in-cluster. 
+ +## Parsing results + +```bash +./perf/parse-report.sh +``` + +This extracts `[PERF]` and `[FAIL]` lines from the latest output file and generates a summary report. diff --git a/perf/parse-report.sh b/perf/parse-report.sh new file mode 100755 index 0000000..3b6c738 --- /dev/null +++ b/perf/parse-report.sh @@ -0,0 +1,76 @@ +#!/bin/bash +# Generate a performance baseline report from perf test output. +# +# Usage: +# ./perf/parse-report.sh # uses most recent result +# ./perf/parse-report.sh output.txt # parse a specific file + +set -euo pipefail + +REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +[[ -f "$REPO_DIR/.env" ]] && set -a && source "$REPO_DIR/.env" && set +a + +RESULTS_DIR="$REPO_DIR/perf/results" + +if [[ -n "${1:-}" && -f "$1" ]]; then + INPUT="$1" + SOURCE="$1" +else + INPUT=$(ls -t "$RESULTS_DIR"/perf-baseline-*.txt 2>/dev/null | grep -v "\-report\.txt$" | head -1 || true) + if [[ -z "$INPUT" || ! -f "$INPUT" ]]; then + echo "ERROR: No results found in $RESULTS_DIR" + echo "Run ./perf/run-in-cluster.sh first, or pass a file:" + echo " ./perf/parse-report.sh " + exit 1 + fi + SOURCE="$INPUT (latest)" +fi + +BASE_NAME="${INPUT%-report.txt}" +BASE_NAME="${BASE_NAME%.txt}" +REPORT_FILE="${BASE_NAME}-report.txt" + +{ + +echo "============================================" +echo " HyperFleet Performance Baseline Report" +echo "============================================" +echo "" + +echo "--- Run Metadata ---" +echo "Source: $SOURCE" +echo "Generated: $(date '+%Y-%m-%d %H:%M:%S')" +KUBE_CTX=$(kubectl config current-context 2>/dev/null || echo "not available") +echo "Cluster: $KUBE_CTX" +echo "" + +echo "--- Test Summary ---" +SUMMARY=$(grep -E "[0-9]+ Passed" "$INPUT" 2>/dev/null | tail -1 || true) +if [[ -n "$SUMMARY" ]]; then + echo "$SUMMARY" +else + echo "No summary line found" +fi +DURATION=$(grep -E "^Ran [0-9]+" "$INPUT" 2>/dev/null | tail -1 || true) +if [[ -n "$DURATION" ]]; then + echo "$DURATION" +fi +echo "" + +echo "--- Baseline 
Latencies ---" +grep "\[PERF\]" "$INPUT" | sed 's/^[[:space:]]*/ /' || echo " No latency data found" +echo "" + +FAILURES=$(grep -c "\[FAIL\]" "$INPUT" 2>/dev/null || true) +if [[ "${FAILURES:-0}" -gt 0 ]]; then + echo "--- Failures ---" + grep "\[FAIL\]" "$INPUT" | sed 's/^[[:space:]]*/ /' + echo "" +fi + +echo "============================================" + +} | tee "$REPORT_FILE" + +echo "" +echo "Report saved to: $REPORT_FILE" diff --git a/perf/run-in-cluster.sh b/perf/run-in-cluster.sh new file mode 100755 index 0000000..778c1a4 --- /dev/null +++ b/perf/run-in-cluster.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# Build, push, and run perf tests inside the cluster. +# +# Usage: +# ./perf/run-in-cluster.sh # uses QUAY_USER from .env + +set -euo pipefail + +REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +[[ -f "$REPO_DIR/.env" ]] && set -a && source "$REPO_DIR/.env" && set +a +NAMESPACE="${HF_NAMESPACE:-hyperfleet}" + +if [[ -z "${QUAY_USER:-}" ]]; then + echo "ERROR: QUAY_USER is not set" + echo "Add QUAY_USER=myuser to .env or export it" + exit 1 +fi + +if ! command -v kubectl &>/dev/null; then + echo "ERROR: kubectl is not installed" + exit 1 +fi + +if ! kubectl cluster-info &>/dev/null; then + echo "ERROR: Cannot connect to Kubernetes cluster. Check your kubeconfig and context." + exit 1 +fi + +if ! kubectl get svc hyperfleet-api -n "$NAMESPACE" &>/dev/null; then + echo "ERROR: hyperfleet-api service not found in namespace '$NAMESPACE'" + echo "Make sure the HyperFleet stack (API, Sentinel, adapters, broker) is deployed." + echo "See: https://github.com/openshift-hyperfleet/hyperfleet-infra" + exit 1 +fi + +if ! command -v podman &>/dev/null && ! 
command -v docker &>/dev/null; then + echo "ERROR: podman or docker is required to build the image" + exit 1 +fi + +echo "=== Building and pushing image ===" +cd "$REPO_DIR" +make image-dev +DEV_TAG="dev-$(git rev-parse --short HEAD)" +IMAGE="quay.io/$QUAY_USER/hyperfleet-e2e:$DEV_TAG" +echo "Image: $IMAGE" +echo "" + +RESULTS_DIR="$REPO_DIR/perf/results" +mkdir -p "$RESULTS_DIR" +OUTPUT_FILE="$RESULTS_DIR/perf-baseline-$(date +%Y%m%d-%H%M%S).txt" + +kubectl delete pod perf-tests -n "$NAMESPACE" --ignore-not-found + +echo "=== Running perf tests in cluster ===" +echo "Output: $OUTPUT_FILE" +echo "" +kubectl run perf-tests --rm -i \ + --image="$IMAGE" \ + --restart=Never \ + --image-pull-policy=Always \ + -n "$NAMESPACE" \ + -- test --label-filter=perf --api-url=http://hyperfleet-api.$NAMESPACE.svc.cluster.local:8000 \ + 2>&1 | tee "$OUTPUT_FILE" + +echo "" +echo "Results saved to: $OUTPUT_FILE" diff --git a/perf/seed-clusters.sh b/perf/seed-clusters.sh new file mode 100755 index 0000000..221afb4 --- /dev/null +++ b/perf/seed-clusters.sh @@ -0,0 +1,191 @@ +#!/bin/bash +# Seed the database with clusters for realistic perf baselines. +# +# Usage: +# ./perf/seed-clusters.sh # seed 1000 clusters (default) +# ./perf/seed-clusters.sh 100 # seed 100 clusters +# ./perf/seed-clusters.sh status # show cluster counts in the database +# ./perf/seed-clusters.sh cleanup # delete seeded clusters only +# ./perf/seed-clusters.sh reset # delete ALL clusters (clean slate) + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_DIR="$(cd "$SCRIPT_DIR/.." 
&& pwd)" +[[ -f "$REPO_DIR/.env" ]] && set -a && source "$REPO_DIR/.env" && set +a + +API_URL="${HYPERFLEET_API_URL:?ERROR: HYPERFLEET_API_URL is not set}" +API_BASE="$API_URL/api/hyperfleet/v1" +COUNT="${1:-1000}" +SEED_LABEL="perf-seed" +CURL_OPTS="--connect-timeout 10 --max-time 30" + +# --- Functions ---------------------------------------------------------------- + +# Create a single cluster with a unique name via POST /clusters. +create_cluster() { + local i=$1 + local name="perf-seed-$(printf '%04d' "$i")-$(head -c 4 /dev/urandom | od -An -tx1 | tr -d ' ')" + local payload + payload=$(jq --arg name "$name" --arg label "$SEED_LABEL" \ + '.name = $name | .labels[$label] = "true"' \ + "$REPO_DIR/testdata/payloads/clusters/cluster-request.json") + + local status + status=$(curl -s -o /dev/null -w "%{http_code}" $CURL_OPTS \ + -X POST "$API_BASE/clusters" \ + --http1.1 \ + -H "Content-Type: application/json" \ + -H "Accept: application/json" \ + -d "$payload") + + if [[ "$status" == "201" ]]; then + return 0 + else + echo "WARN: cluster $i returned HTTP $status" >&2 + return 1 + fi +} + +# Fetch and delete clusters in batches. Takes a curl query as arguments. Stops with a warning when a pass leaves the listing unchanged. +# Usage: delete_in_batches (all clusters) +# delete_in_batches --data-urlencode "search=name like 'perf-seed-%'" +delete_in_batches() { + local deleted=0 prev_ids="" + + while true; do + local clusters + clusters=$(curl -G -s $CURL_OPTS "$API_BASE/clusters" \ + --data-urlencode "pageSize=1000" \ + "$@" \ + --http1.1 -H "Accept: application/json") + + local ids + ids=$(echo "$clusters" | jq -r '.items[]?.id // empty') + local batch + batch=$(echo "$ids" | grep -c .
|| true) + + if [[ "$batch" -eq 0 ]]; then + break + fi + [[ "$ids" != "$prev_ids" ]] || { echo "WARN: cluster list unchanged after a delete pass; stopping to avoid an endless loop" >&2; break; }; prev_ids=$ids + while IFS= read -r id; do + [[ -z "$id" ]] && continue + [[ "$id" =~ ^[a-zA-Z0-9_-]+$ ]] || continue + curl -s -o /dev/null $CURL_OPTS -X DELETE "$API_BASE/clusters/$id" --http1.1 + deleted=$((deleted + 1)) + if (( deleted % 50 == 0 )); then + echo " Deleted $deleted" + fi + done <<< "$ids" + done + + if [[ "$deleted" -eq 0 ]]; then + echo "No clusters found" + else + echo "Deleted $deleted clusters" + fi +} + +# Delete only perf-seed-* clusters (safe for shared environments). +cleanup_clusters() { + echo "=== Cleaning up seeded clusters ===" + delete_in_batches --data-urlencode "search=name like 'perf-seed-%'" +} + +# Show active and seeded cluster counts. +status_clusters() { + local active + active=$(curl -s $CURL_OPTS "$API_BASE/clusters?pageSize=1" --http1.1 -H "Accept: application/json" | jq '.total // 0') + + local seeded + seeded=$(curl -G -s $CURL_OPTS "$API_BASE/clusters" \ + --data-urlencode "search=name like 'perf-seed-%'" \ + --data-urlencode "pageSize=1" \ + --http1.1 -H "Accept: application/json" | jq '.total // 0') + + echo "=== Database status ===" + echo "Active clusters: $active" + echo " Seeded (perf-seed-*): $seeded" + echo " Other: $(( active - seeded ))" +} + +# Delete ALL clusters in batches (includes soft-deleted). +cleanup_all() { + echo "=== Cleaning up ALL clusters ===" + delete_in_batches +} + +# --- Subcommand dispatch ------------------------------------------------------ + +if [[ "$COUNT" == "status" ]]; then + status_clusters + exit 0 +fi + +if [[ "$COUNT" == "cleanup" ]]; then + cleanup_clusters + exit 0 +fi + +if [[ "$COUNT" == "reset" ]]; then + total=$(curl -s $CURL_OPTS "$API_BASE/clusters?pageSize=1" --http1.1 -H "Accept: application/json" | jq '.total // 0') + echo "WARNING: This will delete ALL $total clusters at $API_URL" + echo "kubectl context: $(kubectl config current-context 2>/dev/null || echo 'unknown')" + read -r -p "Are you sure?
(y/N) " confirm + if [[ "$confirm" =~ ^[Yy]$ ]]; then + cleanup_all + else + echo "Aborted." + fi + exit 0 +fi + +if ! [[ "$COUNT" =~ ^[0-9]+$ ]]; then + echo "ERROR: Invalid argument '$COUNT'. Expected a number, 'status', 'cleanup', or 'reset'." >&2 + echo "" + echo "Usage:" + echo " $0 # seed 1000 clusters (default)" + echo " $0 100 # seed 100 clusters" + echo " $0 status # show cluster counts" + echo " $0 cleanup # delete seeded clusters only" + echo " $0 reset # delete ALL clusters (clean slate)" + exit 1 +fi + +# --- Seed clusters (default) -------------------------------------------------- + +existing=$(curl -G -s $CURL_OPTS "$API_BASE/clusters" \ + --data-urlencode "search=name like 'perf-seed-%'" \ + --data-urlencode "pageSize=1" \ + --http1.1 -H "Accept: application/json" | jq '.total // 0') + +if [[ "$existing" -ge "$COUNT" ]]; then + echo "Already have $existing seeded clusters (target: $COUNT). Nothing to do." + exit 0 +fi + +to_create=$((COUNT - existing)) +echo "=== Seeding $to_create clusters (existing: $existing, target: $COUNT) ===" +echo "API: $API_URL" +echo "" + +created=0 +failed=0 +for i in $(seq 1 "$to_create"); do + if create_cluster "$i"; then + created=$((created + 1)) + else + failed=$((failed + 1)) + fi + if (( i % 50 == 0 )); then + echo " Progress: $i / $to_create (created: $created, failed: $failed)" + fi +done + +echo "" +echo "=== Seeding complete ===" +echo "Created: $created" +echo "Failed: $failed" +echo "" +echo "To clean up: ./perf/seed-clusters.sh cleanup" diff --git a/pkg/client/cluster.go b/pkg/client/cluster.go index 560cb69..59a7e08 100644 --- a/pkg/client/cluster.go +++ b/pkg/client/cluster.go @@ -39,7 +39,12 @@ func (c *HyperFleetClient) GetCluster(ctx context.Context, clusterID string) (*o // ListClusters retrieves all clusters.
func (c *HyperFleetClient) ListClusters(ctx context.Context) (*openapi.ClusterList, error) { - resp, err := c.GetClusters(ctx, &openapi.GetClustersParams{}) + return c.ListClustersWithParams(ctx, &openapi.GetClustersParams{}) +} + +// ListClustersWithParams retrieves clusters with query parameters (search, pagination, ordering). +func (c *HyperFleetClient) ListClustersWithParams(ctx context.Context, params *openapi.GetClustersParams) (*openapi.ClusterList, error) { + resp, err := c.GetClusters(ctx, params) if err != nil { return nil, fmt.Errorf("failed to list clusters: %w", err) } diff --git a/testdata/payloads/clusters/cluster-request-large.json b/testdata/payloads/clusters/cluster-request-large.json new file mode 100644 index 0000000..f4ae2e9 --- /dev/null +++ b/testdata/payloads/clusters/cluster-request-large.json @@ -0,0 +1,53 @@ +{ + "kind": "Cluster", + "name": "hp-cluster-large-{{.Random}}", + "labels": { + "environment": "test", + "shard": "1", + "team": "platform", + "region": "us-central1", + "tier": "production", + "cost-center": "eng-001", + "owner": "team-infra", + "compliance": "soc2", + "datacenter": "dc-west-01", + "managed-by": "hyperfleet" + }, + "spec": { + "platform": { + "type": "gcp", + "gcp": { + "projectID": "my-gcp-project", + "region": "us-central1", + "zone": "us-central1-a", + "network": "default", + "subnet": "default-subnet", + "subnets": [ + { "id": "subnet-cp-01", "name": "control-plane", "cidr": "10.0.1.0/24", "role": "control-plane" }, + { "id": "subnet-wk-01", "name": "worker-nodes-a", "cidr": "10.0.2.0/24", "role": "worker" }, + { "id": "subnet-wk-02", "name": "worker-nodes-b", "cidr": "10.0.3.0/24", "role": "worker" }, + { "id": "subnet-wk-03", "name": "worker-nodes-c", "cidr": "10.0.4.0/24", "role": "worker" }, + { "id": "subnet-svc-01", "name": "service-mesh", "cidr": "10.0.5.0/24", "role": "service" }, + { "id": "subnet-svc-02", "name": "service-ingress", "cidr": "10.0.6.0/24", "role": "service" }, + { "id": "subnet-mon-01", 
"name": "monitoring", "cidr": "10.0.7.0/24", "role": "worker" }, + { "id": "subnet-db-01", "name": "database", "cidr": "10.0.8.0/24", "role": "worker" } + ] + } + }, + "release": { + "image": "registry.redhat.io/openshift4/ose-cluster-version-operator:v4.14.0", + "version": "4.14.0" + }, + "networking": { + "clusterNetwork": [ + { "cidr": "10.10.0.0/16", "hostPrefix": 24 }, + { "cidr": "10.11.0.0/16", "hostPrefix": 24 }, + { "cidr": "10.12.0.0/16", "hostPrefix": 24 } + ], + "serviceNetwork": ["10.96.0.0/16", "10.112.0.0/16"] + }, + "dns": { + "baseDomain": "example.com" + } + } +} diff --git a/testdata/payloads/clusters/cluster-request-small.json b/testdata/payloads/clusters/cluster-request-small.json new file mode 100644 index 0000000..2dfaa5f --- /dev/null +++ b/testdata/payloads/clusters/cluster-request-small.json @@ -0,0 +1,31 @@ +{ + "kind": "Cluster", + "name": "hp-cluster-small-{{.Random}}", + "labels": { + "environment": "test" + }, + "spec": { + "platform": { + "type": "gcp", + "gcp": { + "projectID": "my-gcp-project", + "region": "us-central1" + } + }, + "release": { + "version": "4.14.0" + }, + "networking": { + "clusterNetwork": [ + { + "cidr": "10.10.0.0/16", + "hostPrefix": 24 + } + ], + "serviceNetwork": ["10.96.0.0/12"] + }, + "dns": { + "baseDomain": "example.com" + } + } +} From 7bb0de2467014654573b58b58ab5f3964088bad4 Mon Sep 17 00:00:00 2001 From: pnguyen44 Date: Tue, 16 Jun 2026 12:48:13 -0400 Subject: [PATCH 2/2] HYPERFLEET-1185 - chore: address review feedback --- configs/config.yaml | 4 ++++ e2e/nodepool/perf_delete_latency.go | 2 +- perf/parse-report.sh | 11 ++++++++--- perf/run-in-cluster.sh | 8 +++++--- perf/seed-clusters.sh | 9 +++++++-- pkg/config/config.go | 8 ++++++++ pkg/config/defaults.go | 3 +++ 7 files changed, 36 insertions(+), 9 deletions(-) diff --git a/configs/config.yaml b/configs/config.yaml index f6b87fb..9ca6146 100644 --- a/configs/config.yaml +++ b/configs/config.yaml @@ -47,6 +47,10 @@ timeouts: # Can be overridden by: 
HYPERFLEET_TIMEOUTS_NODEPOOL_RECONCILED reconciled: 2m + # Maximum time to wait for nodepool hard-delete (404 response) + # Can be overridden by: HYPERFLEET_TIMEOUTS_NODEPOOL_DELETED + deleted: 2m + adapter: # Maximum time to wait for adapter processing # diff --git a/e2e/nodepool/perf_delete_latency.go b/e2e/nodepool/perf_delete_latency.go index 42a5fcb..f3e4c43 100644 --- a/e2e/nodepool/perf_delete_latency.go +++ b/e2e/nodepool/perf_delete_latency.go @@ -56,7 +56,7 @@ var _ = ginkgo.Describe("[Suite: nodepool][perf] Delete-to-hard-delete latency", _, err := h.Client.DeleteNodePool(ctx, clusterID, nodepoolID) Expect(err).NotTo(HaveOccurred()) - Eventually(h.PollNodePoolHTTPStatus(ctx, clusterID, nodepoolID), h.Cfg.Timeouts.Adapter.Processing, h.Cfg.Polling.Interval). + Eventually(h.PollNodePoolHTTPStatus(ctx, clusterID, nodepoolID), h.Cfg.Timeouts.NodePool.Deleted, h.Cfg.Polling.Interval). Should(Equal(http.StatusNotFound)) elapsed := time.Since(start) diff --git a/perf/parse-report.sh b/perf/parse-report.sh index 3b6c738..33a7084 100755 --- a/perf/parse-report.sh +++ b/perf/parse-report.sh @@ -12,9 +12,14 @@ REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" RESULTS_DIR="$REPO_DIR/perf/results" -if [[ -n "${1:-}" && -f "$1" ]]; then - INPUT="$1" - SOURCE="$1" +if [[ -n "${1:-}" ]]; then + if [[ -f "$1" ]]; then + INPUT="$1" + SOURCE="$1" + else + echo "ERROR: Input file not found: $1" + exit 1 + fi else INPUT=$(ls -t "$RESULTS_DIR"/perf-baseline-*.txt 2>/dev/null | grep -v "\-report\.txt$" | head -1 || true) if [[ -z "$INPUT" || ! 
-f "$INPUT" ]]; then diff --git a/perf/run-in-cluster.sh b/perf/run-in-cluster.sh index 778c1a4..58525fc 100755 --- a/perf/run-in-cluster.sh +++ b/perf/run-in-cluster.sh @@ -40,9 +40,11 @@ fi echo "=== Building and pushing image ===" cd "$REPO_DIR" -make image-dev -DEV_TAG="dev-$(git rev-parse --short HEAD)" -IMAGE="quay.io/$QUAY_USER/hyperfleet-e2e:$DEV_TAG" +IMAGE=$(make image-dev 2>&1 | tee /dev/stderr | grep "Dev image pushed:" | awk '{print $NF}' || true) # '|| true': let the empty-IMAGE check below report the failure instead of errexit/pipefail aborting here +if [[ -z "$IMAGE" ]]; then + echo "ERROR: Failed to extract image reference from make image-dev output" + exit 1 +fi echo "Image: $IMAGE" echo "" diff --git a/perf/seed-clusters.sh b/perf/seed-clusters.sh index 221afb4..a9dbfac 100755 --- a/perf/seed-clusters.sh +++ b/perf/seed-clusters.sh @@ -72,8 +72,13 @@ delete_in_batches() { while IFS= read -r id; do [[ -z "$id" ]] && continue [[ "$id" =~ ^[a-zA-Z0-9_-]+$ ]] || continue - curl -s -o /dev/null $CURL_OPTS -X DELETE "$API_BASE/clusters/$id" --http1.1 - deleted=$((deleted + 1)) + local http_code + http_code=$(curl -s -o /dev/null -w '%{http_code}' $CURL_OPTS -X DELETE "$API_BASE/clusters/$id" --http1.1 || true) + if [[ "$http_code" =~ ^2 ]]; then + deleted=$((deleted + 1)) + else + echo " WARN: DELETE $id returned HTTP $http_code" >&2; continue # skip the progress line when nothing new was deleted + fi if (( deleted % 50 == 0 )); then echo " Deleted $deleted" fi diff --git a/pkg/config/config.go b/pkg/config/config.go index 41872f7..8c69e4e 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -160,6 +160,7 @@ type ClusterTimeouts struct { // NodePoolTimeouts contains nodepool-related timeouts type NodePoolTimeouts struct { Reconciled time.Duration `yaml:"reconciled" mapstructure:"reconciled"` + Deleted time.Duration `yaml:"deleted" mapstructure:"deleted"` } // AdapterTimeouts contains adapter-related timeouts @@ -302,6 +303,9 @@ func (c *Config) applyDefaults() { if c.Timeouts.NodePool.Reconciled == 0 { c.Timeouts.NodePool.Reconciled = DefaultNodePoolReconciledTimeout } + if c.Timeouts.NodePool.Deleted == 0 { +
c.Timeouts.NodePool.Deleted = DefaultNodePoolDeletedTimeout + } if c.Timeouts.Adapter.Processing == 0 { c.Timeouts.Adapter.Processing = DefaultAdapterProcessingTimeout } @@ -436,6 +440,9 @@ func (c *Config) Validate() error { if c.Timeouts.NodePool.Reconciled <= 0 { return fmt.Errorf("configuration validation failed: timeouts.nodepool.reconciled must be a positive duration, got %v", c.Timeouts.NodePool.Reconciled) } + if c.Timeouts.NodePool.Deleted <= 0 { + return fmt.Errorf("configuration validation failed: timeouts.nodepool.deleted must be a positive duration, got %v", c.Timeouts.NodePool.Deleted) + } if c.Timeouts.Adapter.Processing <= 0 { return fmt.Errorf("configuration validation failed: timeouts.adapter.processing must be a positive duration, got %v", c.Timeouts.Adapter.Processing) } @@ -457,6 +464,7 @@ func (c *Config) Display() { "timeout_cluster_reconciled", c.Timeouts.Cluster.Reconciled, "timeout_cluster_deleted", c.Timeouts.Cluster.Deleted, "timeout_nodepool_reconciled", c.Timeouts.NodePool.Reconciled, + "timeout_nodepool_deleted", c.Timeouts.NodePool.Deleted, "timeout_adapter_processing", c.Timeouts.Adapter.Processing, "polling_interval", c.Polling.Interval, "log_level", c.Log.Level, diff --git a/pkg/config/defaults.go b/pkg/config/defaults.go index bcb82aa..c7d09d7 100644 --- a/pkg/config/defaults.go +++ b/pkg/config/defaults.go @@ -40,6 +40,9 @@ const ( // DefaultNodePoolReconciledTimeout is the default timeout for waiting for a nodepool to become reconciled DefaultNodePoolReconciledTimeout = 5 * time.Minute + // DefaultNodePoolDeletedTimeout is the default timeout for waiting for a nodepool to be hard-deleted (404) + DefaultNodePoolDeletedTimeout = 2 * time.Minute + // DefaultAdapterProcessingTimeout is the default timeout for waiting for adapter conditions DefaultAdapterProcessingTimeout = 5 * time.Minute