diff --git a/checks.md b/checks.md index dbf4c37..186ae53 100644 --- a/checks.md +++ b/checks.md @@ -209,7 +209,10 @@ spec: - Name: `admission-controller-webhook` - Groups: `doks` -When you configure admission controllers with webhooks that have a `failurePolicy` set to `Fail`, it prevents managed components like Cilium, kube-proxy, and CoreDNS from starting on a new node during an upgrade. This will result in the cluster upgrade failing. +Admission control webhooks can disrupt upgrade and node replacement operations by preventing system components from starting. Specifically, this happens when an admission control webhook: +* has failurePolicy set to Fail, +* targets a service other than the Kubernetes apiserver, and +* applies to both kube-system and the namespace of the targeted service. ### Example @@ -233,7 +236,7 @@ webhooks: scope: "Namespaced" clientConfig: service: - namespace: kube-system + namespace: webhook name: webhook-server path: /pods admissionReviewVersions: @@ -244,8 +247,23 @@ webhooks: ### How to Fix +There are a few options: +1. Use the `Ignore` `failurePolicy`. +2. Use an apiserver extension as your webhook service. +3. Explicitly exclude the kube-system namespace. +4. Explicitly exclude the webhook service's namespace. + ```yaml -# Recommended: Exclude objects in the `kube-system` namespace by explicitly specifying a namespaceSelector or objectSelector +# Recommended: Exclude objects in the `webhook` namespace by explicitly specifying a namespaceSelector. + +apiVersion: v1 +kind: Namespace +metadata: + name: webhook + labels: + skip-webhooks: "yes" + +--- apiVersion: admissionregistration.k8s.io/v1beta1 kind: ValidatingWebhookConfiguration metadata: @@ -264,7 +282,7 @@ webhooks: scope: "Namespaced" clientConfig: service: - namespace: kube-system + namespace: webhook name: webhook-server path: /pods admissionReviewVersions: @@ -273,8 +291,8 @@ webhooks: failurePolicy: Fail namespaceSelector: matchExpressions: - - key: "doks.digitalocean.com/webhook" - operator: "Exists" + - key: "skip-webhooks" + operator: "DoesNotExist" ``` ## Pod State diff --git a/checks/doks/admission_controller_webhook.go b/checks/doks/admission_controller_webhook.go index 32a1f1e..c71a854 100644 --- a/checks/doks/admission_controller_webhook.go +++ b/checks/doks/admission_controller_webhook.go @@ -17,10 +17,13 @@ limitations under the License. package doks import ( + "errors" + "github.com/digitalocean/clusterlint/checks" "github.com/digitalocean/clusterlint/kube" ar "k8s.io/api/admissionregistration/v1beta1" corev1 "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -43,46 +46,112 @@ func (w *webhookCheck) Groups() []string { // Description returns a detailed human-readable description of what this check // does. func (w *webhookCheck) Description() string { - return "Check for admission controllers that could prevent managed components from starting" + return "Check for admission control webhooks that could cause problems during upgrades" } -// Run runs this check on a set of Kubernetes objects. It can return warnings -// (low-priority problems) and errors (high-priority problems) as well as an -// error value indicating that the check failed to run. +// Run runs this check on a set of Kubernetes objects. func (w *webhookCheck) Run(objects *kube.Objects) ([]checks.Diagnostic, error) { + const apiserverServiceName = "kubernetes" + var diagnostics []checks.Diagnostic for _, config := range objects.ValidatingWebhookConfigurations.Items { - for _, validatingWebhook := range config.Webhooks { - if *validatingWebhook.FailurePolicy == ar.Fail && - validatingWebhook.ClientConfig.Service != nil && - selectorMatchesNamespace(validatingWebhook.NamespaceSelector, objects.SystemNamespace) { - d := checks.Diagnostic{ - Severity: checks.Error, - Message: "Webhook matches objects in the kube-system namespace. This can cause problems when upgrading the cluster.", - Kind: checks.ValidatingWebhookConfiguration, - Object: &config.ObjectMeta, - Owners: config.ObjectMeta.GetOwnerReferences(), - } - diagnostics = append(diagnostics, d) + for _, wh := range config.Webhooks { + if *wh.FailurePolicy == ar.Ignore { + // Webhooks with failurePolicy: Ignore are fine. + continue } + if wh.ClientConfig.Service == nil { + // Webhooks whose targets are external to the cluster are fine. + continue + } + if wh.ClientConfig.Service.Namespace == metav1.NamespaceDefault && + wh.ClientConfig.Service.Name == apiserverServiceName { + // Webhooks that target the kube-apiserver are fine. + continue + } + if !selectorMatchesNamespace(wh.NamespaceSelector, objects.SystemNamespace) { + // Webhooks that don't apply to kube-system are fine. + continue + } + var svcNamespace *v1.Namespace + for _, ns := range objects.Namespaces.Items { + if ns.Name == wh.ClientConfig.Service.Namespace { + svcNamespace = &ns + } + } + if svcNamespace == nil { + return nil, errors.New("webhook refers to service in non-existent namespace") + } + if !selectorMatchesNamespace(wh.NamespaceSelector, svcNamespace) && len(objects.Nodes.Items) > 1 { + // Webhooks that don't apply to their own namespace are fine, as + // long as there's more than one node in the cluster. + continue + } + + d := checks.Diagnostic{ + Severity: checks.Error, + Message: "Validating webhook is configured in such a way that it may be problematic during upgrades.", + Kind: checks.ValidatingWebhookConfiguration, + Object: &config.ObjectMeta, + Owners: config.ObjectMeta.GetOwnerReferences(), + } + diagnostics = append(diagnostics, d) + + // We don't want to produce diagnostics for multiple webhooks in the + // same webhook configuration, so break out of the inner loop if we + // get here. + break } } for _, config := range objects.MutatingWebhookConfigurations.Items { - for _, mutatingWebhook := range config.Webhooks { - if *mutatingWebhook.FailurePolicy == ar.Fail && - mutatingWebhook.ClientConfig.Service != nil && - selectorMatchesNamespace(mutatingWebhook.NamespaceSelector, objects.SystemNamespace) { - d := checks.Diagnostic{ - Severity: checks.Error, - Message: "Webhook matches objects in the kube-system namespace. This can cause problems when upgrading the cluster.", - Kind: checks.MutatingWebhookConfiguration, - Object: &config.ObjectMeta, - Owners: config.ObjectMeta.GetOwnerReferences(), - } - diagnostics = append(diagnostics, d) + for _, wh := range config.Webhooks { + if *wh.FailurePolicy == ar.Ignore { + // Webhooks with failurePolicy: Ignore are fine. + continue } + if wh.ClientConfig.Service == nil { + // Webhooks whose targets are external to the cluster are fine. + continue + } + if wh.ClientConfig.Service.Namespace == metav1.NamespaceDefault && + wh.ClientConfig.Service.Name == apiserverServiceName { + // Webhooks that target the kube-apiserver are fine. + continue + } + if !selectorMatchesNamespace(wh.NamespaceSelector, objects.SystemNamespace) { + // Webhooks that don't apply to kube-system are fine. + continue + } + var svcNamespace *v1.Namespace + for _, ns := range objects.Namespaces.Items { + if ns.Name == wh.ClientConfig.Service.Namespace { + svcNamespace = &ns + } + } + if svcNamespace == nil { + return nil, errors.New("webhook refers to service in non-existent namespace") + } + if !selectorMatchesNamespace(wh.NamespaceSelector, svcNamespace) && len(objects.Nodes.Items) > 1 { + // Webhooks that don't apply to their own namespace are fine, as + // long as there's more than one node in the cluster. + continue + } + + d := checks.Diagnostic{ + Severity: checks.Error, + Message: "Mutating webhook is configured in such a way that it may be problematic during upgrades.", + Kind: checks.MutatingWebhookConfiguration, + Object: &config.ObjectMeta, + Owners: config.ObjectMeta.GetOwnerReferences(), + } + diagnostics = append(diagnostics, d) + + // We don't want to produce diagnostics for multiple webhooks in the + // same webhook configuration, so break out of the inner loop if we + // get here. + break } } return diagnostics, nil diff --git a/checks/doks/admission_controller_webhook_test.go b/checks/doks/admission_controller_webhook_test.go index 32cde42..c355edf 100644 --- a/checks/doks/admission_controller_webhook_test.go +++ b/checks/doks/admission_controller_webhook_test.go @@ -27,6 +27,8 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +var webhookURL = "https://example.com/webhook" + func TestWebhookCheckMeta(t *testing.T) { webhookCheck := webhookCheck{} assert.Equal(t, "admission-controller-webhook", webhookCheck.Name()) @@ -57,70 +59,181 @@ func TestWebhookError(t *testing.T) { expected: nil, }, { - name: "failure policy is ignore", - objs: initObjects(ar.Ignore), + name: "failure policy is ignore", + objs: webhookTestObjects( + ar.Ignore, + &metav1.LabelSelector{}, + ar.WebhookClientConfig{ + Service: &ar.ServiceReference{ + Namespace: "webhook", + Name: "webhook-service", + }, + }, + 2, + ), expected: nil, }, { - name: "webook does not use service", - objs: webhookURL(), + name: "webook does not use service", + objs: webhookTestObjects( + ar.Fail, + &metav1.LabelSelector{}, + ar.WebhookClientConfig{ + URL: &webhookURL, + }, + 2, + ), expected: nil, }, { - name: "namespace selector is empty", - objs: initObjects(ar.Fail), + name: "webook service is apiserver", + objs: webhookTestObjects( + ar.Fail, + &metav1.LabelSelector{}, + ar.WebhookClientConfig{ + Service: &ar.ServiceReference{ + Namespace: "default", + Name: "kubernetes", + }, + }, + 2, + ), + expected: nil, + }, + { + name: "namespace label selector does not match kube-system", + objs: webhookTestObjects( + ar.Fail, + &metav1.LabelSelector{ + MatchLabels: map[string]string{"non-existent-label-on-namespace": "bar"}, + }, + ar.WebhookClientConfig{ + Service: &ar.ServiceReference{ + Namespace: "webhook", + Name: "webhook-service", + }, + }, + 2, + ), + expected: nil, + }, + { + name: "namespace OpExists expression selector does not match kube-system", + objs: webhookTestObjects( + ar.Fail, + &metav1.LabelSelector{ + MatchExpressions: expr("non-existent", []string{}, metav1.LabelSelectorOpExists), + }, + ar.WebhookClientConfig{ + Service: &ar.ServiceReference{ + Namespace: "webhook", + Name: "webhook-service", + }, + }, + 2, + ), + expected: nil, + }, + { + name: "namespace OpDoesNotExist expression selector does not match kube-system", + objs: webhookTestObjects( + ar.Fail, + &metav1.LabelSelector{ + MatchExpressions: expr("doks_key", []string{}, metav1.LabelSelectorOpDoesNotExist), + }, + ar.WebhookClientConfig{ + Service: &ar.ServiceReference{ + Namespace: "webhook", + Name: "webhook-service", + }, + }, + 2, + ), + expected: nil, + }, + { + name: "namespace OpIn expression selector does not match kube-system", + objs: webhookTestObjects( + ar.Fail, + &metav1.LabelSelector{ + MatchExpressions: expr("doks_key", []string{"non-existent"}, metav1.LabelSelectorOpIn), + }, + ar.WebhookClientConfig{ + Service: &ar.ServiceReference{ + Namespace: "webhook", + Name: "webhook-service", + }, + }, + 2, + ), + expected: nil, + }, + { + name: "namespace OpNotIn expression selector does not match kube-system", + objs: webhookTestObjects( + ar.Fail, + &metav1.LabelSelector{ + MatchExpressions: expr("doks_key", []string{"bar"}, metav1.LabelSelectorOpNotIn), + }, + ar.WebhookClientConfig{ + Service: &ar.ServiceReference{ + Namespace: "webhook", + Name: "webhook-service", + }, + }, + 2, + ), + expected: nil, + }, + { + name: "namespace label selector does not match own namespace", + objs: webhookTestObjects( + ar.Fail, + &metav1.LabelSelector{ + MatchLabels: map[string]string{"doks_key": "bar"}, + }, + ar.WebhookClientConfig{ + Service: &ar.ServiceReference{ + Namespace: "webhook", + Name: "webhook-service", + }, + }, + 2, + ), + expected: nil, + }, + { + name: "single-node cluster", + objs: webhookTestObjects( + ar.Fail, + &metav1.LabelSelector{ + MatchLabels: map[string]string{"doks_key": "bar"}, + }, + ar.WebhookClientConfig{ + Service: &ar.ServiceReference{ + Namespace: "webhook", + Name: "webhook-service", + }, + }, + 1, + ), expected: webhookErrors(), }, { - name: "namespace selector matches label", - objs: label(map[string]string{"doks_key": "bar"}), + name: "webhook applies to its own namespace and kube-system", + objs: webhookTestObjects( + ar.Fail, + &metav1.LabelSelector{}, + ar.WebhookClientConfig{ + Service: &ar.ServiceReference{ + Namespace: "webhook", + Name: "webhook-service", + }, + }, + 2, + ), expected: webhookErrors(), }, - { - name: "namespace selector does not match label", - objs: label(map[string]string{"non-existent-label-on-namespace": "bar"}), - expected: nil, - }, - { - name: "namespace selector matches OpExists expression", - objs: expr("doks_key", []string{}, metav1.LabelSelectorOpExists), - expected: webhookErrors(), - }, - { - name: "namespace selector matches OpDoesNotExist expression", - objs: expr("random", []string{}, metav1.LabelSelectorOpDoesNotExist), - expected: webhookErrors(), - }, - { - name: "namespace selector matches OpIn expression", - objs: expr("doks_key", []string{"bar"}, metav1.LabelSelectorOpIn), - expected: webhookErrors(), - }, - { - name: "namespace selector matches OpNotIn expression", - objs: expr("doks_key", []string{"non-existent"}, metav1.LabelSelectorOpNotIn), - expected: webhookErrors(), - }, - { - name: "namespace selector does not match OpExists expression", - objs: expr("non-existent", []string{}, metav1.LabelSelectorOpExists), - expected: nil, - }, - { - name: "namespace selector does not match OpDoesNotExist expression", - objs: expr("doks_key", []string{}, metav1.LabelSelectorOpDoesNotExist), - expected: nil, - }, - { - name: "namespace selector does not match OpIn expression", - objs: expr("doks_key", []string{"non-existent"}, metav1.LabelSelectorOpIn), - expected: nil, - }, - { - name: "namespace selector does not match OpNotIn expression", - objs: expr("doks_key", []string{"bar"}, metav1.LabelSelectorOpNotIn), - expected: nil, - }, } webhookCheck := webhookCheck{} @@ -134,13 +247,45 @@ func TestWebhookError(t *testing.T) { } } -func initObjects(failurePolicyType ar.FailurePolicyType) *kube.Objects { +func expr(key string, values []string, op metav1.LabelSelectorOperator) []metav1.LabelSelectorRequirement { + return []metav1.LabelSelectorRequirement{{ + Key: key, + Operator: op, + Values: values, + }} +} + +func webhookTestObjects( + failurePolicyType ar.FailurePolicyType, + nsSelector *metav1.LabelSelector, + clientConfig ar.WebhookClientConfig, + numNodes int, +) *kube.Objects { objs := &kube.Objects{ SystemNamespace: &corev1.Namespace{ TypeMeta: metav1.TypeMeta{Kind: "Namespace", APIVersion: "v1"}, ObjectMeta: metav1.ObjectMeta{ Name: "kube-system", - Labels: map[string]string{"doks_key": "bar"}}, + Labels: map[string]string{"doks_key": "bar"}, + }, + }, + Namespaces: &corev1.NamespaceList{ + Items: []corev1.Namespace{ + { + TypeMeta: metav1.TypeMeta{Kind: "Namespace", APIVersion: "v1"}, + ObjectMeta: metav1.ObjectMeta{ + Name: "kube-system", + Labels: map[string]string{"doks_key": "bar"}, + }, + }, + { + TypeMeta: metav1.TypeMeta{Kind: "Namespace", APIVersion: "v1"}, + ObjectMeta: metav1.ObjectMeta{ + Name: "webhook", + Labels: map[string]string{"doks_key": "xyzzy"}, + }, + }, + }, }, MutatingWebhookConfigurations: &ar.MutatingWebhookConfigurationList{ Items: []ar.MutatingWebhookConfiguration{ @@ -153,13 +298,8 @@ func initObjects(failurePolicyType ar.FailurePolicyType) *kube.Objects { { Name: "mw_foo", FailurePolicy: &failurePolicyType, - NamespaceSelector: &metav1.LabelSelector{}, - ClientConfig: ar.WebhookClientConfig{ - Service: &ar.ServiceReference{ - Name: "some-svc", - Namespace: "k8s", - }, - }, + NamespaceSelector: nsSelector, + ClientConfig: clientConfig, }, }, }, @@ -176,83 +316,38 @@ func initObjects(failurePolicyType ar.FailurePolicyType) *kube.Objects { { Name: "vw_foo", FailurePolicy: &failurePolicyType, - NamespaceSelector: &metav1.LabelSelector{}, - ClientConfig: ar.WebhookClientConfig{ - Service: &ar.ServiceReference{ - Name: "some-svc", - Namespace: "k8s", - }, - }, + NamespaceSelector: nsSelector, + ClientConfig: clientConfig, }, }, }, }, }, } - return objs -} -func webhookURL() *kube.Objects { - var url = "https://example.com/webhook/action" - objs := initObjects(ar.Fail) - objs.ValidatingWebhookConfigurations.Items[0].Webhooks[0].ClientConfig = ar.WebhookClientConfig{ - URL: &url, - } - objs.MutatingWebhookConfigurations.Items[0].Webhooks[0].ClientConfig = ar.WebhookClientConfig{ - URL: &url, - } - return objs -} - -func label(label map[string]string) *kube.Objects { - objs := initObjects(ar.Fail) - objs.ValidatingWebhookConfigurations.Items[0].Webhooks[0].NamespaceSelector = &metav1.LabelSelector{ - MatchLabels: label, - } - objs.MutatingWebhookConfigurations.Items[0].Webhooks[0].NamespaceSelector = &metav1.LabelSelector{ - MatchLabels: label, - } - return objs -} - -func expr(key string, values []string, labelOperator metav1.LabelSelectorOperator) *kube.Objects { - objs := initObjects(ar.Fail) - objs.ValidatingWebhookConfigurations.Items[0].Webhooks[0].NamespaceSelector = &metav1.LabelSelector{ - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: key, - Operator: labelOperator, - Values: values, - }, - }, - } - objs.MutatingWebhookConfigurations.Items[0].Webhooks[0].NamespaceSelector = &metav1.LabelSelector{ - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: key, - Operator: labelOperator, - Values: values, - }, - }, + objs.Nodes = &corev1.NodeList{} + for i := 0; i < numNodes; i++ { + objs.Nodes.Items = append(objs.Nodes.Items, corev1.Node{}) } return objs } func webhookErrors() []checks.Diagnostic { - objs := initObjects(ar.Fail) + objs := webhookTestObjects(ar.Fail, nil, ar.WebhookClientConfig{}, 0) validatingConfig := objs.ValidatingWebhookConfigurations.Items[0] mutatingConfig := objs.MutatingWebhookConfigurations.Items[0] + diagnostics := []checks.Diagnostic{ { Severity: checks.Error, - Message: "Webhook matches objects in the kube-system namespace. This can cause problems when upgrading the cluster.", + Message: "Validating webhook is configured in such a way that it may be problematic during upgrades.", Kind: checks.ValidatingWebhookConfiguration, Object: &validatingConfig.ObjectMeta, Owners: validatingConfig.ObjectMeta.GetOwnerReferences(), }, { Severity: checks.Error, - Message: "Webhook matches objects in the kube-system namespace. This can cause problems when upgrading the cluster.", + Message: "Mutating webhook is configured in such a way that it may be problematic during upgrades.", Kind: checks.MutatingWebhookConfiguration, Object: &mutatingConfig.ObjectMeta, Owners: mutatingConfig.ObjectMeta.GetOwnerReferences(), diff --git a/checks/doks/node_name_pod_selector_test.go b/checks/doks/node_name_pod_selector_test.go index 7de2389..2bdfd20 100644 --- a/checks/doks/node_name_pod_selector_test.go +++ b/checks/doks/node_name_pod_selector_test.go @@ -56,7 +56,7 @@ func TestNodeNameError(t *testing.T) { { name: "node name used in node selector", objs: invalidPod(), - expected: warnings(invalidPod(), podSelectorCheck.Name()), + expected: expectedWarnings(invalidPod(), podSelectorCheck.Name()), }, } @@ -89,7 +89,7 @@ func invalidPod() *kube.Objects { return objs } -func warnings(objs *kube.Objects, name string) []checks.Diagnostic { +func expectedWarnings(objs *kube.Objects, name string) []checks.Diagnostic { pod := objs.Pods.Items[0] diagnostics := []checks.Diagnostic{ { diff --git a/kube/objects.go b/kube/objects.go index cf3553a..3f49193 100644 --- a/kube/objects.go +++ b/kube/objects.go @@ -54,6 +54,7 @@ type Objects struct { LimitRanges *corev1.LimitRangeList MutatingWebhookConfigurations *ar.MutatingWebhookConfigurationList ValidatingWebhookConfigurations *ar.ValidatingWebhookConfigurationList + Namespaces *corev1.NamespaceList } // Client encapsulates a client for a Kubernetes cluster. @@ -131,6 +132,10 @@ func (c *Client) FetchObjects(ctx context.Context) (*Objects, error) { objects.ValidatingWebhookConfigurations, err = admissionControllerClient.ValidatingWebhookConfigurations().List(opts) return }) + g.Go(func() (err error) { + objects.Namespaces, err = client.Namespaces().List(opts) + return + }) err := g.Wait() if err != nil {