diff --git a/checks.md b/checks.md index 283172a..9bd554b 100644 --- a/checks.md +++ b/checks.md @@ -71,6 +71,37 @@ spec: image: redis@sha256:dca057ffa2337682333a3aba69cc0e7809819b3cd7fc78f3741d9de8c2a4f08b ``` +## CronJob Concurrency + +- Name: `cronjob-concurrency` +- Groups: `basic` + +We do not recommend having a `concurrencyPolicy` of `Allow` for CronJob resources. If a CronJob-managed Pod does not execute to completion within the expected window, it is possible that multiple Pods pile up over time, leading to several Pods stuck in a pending state and possible resource contention. Instead, prefer `Forbid`, which skips execution of a new job if the previous job has not exited, or `Replace`, which replaces the still-running job with a new job if it has not yet exited. + +### Example + +```yaml +# Not recommended: Having a concurrency policy of Allow +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: mycron +spec: + concurrencyPolicy: Allow +``` + +### How to Fix + +```yaml +# Recommended: Having a concurrency policy of Forbid or Replace +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: mycron +spec: + concurrencyPolicy: Replace +``` + ## Privileged Containers - Name: `privileged-containers` diff --git a/checks/basic/cronjob_concurrency.go b/checks/basic/cronjob_concurrency.go new file mode 100644 index 0000000..b412f16 --- /dev/null +++ b/checks/basic/cronjob_concurrency.go @@ -0,0 +1,70 @@ +/* +Copyright 2019 DigitalOcean + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package basic + +import ( + "fmt" + + batchv1beta1 "k8s.io/api/batch/v1beta1" + + "github.com/digitalocean/clusterlint/checks" + "github.com/digitalocean/clusterlint/kube" +) + +func init() { + checks.Register(&cronJobConcurrencyCheck{}) +} + +type cronJobConcurrencyCheck struct{} + +// Name returns a unique name for this check. +func (c *cronJobConcurrencyCheck) Name() string { + return "cronjob-concurrency" +} + +// Groups returns a list of group names this check should be part of. +func (c *cronJobConcurrencyCheck) Groups() []string { + return []string{"basic"} +} + +// Description returns a detailed human-readable description of what this check +// does. +func (c *cronJobConcurrencyCheck) Description() string { + return "Check if any cronjobs have a concurrency policy of 'Allow'" +} + +// Run runs this check on a set of Kubernetes objects. It can return warnings +// (low-priority problems) and errors (high-priority problems) as well as an +// error value indicating that the check failed to run. +func (c *cronJobConcurrencyCheck) Run(objects *kube.Objects) ([]checks.Diagnostic, error) { + var diagnostics []checks.Diagnostic + + for _, cronjob := range objects.CronJobs.Items { + if batchv1beta1.AllowConcurrent == cronjob.Spec.ConcurrencyPolicy { + d := checks.Diagnostic{ + Severity: checks.Warning, + Message: fmt.Sprintf("CronJob has a concurrency policy of `%s`. Prefer to use `%s` or `%s`", cronjob.Spec.ConcurrencyPolicy, batchv1beta1.ForbidConcurrent, batchv1beta1.ReplaceConcurrent), + Kind: checks.CronJob, + Object: &cronjob.ObjectMeta, + Owners: cronjob.ObjectMeta.GetOwnerReferences(), + } + diagnostics = append(diagnostics, d) + } + } + + return diagnostics, nil +} diff --git a/checks/basic/cronjob_concurrency_test.go b/checks/basic/cronjob_concurrency_test.go new file mode 100644 index 0000000..edaba57 --- /dev/null +++ b/checks/basic/cronjob_concurrency_test.go @@ -0,0 +1,105 @@ +/* +Copyright 2019 DigitalOcean + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package basic + +import ( + "testing" + + "github.com/stretchr/testify/assert" + batchv1beta1 "k8s.io/api/batch/v1beta1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/digitalocean/clusterlint/checks" + "github.com/digitalocean/clusterlint/kube" +) + +func TestCronJobConcurrencyMeta(t *testing.T) { + check := cronJobConcurrencyCheck{} + assert.Equal(t, "cronjob-concurrency", check.Name()) + assert.Equal(t, []string{"basic"}, check.Groups()) + assert.NotEmpty(t, check.Description()) +} + +func TestCronJobConcurrencyCheckRegistration(t *testing.T) { + check := &cronJobConcurrencyCheck{} + ch, err := checks.Get("cronjob-concurrency") + assert.NoError(t, err) + assert.Equal(t, ch, check) +} + +func TestCronJobConcurrency(t *testing.T) { + check := cronJobConcurrencyCheck{} + tests := []struct { + name string + objs *kube.Objects + expected []checks.Diagnostic + }{ + { + name: "cronjob with 'Forbid' policy", + objs: policy(batchv1beta1.ForbidConcurrent), + expected: nil, + }, + { + name: "cronjob with 'Replace' policy", + objs: policy(batchv1beta1.ReplaceConcurrent), + expected: nil, + }, + { + name: "cronjob with 'Allow' policy", + objs: policy(batchv1beta1.AllowConcurrent), + expected: []checks.Diagnostic{ + { + Severity: checks.Warning, + Message: "CronJob has a concurrency policy of `Allow`. Prefer to use `Forbid` or `Replace`", + Kind: checks.CronJob, + Object: &metav1.ObjectMeta{Name: "cronjob_foo"}, + Owners: GetOwners(), + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + d, err := check.Run(test.objs) + assert.NoError(t, err) + assert.ElementsMatch(t, test.expected, d) + }) + } +} + +func policy(policy batchv1beta1.ConcurrencyPolicy) *kube.Objects { + objs := initCronJob() + objs.CronJobs.Items[0].Spec = batchv1beta1.CronJobSpec{ + ConcurrencyPolicy: policy, + } + return objs +} + +func initCronJob() *kube.Objects { + objs := &kube.Objects{ + CronJobs: &batchv1beta1.CronJobList{ + Items: []batchv1beta1.CronJob{ + { + TypeMeta: metav1.TypeMeta{Kind: "CronJob", APIVersion: "batch/v1beta1"}, + ObjectMeta: metav1.ObjectMeta{Name: "cronjob_foo"}, + }, + }, + }, + } + return objs +} diff --git a/checks/diagnostic.go b/checks/diagnostic.go index 64eada7..68ede3a 100644 --- a/checks/diagnostic.go +++ b/checks/diagnostic.go @@ -78,4 +78,6 @@ const ( MutatingWebhookConfiguration Kind = "mutating webhook configuration" // Node identifies a Kubernetes node object. Node Kind = "node" + // CronJob identifies Kubernetes objects of kind `cron job` + CronJob Kind = "cron job" ) diff --git a/kube/objects.go b/kube/objects.go index 8421f63..1a0da2c 100644 --- a/kube/objects.go +++ b/kube/objects.go @@ -21,6 +21,7 @@ import ( "golang.org/x/sync/errgroup" ar "k8s.io/api/admissionregistration/v1beta1" + batchv1beta1 "k8s.io/api/batch/v1beta1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" @@ -55,6 +56,7 @@ type Objects struct { MutatingWebhookConfigurations *ar.MutatingWebhookConfigurationList ValidatingWebhookConfigurations *ar.ValidatingWebhookConfigurationList Namespaces *corev1.NamespaceList + CronJobs *batchv1beta1.CronJobList } // Client encapsulates a client for a Kubernetes cluster. @@ -67,6 +69,7 @@ type Client struct { func (c *Client) FetchObjects(ctx context.Context, filter ObjectFilter) (*Objects, error) { client := c.KubeClient.CoreV1() admissionControllerClient := c.KubeClient.AdmissionregistrationV1beta1() + batchClient := c.KubeClient.BatchV1beta1() opts := metav1.ListOptions{} objects := &Objects{} @@ -135,6 +138,10 @@ func (c *Client) FetchObjects(ctx context.Context, filter ObjectFilter) (*Object objects.Namespaces, err = client.Namespaces().List(gCtx, opts) return }) + g.Go(func() (err error) { + objects.CronJobs, err = batchClient.CronJobs(corev1.NamespaceAll).List(gCtx, filter.NamespaceOptions(opts)) + return + }) err := g.Wait() if err != nil {