Merge pull request #486 from AkihiroSuda/update-rootless

rootless: refactor libcontainer_specconv & add integration tests
2018-07-05 11:26:26 -07:00 · 2018-07-05 11:26:26 -07:00 · cda0a64eca
parent c111113a2f 580dbe08b9
commit cda0a64eca
25 changed files with 1645 additions and 2295 deletions
--- a/docs/rootless.md
+++ b/docs/rootless.md
@ -2,7 +2,7 @@

 Requirements:
 - runc `ecd55a4135e0a26de884ce436442914f945b1e76` (May 30, 2018) or later
- Some distros such as Debian and Arch Linux require `echo 1 > /proc/sys/kernel/unprivileged_userns_clone`
+- Some distros such as Debian (excluding Ubuntu) and Arch Linux require `echo 1 > /proc/sys/kernel/unprivileged_userns_clone`
 - `newuidmap` and `newgidmap` need to be installed on the host. These commands are provided by the `uidmap` package.
 - `/etc/subuid` and `/etc/subgid` should contain >= 65536 sub-IDs. e.g. `penguin:231072:65536`.
 - To run in a Docker container with non-root `USER`, `docker run --privileged` is still required. See also Jessie's blog: https://blog.jessfraz.com/post/building-container-images-securely-on-kubernetes/
@ -10,7 +10,7 @@ Requirements:

 ## Set up

-Setting up rootless mode also requires some bothersome steps as follows, but you can also use [`rootlesskit`](https://github.com/AkihiroSuda/rootlesskit) for automating these steps.
+Setting up rootless mode requires the somewhat bothersome steps below; alternatively, you can use [`rootlesskit`](https://github.com/rootless-containers/rootlesskit) to automate them.

 ### Terminal 1:

--- a/executor/runcexecutor/executor.go
+++ b/executor/runcexecutor/executor.go
@ -21,7 +21,7 @@ import (
 	"github.com/moby/buildkit/executor"
 	"github.com/moby/buildkit/executor/oci"
 	"github.com/moby/buildkit/identity"
-	"github.com/moby/buildkit/util/libcontainer_specconv"
+	rootlessspecconv "github.com/moby/buildkit/util/rootless/specconv"
 	"github.com/moby/buildkit/util/system"
 	"github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/pkg/errors"
@ -84,6 +84,8 @@ func New(opt Opt) (executor.Executor, error) {
 		LogFormat:    runc.JSON,
 		PdeathSignal: syscall.SIGKILL,
 		Setpgid:      true,
+		// we don't execute runc with --rootless=(true|false) explicitly,
+		// so as to support non-runc runtimes
 	}

 	w := &runcExecutor{
@ -169,13 +171,11 @@ func (w *runcExecutor) Exec(ctx context.Context, meta executor.Meta, root cache.
 		return errors.Wrapf(err, "failed to create working directory %s", newp)
 	}

+	if err := setOOMScoreAdj(spec); err != nil {
+		return err
+	}
 	if w.rootless {
-		specconv.ToRootless(spec, nil)
-		// TODO(AkihiroSuda): keep Cgroups enabled if /sys/fs/cgroup/cpuset/buildkit exists and writable
-		spec.Linux.CgroupsPath = ""
-		// TODO(AkihiroSuda): ToRootless removes netns, but we should readd netns here
-		// if either SUID or userspace NAT is configured on the host.
-		if err := setOOMScoreAdj(spec); err != nil {
+		if err := rootlessspecconv.ToRootless(spec); err != nil {
 			return err
 		}
 	}
--- a/frontend/dockerfile/dockerfile_test.go
+++ b/frontend/dockerfile/dockerfile_test.go
@ -1115,6 +1115,10 @@ RUN ["ls"]
 }

 func testUser(t *testing.T, sb integration.Sandbox) {
+	if sb.Rootless() {
+		t.Skip("only for rootful worker, due to lack of support for additional gids (https://github.com/opencontainers/runc/issues/1835)")
+	}
+
 	t.Parallel()

 	dockerfile := []byte(`
--- a/hack/dockerfiles/test.Dockerfile
+++ b/hack/dockerfiles/test.Dockerfile
@ -5,7 +5,7 @@ ARG CONTAINERD10_VERSION=v1.0.3
 # available targets: buildkitd, buildkitd.oci_only, buildkitd.containerd_only
 ARG BUILDKIT_TARGET=buildkitd
 ARG REGISTRY_VERSION=2.6
-ARG ROOTLESSKIT_VERSION=1e79dc31d71ea8c1a27f15086be2be2b1d99acaa
+ARG ROOTLESSKIT_VERSION=20b0fc24b305b031a61ef1a1ca456aadafaf5e77

 # The `buildkitd` stage and the `buildctl` stage are placed here
 # so that they can be built quickly with legacy DAG-unaware `docker build --target=...`
@ -75,11 +75,29 @@ RUN go build -ldflags "$(cat .tmp/ldflags) -d"  -o /usr/bin/buildkitd.containerd

 FROM registry:$REGISTRY_VERSION AS registry

+FROM gobuild-base AS rootlesskit-base
+RUN git clone https://github.com/rootless-containers/rootlesskit.git /go/src/github.com/rootless-containers/rootlesskit
+WORKDIR /go/src/github.com/rootless-containers/rootlesskit
+
+FROM rootlesskit-base as rootlesskit
+ARG ROOTLESSKIT_VERSION
+# mitigate https://github.com/moby/moby/pull/35456
+ENV GOOS=linux
+RUN git checkout -q "$ROOTLESSKIT_VERSION" \
+&& go build -o /rootlesskit ./cmd/rootlesskit
+
 FROM unit-tests AS integration-tests
+ENV BUILDKIT_INTEGRATION_ROOTLESS_IDPAIR="1000:1000"
+RUN apk add --no-cache shadow shadow-uidmap sudo \
+  && useradd --create-home --home-dir /home/user --uid 1000 -s /bin/sh user \
+  && echo "XDG_RUNTIME_DIR=/run/user/1000; export XDG_RUNTIME_DIR" >> /home/user/.profile \
+  && mkdir -m 0700 -p /run/user/1000 \
+  && chown -R user /run/user/1000 /home/user
 COPY --from=containerd10 /go/src/github.com/containerd/containerd/bin/containerd* /opt/containerd-1.0/bin/
 COPY --from=buildctl /usr/bin/buildctl /usr/bin/
 COPY --from=buildkitd /usr/bin/buildkitd /usr/bin
 COPY --from=registry /bin/registry /usr/bin
+COPY --from=rootlesskit /rootlesskit /usr/bin/

 FROM buildkit-base AS cross-windows
 ENV GOOS=windows
@ -123,31 +141,18 @@ VOLUME /var/lib/containerd
 VOLUME /run/containerd
 ENTRYPOINT ["containerd"]

-FROM gobuild-base AS rootlesskit-base
-RUN git clone https://github.com/AkihiroSuda/rootlesskit.git /go/src/github.com/AkihiroSuda/rootlesskit
-WORKDIR /go/src/github.com/AkihiroSuda/rootlesskit
-
-FROM rootlesskit-base as rootlesskit
-ARG ROOTLESSKIT_VERSION
-# mitigate https://github.com/moby/moby/pull/35456
-ENV GOOS=linux
-RUN git checkout -q "$ROOTLESSKIT_VERSION" \
-  && go build -o /rootlesskit ./cmd/rootlesskit
-
 # Rootless mode.
 # Still requires `--privileged`.
 FROM buildkit-buildkitd AS rootless
 RUN apk add --no-cache shadow shadow-uidmap \
  && useradd --create-home --home-dir /home/user --uid 1000 user \
-  && mkdir -p /home/user/.local/run /home/user/.local/tmp /home/user/.local/share/buildkit \
-  && chown -R user /home/user
+  && mkdir -p /run/user/1000 /home/user/.local/tmp /home/user/.local/share/buildkit \
+  && chown -R user /run/user/1000 /home/user
 COPY --from=rootlesskit /rootlesskit /usr/bin/
 USER user
 ENV HOME /home/user
 ENV USER user
-# WORKAROUND: this should be typically /run/user/1000,
-# but mkdir under /run is not captured when built using BuildKit. (#429)
-ENV XDG_RUNTIME_DIR=/home/user/.local/run
+ENV XDG_RUNTIME_DIR=/run/user/1000
 ENV TMPDIR=/home/user/.local/tmp
 VOLUME /home/user/.local/share/buildkit
 ENTRYPOINT ["rootlesskit", "buildkitd"]
--- a/util/libcontainer_specconv/README.md
+++ b/util/libcontainer_specconv/README.md
@ -1 +0,0 @@
-Temporary forked from https://github.com/opencontainers/runc/pull/1692
--- a/util/libcontainer_specconv/example.go
+++ b/util/libcontainer_specconv/example.go
@ -1,190 +0,0 @@
-package specconv
-
-import (
-	"os"
-	"sort"
-	"strings"
-
-	"github.com/opencontainers/runc/libcontainer/system"
-	"github.com/opencontainers/runc/libcontainer/user"
-	"github.com/opencontainers/runtime-spec/specs-go"
-)
-
-// RootlessOpts is an optional spec for ToRootless
-type RootlessOpts struct {
-	// Add sub{u,g}id to spec.Linux.{U,G}IDMappings.
-	// Requires newuidmap(1) and newgidmap(1) with suid bit.
-	// Ignored when running in userns.
-	MapSubUIDGID bool
-}
-
-// Run-time context for ToRootless.
-type RootlessContext struct {
-	EUID     uint32
-	EGID     uint32
-	SubUIDs  []user.SubID
-	SubGIDs  []user.SubID
-	UIDMap   []user.IDMap
-	GIDMap   []user.IDMap
-	InUserNS bool
-}
-
-// ToRootless converts the given spec file into one that should work with
-// rootless containers, by removing incompatible options and adding others that
-// are needed.
-func ToRootless(spec *specs.Spec, opts *RootlessOpts) error {
-	var err error
-	ctx := RootlessContext{}
-	ctx.EUID = uint32(os.Geteuid())
-	ctx.EGID = uint32(os.Getegid())
-	ctx.SubUIDs, err = user.CurrentUserSubUIDs()
-	if err != nil && !os.IsNotExist(err) {
-		return err
-	}
-	ctx.SubGIDs, err = user.CurrentGroupSubGIDs()
-	if err != nil && !os.IsNotExist(err) {
-		return err
-	}
-	ctx.UIDMap, err = user.CurrentProcessUIDMap()
-	if err != nil && !os.IsNotExist(err) {
-		return err
-	}
-	uidMapExists := !os.IsNotExist(err)
-	ctx.GIDMap, err = user.CurrentProcessUIDMap()
-	if err != nil && !os.IsNotExist(err) {
-		return err
-	}
-	ctx.InUserNS = uidMapExists && system.UIDMapInUserNS(ctx.UIDMap)
-	return ToRootlessWithContext(ctx, spec, opts)
-}
-
-// ToRootlessWithContext converts the spec with the run-time context.
-// ctx can be internally modified for sorting.
-func ToRootlessWithContext(ctx RootlessContext, spec *specs.Spec, opts *RootlessOpts) error {
-	if opts == nil {
-		opts = &RootlessOpts{}
-	}
-	var namespaces []specs.LinuxNamespace
-
-	// Remove networkns from the spec.
-	for _, ns := range spec.Linux.Namespaces {
-		switch ns.Type {
-		case specs.NetworkNamespace, specs.UserNamespace:
-			// Do nothing.
-		default:
-			namespaces = append(namespaces, ns)
-		}
-	}
-	// Add userns to the spec.
-	namespaces = append(namespaces, specs.LinuxNamespace{
-		Type: specs.UserNamespace,
-	})
-	spec.Linux.Namespaces = namespaces
-
-	// Add mappings for the current user.
-	if ctx.InUserNS {
-		uNextContainerID := int64(0)
-		sort.Sort(idmapSorter(ctx.UIDMap))
-		for _, uidmap := range ctx.UIDMap {
-			spec.Linux.UIDMappings = append(spec.Linux.UIDMappings,
-				specs.LinuxIDMapping{
-					HostID:      uint32(uidmap.ID),
-					ContainerID: uint32(uNextContainerID),
-					Size:        uint32(uidmap.Count),
-				})
-			uNextContainerID += uidmap.Count
-		}
-		gNextContainerID := int64(0)
-		sort.Sort(idmapSorter(ctx.GIDMap))
-		for _, gidmap := range ctx.GIDMap {
-			spec.Linux.GIDMappings = append(spec.Linux.GIDMappings,
-				specs.LinuxIDMapping{
-					HostID:      uint32(gidmap.ID),
-					ContainerID: uint32(gNextContainerID),
-					Size:        uint32(gidmap.Count),
-				})
-			gNextContainerID += gidmap.Count
-		}
-		// opts.MapSubUIDGID is ignored in userns
-	} else {
-		spec.Linux.UIDMappings = []specs.LinuxIDMapping{{
-			HostID:      ctx.EUID,
-			ContainerID: 0,
-			Size:        1,
-		}}
-		spec.Linux.GIDMappings = []specs.LinuxIDMapping{{
-			HostID:      ctx.EGID,
-			ContainerID: 0,
-			Size:        1,
-		}}
-		if opts.MapSubUIDGID {
-			uNextContainerID := int64(1)
-			sort.Sort(subIDSorter(ctx.SubUIDs))
-			for _, subuid := range ctx.SubUIDs {
-				spec.Linux.UIDMappings = append(spec.Linux.UIDMappings,
-					specs.LinuxIDMapping{
-						HostID:      uint32(subuid.SubID),
-						ContainerID: uint32(uNextContainerID),
-						Size:        uint32(subuid.Count),
-					})
-				uNextContainerID += subuid.Count
-			}
-			gNextContainerID := int64(1)
-			sort.Sort(subIDSorter(ctx.SubGIDs))
-			for _, subgid := range ctx.SubGIDs {
-				spec.Linux.GIDMappings = append(spec.Linux.GIDMappings,
-					specs.LinuxIDMapping{
-						HostID:      uint32(subgid.SubID),
-						ContainerID: uint32(gNextContainerID),
-						Size:        uint32(subgid.Count),
-					})
-				gNextContainerID += subgid.Count
-			}
-		}
-	}
-
-	// Fix up mounts.
-	var mounts []specs.Mount
-	for _, mount := range spec.Mounts {
-		// Ignore all mounts that are under /sys.
-		if strings.HasPrefix(mount.Destination, "/sys") {
-			continue
-		}
-
-		// Remove all gid= and uid= mappings.
-		var options []string
-		for _, option := range mount.Options {
-			if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") {
-				options = append(options, option)
-			}
-		}
-
-		mount.Options = options
-		mounts = append(mounts, mount)
-	}
-	// Add the sysfs mount as an rbind.
-	mounts = append(mounts, specs.Mount{
-		Source:      "/sys",
-		Destination: "/sys",
-		Type:        "none",
-		Options:     []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
-	})
-	spec.Mounts = mounts
-
-	// Remove cgroup settings.
-	spec.Linux.Resources = nil
-	return nil
-}
-
-// subIDSorter is required for Go <= 1.7
-type subIDSorter []user.SubID
-
-func (x subIDSorter) Len() int           { return len(x) }
-func (x subIDSorter) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
-func (x subIDSorter) Less(i, j int) bool { return x[i].SubID < x[j].SubID }
-
-type idmapSorter []user.IDMap
-
-func (x idmapSorter) Len() int           { return len(x) }
-func (x idmapSorter) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
-func (x idmapSorter) Less(i, j int) bool { return x[i].ID < x[j].ID }
--- a/util/libcontainer_specconv/spec_linux_test.go
+++ b/util/libcontainer_specconv/spec_linux_test.go
@ -1,190 +0,0 @@
-// +build linux
-
-package specconv
-
-import (
-	"reflect"
-	"testing"
-
-	"github.com/opencontainers/runc/libcontainer/configs/validate"
-	"github.com/opencontainers/runc/libcontainer/specconv"
-	"github.com/opencontainers/runc/libcontainer/user"
-	"github.com/opencontainers/runtime-spec/specs-go"
-)
-
-func TestRootlessSpecconvValidate(t *testing.T) {
-	specGen := func() *specs.Spec {
-		spec := specconv.Example()
-		spec.Root.Path = "/"
-		return spec
-	}
-	cases := []struct {
-		ctx                 RootlessContext
-		opts                *RootlessOpts
-		additionalValidator func(t *testing.T, s *specs.Spec)
-	}{
-		{
-			ctx: RootlessContext{
-				EUID: 0,
-				EGID: 0,
-			},
-		},
-		{
-			ctx: RootlessContext{
-				EUID: 4242,
-				EGID: 4242,
-			},
-		},
-		{
-			ctx: RootlessContext{
-				EUID: 4242,
-				EGID: 4242,
-				// empty subuid / subgid
-			},
-			opts: &RootlessOpts{
-				MapSubUIDGID: true,
-			},
-		},
-		{
-			ctx: RootlessContext{
-				EUID: 4242,
-				EGID: 4242,
-				SubUIDs: []user.SubID{
-					{
-						Name:  "dummy",
-						SubID: 14242,
-						Count: 65536,
-					},
-					{
-						Name:  "dummy",
-						SubID: 114242,
-						Count: 65536,
-					},
-				},
-				SubGIDs: []user.SubID{
-					{
-						Name:  "dummy",
-						SubID: 14242,
-						Count: 65536,
-					},
-					{
-						Name:  "dummy",
-						SubID: 114242,
-						Count: 65536,
-					},
-				},
-			},
-			opts: &RootlessOpts{
-				MapSubUIDGID: true,
-			},
-			additionalValidator: func(t *testing.T, s *specs.Spec) {
-				expectedUIDMappings := []specs.LinuxIDMapping{
-					{
-						HostID:      4242,
-						ContainerID: 0,
-						Size:        1,
-					},
-					{
-						HostID:      14242,
-						ContainerID: 1,
-						Size:        65536,
-					},
-					{
-						HostID:      114242,
-						ContainerID: 65537,
-						Size:        65536,
-					},
-				}
-				if !reflect.DeepEqual(expectedUIDMappings, s.Linux.UIDMappings) {
-					t.Errorf("expected %#v, got %#v", expectedUIDMappings, s.Linux.UIDMappings)
-				}
-				expectedGIDMappings := expectedUIDMappings
-				if !reflect.DeepEqual(expectedGIDMappings, s.Linux.GIDMappings) {
-					t.Errorf("expected %#v, got %#v", expectedGIDMappings, s.Linux.GIDMappings)
-				}
-			},
-		},
-		{
-			ctx: RootlessContext{
-				EUID: 0,
-				EGID: 0,
-				UIDMap: []user.IDMap{
-					{
-						ID:       0,
-						ParentID: 4242,
-						Count:    1,
-					},
-					{
-						ID:       1,
-						ParentID: 231072,
-						Count:    65536,
-					},
-				},
-				GIDMap: []user.IDMap{
-					{
-						ID:       0,
-						ParentID: 4242,
-						Count:    1,
-					},
-					{
-						ID:       1,
-						ParentID: 231072,
-						Count:    65536,
-					},
-				},
-				InUserNS: true,
-			},
-			additionalValidator: func(t *testing.T, s *specs.Spec) {
-				expectedUIDMappings := []specs.LinuxIDMapping{
-					{
-						HostID:      0,
-						ContainerID: 0,
-						Size:        1,
-					},
-					{
-						HostID:      1,
-						ContainerID: 1,
-						Size:        65536,
-					},
-				}
-				if !reflect.DeepEqual(expectedUIDMappings, s.Linux.UIDMappings) {
-					t.Errorf("expected %#v, got %#v", expectedUIDMappings, s.Linux.UIDMappings)
-				}
-				expectedGIDMappings := expectedUIDMappings
-				if !reflect.DeepEqual(expectedGIDMappings, s.Linux.GIDMappings) {
-					t.Errorf("expected %#v, got %#v", expectedGIDMappings, s.Linux.GIDMappings)
-				}
-			},
-		},
-	}
-
-	for _, c := range cases {
-		spec := specGen()
-		err := ToRootlessWithContext(c.ctx, spec, c.opts)
-		if err != nil {
-			t.Errorf("Couldn't convert a rootful spec to rootless: %v", err)
-		}
-
-		// t.Logf("%#v", spec)
-		if c.additionalValidator != nil {
-			c.additionalValidator(t, spec)
-		}
-
-		opts := &specconv.CreateOpts{
-			CgroupName:       "ContainerID",
-			UseSystemdCgroup: false,
-			Spec:             spec,
-			Rootless:         true,
-		}
-
-		config, err := specconv.CreateLibcontainerConfig(opts)
-		if err != nil {
-			t.Errorf("Couldn't create libcontainer config: %v", err)
-		}
-
-		validator := validate.New()
-		if err := validator.Validate(config); err != nil {
-			t.Errorf("Expected specconv to produce valid rootless container config: %v", err)
-		}
-	}
-}
--- a/util/rootless/specconv/specconv_linux.go
+++ b/util/rootless/specconv/specconv_linux.go
@ -0,0 +1,113 @@
+package specconv
+
+import (
+	"os"
+	"sort"
+	"strings"
+
+	"github.com/opencontainers/runc/libcontainer/system"
+	"github.com/opencontainers/runc/libcontainer/user"
+	"github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/pkg/errors"
+)
+
+// ToRootless converts spec in place to be compatible with "rootless" runc; it returns an error unless the caller is already in a user namespace.
+// * Adds userns with the current process's UID/GID maps (ideally unnecessary since we are already in userns; runc-side issue is tracked at https://github.com/opencontainers/runc/issues/1837)
+// * Fixes up mount flags (same as above) and removes cgroup settings
+// * Replaces /sys with a bind-mount (FIXME: we don't need to do this if netns is unshared)
+func ToRootless(spec *specs.Spec) error {
+	if !system.RunningInUserNS() {
+		return errors.New("needs to be in user namespace")
+	}
+	uidMap, err := user.CurrentProcessUIDMap()
+	if err != nil && !os.IsNotExist(err) {
+		return err
+	}
+	gidMap, err := user.CurrentProcessGIDMap() // the GID map, not the UID map, must back spec.Linux.GIDMappings
+	if err != nil && !os.IsNotExist(err) {
+		return err
+	}
+	return toRootless(spec, uidMap, gidMap)
+}
+
+// toRootless applies configureUserNS and configureMounts to spec and then clears its cgroup settings (forked from github.com/opencontainers/runc/libcontainer/specconv).
+func toRootless(spec *specs.Spec, uidMap, gidMap []user.IDMap) error {
+	if err := configureUserNS(spec, uidMap, gidMap); err != nil {
+		return err
+	}
+	if err := configureMounts(spec); err != nil {
+		return err
+	}
+
+	// Remove cgroup settings: both the resource limits and the cgroup path are cleared.
+	spec.Linux.Resources = nil
+	spec.Linux.CgroupsPath = ""
+	return nil
+}
+
+// configureUserNS adds userns and the given ID maps to the spec; container IDs are assigned contiguously from 0 in ascending order of each entry's ID.
+// uidMap and gidMap are sorted in place, and each entry's ID (not its ParentID) becomes the mapping's HostID.
+// Since we are already in userns, ideally we should not need to add userns, but rootless runc currently always requires it:
+// https://github.com/opencontainers/runc/issues/1837
+func configureUserNS(spec *specs.Spec, uidMap, gidMap []user.IDMap) error {
+	spec.Linux.Namespaces = append(spec.Linux.Namespaces, specs.LinuxNamespace{
+		Type: specs.UserNamespace,
+	})
+
+	sort.Slice(uidMap, func(i, j int) bool { return uidMap[i].ID < uidMap[j].ID })
+	uNextContainerID := int64(0)
+	for _, u := range uidMap {
+		spec.Linux.UIDMappings = append(spec.Linux.UIDMappings,
+			specs.LinuxIDMapping{
+				HostID:      uint32(u.ID),
+				ContainerID: uint32(uNextContainerID),
+				Size:        uint32(u.Count),
+			})
+		uNextContainerID += u.Count
+	}
+	sort.Slice(gidMap, func(i, j int) bool { return gidMap[i].ID < gidMap[j].ID })
+	gNextContainerID := int64(0)
+	for _, g := range gidMap {
+		spec.Linux.GIDMappings = append(spec.Linux.GIDMappings,
+			specs.LinuxIDMapping{
+				HostID:      uint32(g.ID),
+				ContainerID: uint32(gNextContainerID),
+				Size:        uint32(g.Count),
+			})
+		gNextContainerID += g.Count
+	}
+	return nil
+}
+
+// configureMounts drops gid=/uid= mount options and replaces every mount at or under /sys with a read-only rbind of /sys.
+func configureMounts(spec *specs.Spec) error {
+	var mounts []specs.Mount
+	for _, mount := range spec.Mounts {
+		// Ignore /sys itself and all mounts under it (but not siblings such as /sysroot), because we add /sys later.
+		if d := mount.Destination; d == "/sys" || strings.HasPrefix(d, "/sys/") {
+			continue
+		}
+
+		// Remove all gid= and uid= mappings.
+		// Since we are already in userns, ideally we should not need to do this (https://github.com/opencontainers/runc/issues/1837).
+		var options []string
+		for _, option := range mount.Options {
+			if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") {
+				options = append(options, option)
+			}
+		}
+		mount.Options = options
+		mounts = append(mounts, mount)
+	}
+
+	// Add the sysfs mount as an rbind, because we can't mount /sys unless we have netns.
+	// TODO: keep original /sys mount when we have netns.
+	mounts = append(mounts, specs.Mount{
+		Source:      "/sys",
+		Destination: "/sys",
+		Type:        "none",
+		Options:     []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
+	})
+	spec.Mounts = mounts
+	return nil
+}
--- a/util/rootless/specconv/specconv_linux_test.go
+++ b/util/rootless/specconv/specconv_linux_test.go
@ -0,0 +1,42 @@
+package specconv
+
+import (
+	"testing"
+
+	"github.com/opencontainers/runc/libcontainer/specconv"
+	"github.com/opencontainers/runc/libcontainer/user"
+	"github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/stretchr/testify/require"
+)
+
+// TestToRootless checks that toRootless copies the current-namespace IDs (not the parent IDs) into the spec,
+// handles the UID and GID maps independently, appends userns and removes the cgroup settings.
+func TestToRootless(t *testing.T) {
+	spec := specconv.Example()
+	uidMap := []user.IDMap{
+		{ID: 0, ParentID: 4242, Count: 1},
+		{ID: 1, ParentID: 231072, Count: 65536},
+	}
+	// Use a GID map that differs from the UID map so that mixing them up is detected.
+	gidMap := []user.IDMap{
+		{ID: 0, ParentID: 4243, Count: 1},
+		{ID: 1, ParentID: 331072, Count: 65535},
+	}
+	expectedUIDMappings := []specs.LinuxIDMapping{
+		{HostID: 0, ContainerID: 0, Size: 1},
+		{HostID: 1, ContainerID: 1, Size: 65536},
+	}
+	expectedGIDMappings := []specs.LinuxIDMapping{
+		{HostID: 0, ContainerID: 0, Size: 1},
+		{HostID: 1, ContainerID: 1, Size: 65535},
+	}
+
+	require.NoError(t, toRootless(spec, uidMap, gidMap))
+	require.EqualValues(t, expectedUIDMappings, spec.Linux.UIDMappings)
+	require.EqualValues(t, expectedGIDMappings, spec.Linux.GIDMappings)
+	// The user namespace is appended after the namespaces already present in the spec.
+	require.Equal(t, specs.UserNamespace, spec.Linux.Namespaces[len(spec.Linux.Namespaces)-1].Type)
+	// Cgroup settings must be removed.
+	require.Nil(t, spec.Linux.Resources)
+	require.Empty(t, spec.Linux.CgroupsPath)
+}
--- a/util/testutil/integration/containerd.go
+++ b/util/testutil/integration/containerd.go
@ -96,13 +96,13 @@ state = %q
 	buildkitdSock, stop, err := runBuildkitd([]string{"buildkitd",
 		"--oci-worker=false",
 		"--containerd-worker=true",
-		"--containerd-worker-addr", address}, logs)
+		"--containerd-worker-addr", address}, logs, 0, 0)
 	if err != nil {
 		return nil, nil, err
 	}
 	deferF.append(stop)

-	return &cdsandbox{address: address, sandbox: sandbox{address: buildkitdSock, logs: logs, cleanup: deferF}}, cl, nil
+	return &cdsandbox{address: address, sandbox: sandbox{address: buildkitdSock, logs: logs, cleanup: deferF, rootless: false}}, cl, nil
 }

 type cdsandbox struct {
--- a/util/testutil/integration/oci.go
+++ b/util/testutil/integration/oci.go
@ -3,6 +3,7 @@ package integration
 import (
 	"bufio"
 	"bytes"
+	"fmt"
 	"io/ioutil"
 	"os"
 	"os/exec"
@ -12,16 +13,31 @@ import (
 	"time"

 	"github.com/google/shlex"
+	"github.com/pkg/errors"
 )

 func init() {
 	register(&oci{})
+
+	// the rootless uid is defined in hack/dockerfiles/test.Dockerfile
+	if s := os.Getenv("BUILDKIT_INTEGRATION_ROOTLESS_IDPAIR"); s != "" {
+		var uid, gid int
+		if _, err := fmt.Sscanf(s, "%d:%d", &uid, &gid); err != nil {
+			panic(errors.Errorf("unexpected BUILDKIT_INTEGRATION_ROOTLESS_IDPAIR: %q", s))
+		}
+		register(&oci{uid: uid, gid: gid})
+	}
 }

 type oci struct {
+	uid int
+	gid int
 }

 func (s *oci) Name() string {
+	if s.uid != 0 {
+		return "oci-rootless"
+	}
 	return "oci"
 }

@ -33,7 +49,15 @@ func (s *oci) New() (Sandbox, func() error, error) {
 		return nil, nil, err
 	}
 	logs := map[string]*bytes.Buffer{}
-	buildkitdSock, stop, err := runBuildkitd([]string{"buildkitd", "--oci-worker=true", "--containerd-worker=false"}, logs)
+	buildkitdArgs := []string{"buildkitd", "--oci-worker=true", "--containerd-worker=false"}
+	if s.uid != 0 {
+		if s.gid == 0 {
+			return nil, nil, errors.Errorf("unsupported id pair: uid=%d, gid=%d", s.uid, s.gid)
+		}
+		// TODO: make sure the user exists and subuid/subgid are configured.
+		buildkitdArgs = append([]string{"sudo", "-u", fmt.Sprintf("#%d", s.uid), "-i", "--", "rootlesskit"}, buildkitdArgs...)
+	}
+	buildkitdSock, stop, err := runBuildkitd(buildkitdArgs, logs, s.uid, s.gid)
 	if err != nil {
 		return nil, nil, err
 	}
@ -41,13 +65,14 @@ func (s *oci) New() (Sandbox, func() error, error) {
 	deferF := &multiCloser{}
 	deferF.append(stop)

-	return &sandbox{address: buildkitdSock, logs: logs, cleanup: deferF}, deferF.F(), nil
+	return &sandbox{address: buildkitdSock, logs: logs, cleanup: deferF, rootless: s.uid != 0}, deferF.F(), nil
 }

 type sandbox struct {
-	address string
-	logs    map[string]*bytes.Buffer
-	cleanup *multiCloser
+	address  string
+	logs     map[string]*bytes.Buffer
+	cleanup  *multiCloser
+	rootless bool
 }

 func (sb *sandbox) Address() string {
@ -85,7 +110,11 @@ func (sb *sandbox) Cmd(args ...string) *exec.Cmd {
 	return cmd
 }

-func runBuildkitd(args []string, logs map[string]*bytes.Buffer) (address string, cl func() error, err error) {
+func (sb *sandbox) Rootless() bool {
+	return sb.rootless
+}
+
+func runBuildkitd(args []string, logs map[string]*bytes.Buffer, uid, gid int) (address string, cl func() error, err error) {
 	deferF := &multiCloser{}
 	cl = deferF.F()

@ -100,6 +129,9 @@ func runBuildkitd(args []string, logs map[string]*bytes.Buffer) (address string,
 	if err != nil {
 		return "", nil, err
 	}
+	if err := os.Chown(tmpdir, uid, gid); err != nil {
+		return "", nil, err
+	}
 	deferF.append(func() error { return os.RemoveAll(tmpdir) })

 	address = "unix://" + filepath.Join(tmpdir, "buildkitd.sock")
--- a/util/testutil/integration/run.go
+++ b/util/testutil/integration/run.go
@ -17,6 +17,7 @@ type Sandbox interface {
 	PrintLogs(*testing.T)
 	Cmd(...string) *exec.Cmd
 	NewRegistry() (string, error)
+	Rootless() bool
 }

 type Worker interface {
--- a/vendor.conf
+++ b/vendor.conf
@ -60,8 +60,10 @@ github.com/uber/jaeger-lib c48167d9cae5887393dd5e61efd06a4a48b7fbb3
 github.com/codahale/hdrhistogram f8ad88b59a584afeee9d334eff879b104439117b

 github.com/opentracing-contrib/go-stdlib b1a47cfbdd7543e70e9ef3e73d0802ad306cc1cc
-github.com/opencontainers/selinux 74a747aeaf2d66097b6908f572794f49f07dda2c

 # used by dockerfile tests
 gotest.tools v2.1.0
 github.com/google/go-cmp v0.2.0
+
+# used by rootless spec conv test
+github.com/seccomp/libseccomp-golang 32f571b70023028bd57d9288c20efbcb237f3ce0
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
@ -1,116 +0,0 @@
-package validate
-
-import (
-	"fmt"
-	"os"
-	"reflect"
-	"strings"
-
-	"github.com/opencontainers/runc/libcontainer/configs"
-)
-
-var (
-	geteuid = os.Geteuid
-	getegid = os.Getegid
-)
-
-func (v *ConfigValidator) rootless(config *configs.Config) error {
-	if err := rootlessMappings(config); err != nil {
-		return err
-	}
-	if err := rootlessMount(config); err != nil {
-		return err
-	}
-
-	// XXX: We currently can't verify the user config at all, because
-	//      configs.Config doesn't store the user-related configs. So this
-	//      has to be verified by setupUser() in init_linux.go.
-
-	return nil
-}
-
-func hasIDMapping(id int, mappings []configs.IDMap) bool {
-	for _, m := range mappings {
-		if id >= m.ContainerID && id < m.ContainerID+m.Size {
-			return true
-		}
-	}
-	return false
-}
-
-func rootlessMappings(config *configs.Config) error {
-	if euid := geteuid(); euid != 0 {
-		if !config.Namespaces.Contains(configs.NEWUSER) {
-			return fmt.Errorf("rootless containers require user namespaces")
-		}
-		if len(config.UidMappings) == 0 {
-			return fmt.Errorf("rootless containers requires at least one UID mapping")
-		}
-		if len(config.GidMappings) == 0 {
-			return fmt.Errorf("rootless containers requires at least one GID mapping")
-		}
-	}
-
-	return nil
-}
-
-// cgroup verifies that the user isn't trying to set any cgroup limits or paths.
-func rootlessCgroup(config *configs.Config) error {
-	// Nothing set at all.
-	if config.Cgroups == nil || config.Cgroups.Resources == nil {
-		return nil
-	}
-
-	// Used for comparing to the zero value.
-	left := reflect.ValueOf(*config.Cgroups.Resources)
-	right := reflect.Zero(left.Type())
-
-	// This is all we need to do, since specconv won't add cgroup options in
-	// rootless mode.
-	if !reflect.DeepEqual(left.Interface(), right.Interface()) {
-		return fmt.Errorf("cannot specify resource limits in rootless container")
-	}
-
-	return nil
-}
-
-// mount verifies that the user isn't trying to set up any mounts they don't have
-// the rights to do. In addition, it makes sure that no mount has a `uid=` or
-// `gid=` option that doesn't resolve to root.
-func rootlessMount(config *configs.Config) error {
-	// XXX: We could whitelist allowed devices at this point, but I'm not
-	//      convinced that's a good idea. The kernel is the best arbiter of
-	//      access control.
-
-	for _, mount := range config.Mounts {
-		// Check that the options list doesn't contain any uid= or gid= entries
-		// that don't resolve to root.
-		for _, opt := range strings.Split(mount.Data, ",") {
-			if strings.HasPrefix(opt, "uid=") {
-				var uid int
-				n, err := fmt.Sscanf(opt, "uid=%d", &uid)
-				if n != 1 || err != nil {
-					// Ignore unknown mount options.
-					continue
-				}
-				if !hasIDMapping(uid, config.UidMappings) {
-					return fmt.Errorf("cannot specify uid= mount options for unmapped uid in rootless containers")
-				}
-			}
-
-			if strings.HasPrefix(opt, "gid=") {
-				var gid int
-				n, err := fmt.Sscanf(opt, "gid=%d", &gid)
-				if n != 1 || err != nil {
-					// Ignore unknown mount options.
-					continue
-				}
-				if !hasIDMapping(gid, config.GidMappings) {
-					return fmt.Errorf("cannot specify gid= mount options for unmapped gid in rootless containers")
-				}
-			}
-		}
-	}
-
-	return nil
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
@ -1,212 +0,0 @@
-package validate
-
-import (
-	"fmt"
-	"os"
-	"path/filepath"
-	"strings"
-
-	"github.com/opencontainers/runc/libcontainer/configs"
-	"github.com/opencontainers/runc/libcontainer/intelrdt"
-	selinux "github.com/opencontainers/selinux/go-selinux"
-)
-
-// Validator is implemented by types that can check a container configuration.
-type Validator interface {
-	// Validate returns a non-nil error when the given configuration is
-	// rejected.
-	Validate(*configs.Config) error
-}
-
-// New returns a Validator backed by a ConfigValidator.
-func New() Validator {
-	return new(ConfigValidator)
-}
-
-// ConfigValidator is the field-less Validator implementation returned by New.
-type ConfigValidator struct {
-}
-
-// Validate runs the individual configuration checks in a fixed order and
-// returns the first error any of them reports. The rootless check runs last,
-// and only when config.Rootless is set.
-func (v *ConfigValidator) Validate(config *configs.Config) error {
-	checks := []func(*configs.Config) error{
-		v.rootfs,
-		v.network,
-		v.hostname,
-		v.security,
-		v.usernamespace,
-		v.sysctl,
-		v.intelrdt,
-	}
-	for _, check := range checks {
-		if err := check(config); err != nil {
-			return err
-		}
-	}
-
-	if !config.Rootless {
-		return nil
-	}
-	return v.rootless(config)
-}
-
-// rootfs checks that config.Rootfs exists and that, once made absolute and
-// with symlinks resolved, it is still equal to the cleaned configured path.
-// In other words, the configured path must be absolute and must not pass
-// through a symlink.
-func (v *ConfigValidator) rootfs(config *configs.Config) error {
-	_, statErr := os.Stat(config.Rootfs)
-	switch {
-	case statErr == nil:
-		// Path exists; carry on with the comparison below.
-	case os.IsNotExist(statErr):
-		return fmt.Errorf("rootfs (%s) does not exist", config.Rootfs)
-	default:
-		return statErr
-	}
-
-	absPath, err := filepath.Abs(config.Rootfs)
-	if err != nil {
-		return err
-	}
-	resolved, err := filepath.EvalSymlinks(absPath)
-	if err != nil {
-		return err
-	}
-
-	if resolved != filepath.Clean(config.Rootfs) {
-		return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs)
-	}
-	return nil
-}
-
-// network rejects configurations that declare networks or routes without
-// also requesting a NEWNET namespace.
-func (v *ConfigValidator) network(config *configs.Config) error {
-	if config.Namespaces.Contains(configs.NEWNET) {
-		return nil
-	}
-	if len(config.Networks) == 0 && len(config.Routes) == 0 {
-		return nil
-	}
-	return fmt.Errorf("unable to apply network settings without a private NET namespace")
-}
-
-// hostname rejects a non-empty config.Hostname when no NEWUTS namespace is
-// requested.
-func (v *ConfigValidator) hostname(config *configs.Config) error {
-	if config.Hostname == "" {
-		return nil
-	}
-	if config.Namespaces.Contains(configs.NEWUTS) {
-		return nil
-	}
-	return fmt.Errorf("unable to set hostname without a private UTS namespace")
-}
-
-// security performs two checks: masked or read-only paths require a NEWNS
-// namespace, and a process label requires SELinux to be enabled.
-func (v *ConfigValidator) security(config *configs.Config) error {
-	restrictsPaths := len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0
-	if restrictsPaths && !config.Namespaces.Contains(configs.NEWNS) {
-		return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
-	}
-
-	if config.ProcessLabel == "" {
-		return nil
-	}
-	if !selinux.GetEnabled() {
-		return fmt.Errorf("selinux label is specified in config, but selinux is disabled or not supported")
-	}
-	return nil
-}
-
-// usernamespace checks user-namespace consistency: ID mappings are only
-// allowed together with a NEWUSER namespace, and when that namespace is
-// requested /proc/self/ns/user must exist.
-func (v *ConfigValidator) usernamespace(config *configs.Config) error {
-	if !config.Namespaces.Contains(configs.NEWUSER) {
-		if config.UidMappings != nil || config.GidMappings != nil {
-			return fmt.Errorf("User namespace mappings specified, but USER namespace isn't enabled in the config")
-		}
-		return nil
-	}
-
-	if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
-		return fmt.Errorf("USER namespaces aren't enabled in the kernel")
-	}
-	return nil
-}
-
-// sysctl checks every key in config.Sysctl against the namespaces the
-// container requests. /proc/sys isn't completely namespaced, so only a subset
-// of sysctls is permitted:
-//   - the listed kernel.* IPC keys and anything under fs.mqueue. need NEWIPC;
-//   - net.* keys need NEWNET, and if a namespace path is given it must pass
-//     checkHostNs;
-//   - every other key is rejected.
-func (v *ConfigValidator) sysctl(config *configs.Config) error {
-	ipcSysctls := map[string]bool{
-		"kernel.msgmax":          true,
-		"kernel.msgmnb":          true,
-		"kernel.msgmni":          true,
-		"kernel.sem":             true,
-		"kernel.shmall":          true,
-		"kernel.shmmax":          true,
-		"kernel.shmmni":          true,
-		"kernel.shm_rmid_forced": true,
-	}
-
-	for key := range config.Sysctl {
-		switch {
-		case ipcSysctls[key] || strings.HasPrefix(key, "fs.mqueue."):
-			if !config.Namespaces.Contains(configs.NEWIPC) {
-				return fmt.Errorf("sysctl %q is not allowed in the hosts ipc namespace", key)
-			}
-
-		case strings.HasPrefix(key, "net."):
-			if !config.Namespaces.Contains(configs.NEWNET) {
-				return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", key)
-			}
-			if nsPath := config.Namespaces.PathOf(configs.NEWNET); nsPath != "" {
-				if err := checkHostNs(key, nsPath); err != nil {
-					return err
-				}
-			}
-
-		default:
-			return fmt.Errorf("sysctl %q is not in a separate kernel namespace", key)
-		}
-	}
-
-	return nil
-}
-
-// intelrdt validates the optional IntelRdt section: when present, Intel RDT
-// must be enabled on the host and the L3 cache schema must be non-empty.
-func (v *ConfigValidator) intelrdt(config *configs.Config) error {
-	if config.IntelRdt == nil {
-		return nil
-	}
-	if !intelrdt.IsEnabled() {
-		return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled")
-	}
-	if config.IntelRdt.L3CacheSchema == "" {
-		return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
-	}
-	return nil
-}
-
-// isSymbolicLink reports whether path itself (not its target) is a symlink.
-// Any error from os.Lstat is returned unchanged.
-func isSymbolicLink(path string) (bool, error) {
-	info, err := os.Lstat(path)
-	if err != nil {
-		return false, err
-	}
-	isLink := info.Mode()&os.ModeSymlink != 0
-	return isLink, nil
-}
-
-// checkHostNs checks whether a network sysctl would be applied to the network
-// namespace of the current process.
-//
-// sysctlConfig is the sysctl key, used only in the error message. path is the
-// network namespace path from the container configuration. If path is not a
-// symlink it is assumed not to be the current namespace and nil is returned.
-// Otherwise its link target is compared with that of /proc/self/ns/net, and an
-// error is returned when they are equal. Failures of the underlying readlink
-// or lstat calls are returned with the original error included.
-func checkHostNs(sysctlConfig string, path string) error {
-	const currentProcessNetns = "/proc/self/ns/net"
-
-	// readlink on the current process's network namespace
-	destOfCurrentProcess, err := os.Readlink(currentProcessNetns)
-	if err != nil {
-		return fmt.Errorf("read soft link %q error: %v", currentProcessNetns, err)
-	}
-
-	// First check if the provided path is a symbolic link
-	symLink, err := isSymbolicLink(path)
-	if err != nil {
-		return fmt.Errorf("could not check that %q is a symlink: %v", path, err)
-	}
-	if !symLink {
-		// The provided namespace is not a symbolic link,
-		// it is not the host namespace.
-		return nil
-	}
-
-	// readlink on the path provided in the struct
-	destOfContainer, err := os.Readlink(path)
-	if err != nil {
-		return fmt.Errorf("read soft link %q error: %v", path, err)
-	}
-	if destOfContainer == destOfCurrentProcess {
-		return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", sysctlConfig)
-	}
-	return nil
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go
@ -1,553 +0,0 @@
-// +build linux
-
-package intelrdt
-
-import (
-	"bufio"
-	"fmt"
-	"io/ioutil"
-	"os"
-	"path/filepath"
-	"strconv"
-	"strings"
-	"sync"
-
-	"github.com/opencontainers/runc/libcontainer/configs"
-)
-
-/*
- * About Intel RDT/CAT feature:
- * Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
- * Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3
- * Cache is the only resource that is supported in RDT.
- *
- * This feature provides a way for the software to restrict cache allocation to a
- * defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
- * The different subsets are identified by class of service (CLOS) and each CLOS
- * has a capacity bitmask (CBM).
- *
- * More information about Intel RDT/CAT can be found in section 17.17
- * of the Intel Software Developer Manual.
- *
- * About Intel RDT/CAT kernel interface:
- * In Linux 4.10 kernel or newer, the interface is defined and exposed via
- * "resource control" filesystem, which is a "cgroup-like" interface.
- *
- * Comparing with cgroups, it has similar process management lifecycle and
- * interfaces in a container. But unlike cgroups' hierarchy, it has single level
- * filesystem layout.
- *
- * Intel RDT "resource control" filesystem hierarchy:
- * mount -t resctrl resctrl /sys/fs/resctrl
- * tree /sys/fs/resctrl
- * /sys/fs/resctrl/
- * |-- info
- * |   |-- L3
- * |       |-- cbm_mask
- * |       |-- min_cbm_bits
- * |       |-- num_closids
- * |-- cpus
- * |-- schemata
- * |-- tasks
- * |-- <container_id>
- *     |-- cpus
- *     |-- schemata
- *     |-- tasks
- *
- * For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
- * resource constraints.
- *
- * The file `tasks` has a list of tasks that belong to this group (e.g., the
- * "<container_id>" group). Tasks can be added to a group by writing the task ID
- * to the "tasks" file  (which will automatically remove them from the previous
- * group to which they belonged). New tasks created by fork(2) and clone(2) are
- * added to the same group as their parent. If a pid is not in any sub group, it is
- * in root group.
- *
- * The file `schemata` has allocation bitmasks/values for L3 cache on each socket,
- * which contains L3 cache id and capacity bitmask (CBM).
- * 	Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
- * For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
- * which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
- *
- * The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
- * be set is less than the max bit. The max bits in the CBM is varied among
- * supported Intel Xeon platforms. In Intel RDT "resource control" filesystem
- * layout, the CBM in a group should be a subset of the CBM in root. Kernel will
- * check if it is valid when writing. e.g., 0xfffff in root indicates the max bits
- * of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM
- * values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
- *
- * For more information about Intel RDT/CAT kernel interface:
- * https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
- *
- * An example for runc:
- * Consider a two-socket machine with two L3 caches where the default CBM is
- * 0xfffff and the max CBM length is 20 bits. With this configuration, tasks
- * inside the container only have access to the "upper" 80% of L3 cache id 0 and
- * the "lower" 50% L3 cache id 1:
- *
- * "linux": {
- * 	"intelRdt": {
- * 		"l3CacheSchema": "L3:0=ffff0;1=3ff"
- * 	}
- * }
- */
-
-// Manager is the set of operations performed on a container's Intel RDT
-// "resource control" group.
-type Manager interface {
-	// Apply applies the Intel RDT configuration to the process with the
-	// specified pid.
-	Apply(pid int) error
-
-	// GetStats returns statistics for Intel RDT.
-	GetStats() (*Stats, error)
-
-	// Destroy destroys the Intel RDT 'container_id' group.
-	Destroy() error
-
-	// GetPath returns the Intel RDT path to save in a state file so that
-	// the object can be restored later.
-	GetPath() string
-
-	// Set configures the Intel RDT "resource control" filesystem as
-	// described by container.
-	Set(container *configs.Config) error
-}
-
-// IntelRdtManager implements Manager.
-//
-// mu is held by Apply, Destroy and GetStats. Config is the container
-// configuration whose IntelRdt section Apply and GetStats consult. Id names
-// the container's group directory under the resctrl root. Path is that
-// directory's full path: set by Apply, cleared by Destroy, and filled lazily
-// by GetPath.
-type IntelRdtManager struct {
-	mu     sync.Mutex
-	Config *configs.Config
-	Id     string
-	Path   string
-}
-
-const (
-	// IntelRdtTasks is the name of the file inside a group directory that
-	// pids are written to and read from.
-	IntelRdtTasks = "tasks"
-)
-
-var (
-	// intelRdtRoot caches the absolute root path of the Intel RDT "resource
-	// control" filesystem once getIntelRdtRoot has located it.
-	// intelRdtRootLock is held by getIntelRdtRoot while it reads or fills
-	// the cache.
-	intelRdtRoot     string
-	intelRdtRootLock sync.Mutex
-
-	// isEnabled indicates whether Intel RDT is supported; it is assigned
-	// in init.
-	isEnabled bool
-)
-
-// intelRdtData carries what is needed to place a process into a group: the
-// resctrl root path, the container configuration, and the pid to attach.
-// It is built by getIntelRdtData and consumed by its join method.
-type intelRdtData struct {
-	root   string
-	config *configs.Config
-	pid    int
-}
-
-// init determines once, at package load, whether Intel RDT is usable and
-// records the answer in isEnabled.
-func init() {
-	// Step 1: the hardware and kernel must advertise the "cat_l3" flag in
-	// /proc/cpuinfo.
-	hasFlag, err := parseCpuInfoFile("/proc/cpuinfo")
-	if err == nil && hasFlag {
-		// Step 2: the "resource control" filesystem must be mounted;
-		// mounting it is left to the user.
-		isEnabled = isIntelRdtMounted()
-		return
-	}
-	isEnabled = false
-}
-
-// findIntelRdtMountpointDir returns the mount point path of the Intel RDT
-// "resource control" filesystem by scanning /proc/self/mountinfo for the
-// first entry whose filesystem type (the first field after the " - "
-// separator) is "resctrl".
-//
-// It returns a *NotFoundError when no such entry exists, and a plain error
-// when the file cannot be read or a line is malformed: no " - " separator,
-// nothing after the separator, or too few fields to hold a mount point.
-func findIntelRdtMountpointDir() (string, error) {
-	f, err := os.Open("/proc/self/mountinfo")
-	if err != nil {
-		return "", err
-	}
-	defer f.Close()
-
-	s := bufio.NewScanner(f)
-	for s.Scan() {
-		text := s.Text()
-		fields := strings.Split(text, " ")
-		// Safe as mountinfo encodes mountpoints with spaces as \040.
-		index := strings.Index(text, " - ")
-		if index < 0 {
-			// Without the separator the slice below would start at an
-			// arbitrary offset (or panic on a very short line).
-			return "", fmt.Errorf("Found no '-' separator in %q", text)
-		}
-		postSeparatorFields := strings.Fields(text[index+3:])
-		numPostFields := len(postSeparatorFields)
-
-		// This is an error as we can't detect if the mount is for "Intel RDT"
-		if numPostFields == 0 {
-			return "", fmt.Errorf("Found no fields post '-' in %q", text)
-		}
-
-		if postSeparatorFields[0] != "resctrl" {
-			continue
-		}
-
-		// Check that the mount is properly formatted.
-		if numPostFields < 3 {
-			return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
-		}
-		// The mount point is the fifth space-separated field of the line.
-		if len(fields) < 5 {
-			return "", fmt.Errorf("Error found less than 5 fields in %q", text)
-		}
-
-		return fields[4], nil
-	}
-	if err := s.Err(); err != nil {
-		return "", err
-	}
-
-	return "", NewNotFoundError("Intel RDT")
-}
-
-// getIntelRdtRoot returns the root path of the Intel RDT "resource control"
-// filesystem. The first successful lookup is cached in intelRdtRoot; failed
-// lookups are not cached. The whole call runs under intelRdtRootLock.
-func getIntelRdtRoot() (string, error) {
-	intelRdtRootLock.Lock()
-	defer intelRdtRootLock.Unlock()
-
-	if intelRdtRoot == "" {
-		mountpoint, err := findIntelRdtMountpointDir()
-		if err != nil {
-			return "", err
-		}
-		// Only cache a mount point that can actually be stat'ed.
-		if _, err = os.Stat(mountpoint); err != nil {
-			return "", err
-		}
-		intelRdtRoot = mountpoint
-	}
-
-	return intelRdtRoot, nil
-}
-
-// isIntelRdtMounted reports whether getIntelRdtRoot can locate the resctrl
-// root without error.
-func isIntelRdtMounted() bool {
-	_, err := getIntelRdtRoot()
-	return err == nil
-}
-
-// parseCpuInfoFile reports whether the file at path contains the "cat_l3"
-// flag, which is set when Intel RDT/CAT is supported. Each line is split on
-// single spaces and the flag must match one piece exactly.
-//
-// It returns an error if the file cannot be opened or if reading it fails
-// before the flag is found.
-func parseCpuInfoFile(path string) (bool, error) {
-	f, err := os.Open(path)
-	if err != nil {
-		return false, err
-	}
-	defer f.Close()
-
-	s := bufio.NewScanner(f)
-	for s.Scan() {
-		for _, flag := range strings.Split(s.Text(), " ") {
-			if flag == "cat_l3" {
-				return true, nil
-			}
-		}
-	}
-	// Scan returns false on both EOF and failure; only Err tells them apart,
-	// so it has to be consulted after the loop rather than inside it.
-	if err := s.Err(); err != nil {
-		return false, err
-	}
-	return false, nil
-}
-
-// parseUint parses s as an unsigned integer, clamping negative input to 0
-// instead of failing. If ParseUint rejects s, the string is re-parsed as a
-// signed integer. A negative result yields (0, nil), whether it parsed
-// cleanly or was reported as out of range. Anything else returns the original
-// ParseUint value and error.
-func parseUint(s string, base, bitSize int) (uint64, error) {
-	parsed, err := strconv.ParseUint(s, base, bitSize)
-	if err == nil {
-		return parsed, nil
-	}
-
-	signed, signedErr := strconv.ParseInt(s, base, bitSize)
-	if signed < 0 {
-		// Negative and representable.
-		if signedErr == nil {
-			return 0, nil
-		}
-		// Negative and below the minimum for bitSize.
-		if signedErr.(*strconv.NumError).Err == strconv.ErrRange {
-			return 0, nil
-		}
-	}
-
-	return parsed, err
-}
-
-// getIntelRdtParamUint reads path/file, trims surrounding whitespace and
-// parses the content as a base-10 uint64 via parseUint. Read errors are
-// returned as-is; parse failures are reported with the raw content and the
-// file name.
-func getIntelRdtParamUint(path, file string) (uint64, error) {
-	fullPath := filepath.Join(path, file)
-	raw, err := ioutil.ReadFile(fullPath)
-	if err != nil {
-		return 0, err
-	}
-
-	text := string(raw)
-	value, err := parseUint(strings.TrimSpace(text), 10, 64)
-	if err == nil {
-		return value, nil
-	}
-	return value, fmt.Errorf("unable to parse %q as a uint from file %q", text, fullPath)
-}
-
-// getIntelRdtParamString returns the content of path/file with leading and
-// trailing whitespace removed.
-func getIntelRdtParamString(path, file string) (string, error) {
-	target := filepath.Join(path, file)
-	data, err := ioutil.ReadFile(target)
-	if err != nil {
-		return "", err
-	}
-	trimmed := strings.TrimSpace(string(data))
-	return trimmed, nil
-}
-
-// readTasksFile returns the pids listed, one per line, in the "tasks" file
-// inside dir. Empty lines are skipped. The result is an empty, non-nil slice
-// when the file lists no pids.
-//
-// It returns an error if the file cannot be opened, if a line is not a
-// decimal integer, or if reading the file fails part-way through.
-func readTasksFile(dir string) ([]int, error) {
-	f, err := os.Open(filepath.Join(dir, IntelRdtTasks))
-	if err != nil {
-		return nil, err
-	}
-	defer f.Close()
-
-	var (
-		s   = bufio.NewScanner(f)
-		out = []int{}
-	)
-
-	for s.Scan() {
-		if t := s.Text(); t != "" {
-			pid, err := strconv.Atoi(t)
-			if err != nil {
-				return nil, err
-			}
-			out = append(out, pid)
-		}
-	}
-	// A read failure also ends the loop; don't hand back a partial list as
-	// if it were complete.
-	if err := s.Err(); err != nil {
-		return nil, err
-	}
-	return out, nil
-}
-
-// writeFile writes data followed by a newline to dir/file. An empty dir is
-// rejected up front.
-func writeFile(dir, file, data string) error {
-	if dir == "" {
-		return fmt.Errorf("no such directory for %s", file)
-	}
-
-	target := filepath.Join(dir, file)
-	payload := []byte(data + "\n")
-	err := ioutil.WriteFile(target, payload, 0700)
-	if err == nil {
-		return nil
-	}
-	return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
-}
-
-// getIntelRdtData bundles the resctrl root path with the given configuration
-// and pid. It returns nil and the lookup error when the root cannot be
-// determined.
-func getIntelRdtData(c *configs.Config, pid int) (*intelRdtData, error) {
-	root, err := getIntelRdtRoot()
-	if err != nil {
-		return nil, err
-	}
-
-	data := &intelRdtData{root: root, config: c, pid: pid}
-	return data, nil
-}
-
-// getL3CacheInfo reads the read-only L3 cache information (cbm_mask,
-// min_cbm_bits, num_closids) from <root>/info/L3. On any failure it returns
-// an empty, non-nil L3CacheInfo together with the error.
-func getL3CacheInfo() (*L3CacheInfo, error) {
-	info := &L3CacheInfo{}
-
-	root, err := getIntelRdtRoot()
-	if err != nil {
-		return info, err
-	}
-	dir := filepath.Join(root, "info", "L3")
-
-	mask, err := getIntelRdtParamString(dir, "cbm_mask")
-	if err != nil {
-		return info, err
-	}
-	minBits, err := getIntelRdtParamUint(dir, "min_cbm_bits")
-	if err != nil {
-		return info, err
-	}
-	closids, err := getIntelRdtParamUint(dir, "num_closids")
-	if err != nil {
-		return info, err
-	}
-
-	// Populate only after every read has succeeded.
-	*info = L3CacheInfo{
-		CbmMask:    mask,
-		MinCbmBits: minBits,
-		NumClosids: closids,
-	}
-	return info, nil
-}
-
-// WriteIntelRdtTasks writes pid into the "tasks" file inside dir. A pid of -1
-// means "attach nothing" and is a no-op. An empty dir is an error.
-func WriteIntelRdtTasks(dir string, pid int) error {
-	if dir == "" {
-		return fmt.Errorf("no such directory for %s", IntelRdtTasks)
-	}
-	if pid == -1 {
-		return nil
-	}
-
-	tasksPath := filepath.Join(dir, IntelRdtTasks)
-	if err := ioutil.WriteFile(tasksPath, []byte(strconv.Itoa(pid)), 0700); err != nil {
-		return fmt.Errorf("failed to write %v to %v: %v", pid, IntelRdtTasks, err)
-	}
-	return nil
-}
-
-// IsEnabled reports whether Intel RDT is enabled. It returns the isEnabled
-// flag, which is assigned in init.
-func IsEnabled() bool {
-	return isEnabled
-}
-
-// GetIntelRdtPath returns the path of the group named id inside the Intel RDT
-// "resource control" filesystem, i.e. <root>/<id>.
-func GetIntelRdtPath(id string) (string, error) {
-	root, err := getIntelRdtRoot()
-	if err != nil {
-		return "", err
-	}
-	return filepath.Join(root, id), nil
-}
-
-// Apply applies the Intel RDT configuration to the process with the specified
-// pid by creating the container's group directory (if needed) and writing the
-// pid into its tasks file. On success m.Path is set to the group directory.
-//
-// It is a no-op when the configuration has no IntelRdt section. It returns
-// an error when the resctrl root cannot be located, including the not-found
-// case, or when joining the group fails.
-func (m *IntelRdtManager) Apply(pid int) (err error) {
-	// If intelRdt is not specified in config, we do nothing
-	if m.Config.IntelRdt == nil {
-		return nil
-	}
-	d, err := getIntelRdtData(m.Config, pid)
-	if err != nil {
-		// getIntelRdtData returns a nil *intelRdtData alongside any error,
-		// so continuing (even for a not-found error) would dereference nil
-		// in join below.
-		return err
-	}
-
-	m.mu.Lock()
-	defer m.mu.Unlock()
-	path, err := d.join(m.Id)
-	if err != nil {
-		return err
-	}
-
-	m.Path = path
-	return nil
-}
-
-// Destroy removes the Intel RDT 'container_id' group directory at m.Path and,
-// if that succeeds, clears m.Path. The call runs under m.mu.
-func (m *IntelRdtManager) Destroy() error {
-	m.mu.Lock()
-	defer m.mu.Unlock()
-
-	err := os.RemoveAll(m.Path)
-	if err == nil {
-		m.Path = ""
-	}
-	return err
-}
-
-// GetPath returns the Intel RDT path to save in a state file so the object
-// can be restored later. When m.Path is still empty it is first filled from
-// GetIntelRdtPath(m.Id); a lookup error is ignored and leaves the path empty.
-func (m *IntelRdtManager) GetPath() string {
-	if m.Path != "" {
-		return m.Path
-	}
-	m.Path, _ = GetIntelRdtPath(m.Id)
-	return m.Path
-}
-
-// GetStats returns statistics for Intel RDT: the read-only L3 cache
-// information, the L3 schema line of the root group, and the L3 schema line
-// of this container's group. It returns (nil, nil) when the configuration has
-// no IntelRdt section. The work is done under m.mu.
-func (m *IntelRdtManager) GetStats() (*Stats, error) {
-	if m.Config.IntelRdt == nil {
-		return nil, nil
-	}
-
-	m.mu.Lock()
-	defer m.mu.Unlock()
-
-	// Read-only L3 cache information.
-	info, err := getL3CacheInfo()
-	if err != nil {
-		return nil, err
-	}
-
-	// Schemata of the root group.
-	root, err := getIntelRdtRoot()
-	if err != nil {
-		return nil, err
-	}
-	rootSchemata, err := getIntelRdtParamString(root, "schemata")
-	if err != nil {
-		return nil, err
-	}
-
-	// Schemata of the 'container_id' group.
-	groupSchemata, err := getIntelRdtParamString(m.GetPath(), "schemata")
-	if err != nil {
-		return nil, err
-	}
-
-	stats := NewStats()
-	stats.L3CacheInfo = info
-	// The L3 cache schema is the first line of each schemata file.
-	stats.L3CacheSchemaRoot = strings.Split(rootSchemata, "\n")[0]
-	stats.L3CacheSchema = strings.Split(groupSchemata, "\n")[0]
-	return stats, nil
-}
-
-// Set writes the container's L3 cache schema, if one is configured, to the
-// "schemata" file of the container's group directory.
-//
-// Schema format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;...". For example,
-// on a two-socket machine "L3:0=ff;1=c0" gives L3 cache id 0 the capacity
-// bitmask (CBM) 0xff and cache id 1 the CBM 0xc0.
-//
-// CBM validity: a valid CBM is a contiguous run of set bits, must fit within
-// the platform's maximum CBM width, and must be a subset of the root group's
-// CBM. With 0xfffff in root (20 bits, the whole L3 cache), valid group values
-// include 0xf, 0xf0, 0x3ff and 0x1f00. The kernel performs this check when
-// the file is written.
-func (m *IntelRdtManager) Set(container *configs.Config) error {
-	// Resolve the path first: GetPath may fill in m.Path as a side effect.
-	path := m.GetPath()
-
-	if container.IntelRdt == nil {
-		return nil
-	}
-	schema := container.IntelRdt.L3CacheSchema
-	if schema == "" {
-		return nil
-	}
-	return writeFile(path, "schemata", schema)
-}
-
-// join creates the group directory <root>/<id> if necessary, writes raw.pid
-// into its tasks file, and returns the directory path.
-func (raw *intelRdtData) join(id string) (string, error) {
-	groupDir := filepath.Join(raw.root, id)
-
-	err := os.MkdirAll(groupDir, 0755)
-	if err == nil {
-		err = WriteIntelRdtTasks(groupDir, raw.pid)
-	}
-	if err != nil {
-		return "", err
-	}
-	return groupDir, nil
-}
-
-// NotFoundError is returned when the mount point for a resource control
-// filesystem cannot be found. ResourceControl names the resource that was
-// looked up and is included in the error message.
-type NotFoundError struct {
-	ResourceControl string
-}
-
-// Error implements the error interface.
-func (e *NotFoundError) Error() string {
-	return "mountpoint for " + e.ResourceControl + " not found"
-}
-
-// NewNotFoundError returns a *NotFoundError for the resource named res.
-func NewNotFoundError(res string) error {
-	notFound := new(NotFoundError)
-	notFound.ResourceControl = res
-	return notFound
-}
-
-func IsNotFound(err error) bool {
-	if err == nil {
-		return false
-	}
-	_, ok := err.(*NotFoundError)
-	return ok
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go
@ -1,24 +0,0 @@
-// +build linux
-
-package intelrdt
-
-// L3CacheInfo holds the three L3 cache values named by its JSON tags:
-// cbm_mask, min_cbm_bits and num_closids. Empty or zero values are omitted
-// from the JSON encoding.
-type L3CacheInfo struct {
-	CbmMask    string `json:"cbm_mask,omitempty"`
-	MinCbmBits uint64 `json:"min_cbm_bits,omitempty"`
-	NumClosids uint64 `json:"num_closids,omitempty"`
-}
-
-// Stats is the set of Intel RDT statistics reported for a container. Empty
-// fields are omitted from the JSON encoding.
-type Stats struct {
-	// L3CacheInfo is the read-only L3 cache information.
-	L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"`
-
-	// L3CacheSchemaRoot is the read-only L3 cache schema of the root group.
-	L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"`
-
-	// L3CacheSchema is the L3 cache schema of the 'container_id' group.
-	L3CacheSchema string `json:"l3_cache_schema,omitempty"`
-}
-
-// NewStats returns a pointer to a zero-valued Stats.
-func NewStats() *Stats {
-	return new(Stats)
-}
--- a/vendor/github.com/opencontainers/selinux/LICENSE
+++ b/vendor/github.com/opencontainers/selinux/LICENSE
@ -1,201 +0,0 @@
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "{}"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright {yyyy} {name of copyright owner}
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--- a/vendor/github.com/opencontainers/selinux/README.md
+++ b/vendor/github.com/opencontainers/selinux/README.md
@ -1,7 +0,0 @@
-# selinux
-
-[![GoDoc](https://godoc.org/github.com/opencontainers/selinux?status.svg)](https://godoc.org/github.com/opencontainers/selinux) [![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/selinux)](https://goreportcard.com/report/github.com/opencontainers/selinux) [![Build Status](https://travis-ci.org/opencontainers/selinux.svg?branch=master)](https://travis-ci.org/opencontainers/selinux)
-
-Common SELinux package used across the container ecosystem.
-
-Please see the [godoc](https://godoc.org/github.com/opencontainers/selinux) for more information.
--- a/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go
+++ b/vendor/github.com/opencontainers/selinux/go-selinux/selinux.go
@ -1,688 +0,0 @@
-// +build linux
-
-package selinux
-
-import (
-	"bufio"
-	"bytes"
-	"crypto/rand"
-	"encoding/binary"
-	"fmt"
-	"io"
-	"io/ioutil"
-	"os"
-	"path/filepath"
-	"regexp"
-	"strconv"
-	"strings"
-	"sync"
-	"syscall"
-)
-
-const (
-	// Enforcing constant indicate SELinux is in enforcing mode
-	Enforcing = 1
-	// Permissive constant to indicate SELinux is in permissive mode
-	Permissive = 0
-	// Disabled constant to indicate SELinux is disabled
-	Disabled         = -1
-	selinuxDir       = "/etc/selinux/"
-	selinuxConfig    = selinuxDir + "config"
-	selinuxfsMount   = "/sys/fs/selinux"
-	selinuxTypeTag   = "SELINUXTYPE"
-	selinuxTag       = "SELINUX"
-	xattrNameSelinux = "security.selinux"
-	stRdOnly         = 0x01
-	selinuxfsMagic   = 0xf97cff8c
-)
-
-type selinuxState struct {
-	enabledSet   bool
-	enabled      bool
-	selinuxfsSet bool
-	selinuxfs    string
-	mcsList      map[string]bool
-	sync.Mutex
-}
-
-var (
-	assignRegex = regexp.MustCompile(`^([^=]+)=(.*)$`)
-	state       = selinuxState{
-		mcsList: make(map[string]bool),
-	}
-)
-
-// Context is a representation of the SELinux label broken into 4 parts
-type Context map[string]string
-
-func (s *selinuxState) setEnable(enabled bool) bool {
-	s.Lock()
-	defer s.Unlock()
-	s.enabledSet = true
-	s.enabled = enabled
-	return s.enabled
-}
-
-func (s *selinuxState) getEnabled() bool {
-	s.Lock()
-	enabled := s.enabled
-	enabledSet := s.enabledSet
-	s.Unlock()
-	if enabledSet {
-		return enabled
-	}
-
-	enabled = false
-	if fs := getSelinuxMountPoint(); fs != "" {
-		if con, _ := CurrentLabel(); con != "kernel" {
-			enabled = true
-		}
-	}
-	return s.setEnable(enabled)
-}
-
-// SetDisabled disables selinux support for the package
-func SetDisabled() {
-	state.setEnable(false)
-}
-
-func (s *selinuxState) setSELinuxfs(selinuxfs string) string {
-	s.Lock()
-	defer s.Unlock()
-	s.selinuxfsSet = true
-	s.selinuxfs = selinuxfs
-	return s.selinuxfs
-}
-
-func verifySELinuxfsMount(mnt string) bool {
-	var buf syscall.Statfs_t
-	for {
-		err := syscall.Statfs(mnt, &buf)
-		if err == nil {
-			break
-		}
-		if err == syscall.EAGAIN {
-			continue
-		}
-		return false
-	}
-	if uint32(buf.Type) != uint32(selinuxfsMagic) {
-		return false
-	}
-	if (buf.Flags & stRdOnly) != 0 {
-		return false
-	}
-
-	return true
-}
-
-func findSELinuxfs() string {
-	// fast path: check the default mount first
-	if verifySELinuxfsMount(selinuxfsMount) {
-		return selinuxfsMount
-	}
-
-	// check if selinuxfs is available before going the slow path
-	fs, err := ioutil.ReadFile("/proc/filesystems")
-	if err != nil {
-		return ""
-	}
-	if !bytes.Contains(fs, []byte("\tselinuxfs\n")) {
-		return ""
-	}
-
-	// slow path: try to find among the mounts
-	f, err := os.Open("/proc/self/mountinfo")
-	if err != nil {
-		return ""
-	}
-	defer f.Close()
-
-	scanner := bufio.NewScanner(f)
-	for {
-		mnt := findSELinuxfsMount(scanner)
-		if mnt == "" { // error or not found
-			return ""
-		}
-		if verifySELinuxfsMount(mnt) {
-			return mnt
-		}
-	}
-}
-
-// findSELinuxfsMount returns a next selinuxfs mount point found,
-// if there is one, or an empty string in case of EOF or error.
-func findSELinuxfsMount(s *bufio.Scanner) string {
-	for s.Scan() {
-		txt := s.Text()
-		// The first field after - is fs type.
-		// Safe as spaces in mountpoints are encoded as \040
-		if !strings.Contains(txt, " - selinuxfs ") {
-			continue
-		}
-		const mPos = 5 // mount point is 5th field
-		fields := strings.SplitN(txt, " ", mPos+1)
-		if len(fields) < mPos+1 {
-			continue
-		}
-		return fields[mPos-1]
-	}
-
-	return ""
-}
-
-func (s *selinuxState) getSELinuxfs() string {
-	s.Lock()
-	selinuxfs := s.selinuxfs
-	selinuxfsSet := s.selinuxfsSet
-	s.Unlock()
-	if selinuxfsSet {
-		return selinuxfs
-	}
-
-	return s.setSELinuxfs(findSELinuxfs())
-}
-
-// getSelinuxMountPoint returns the path to the mountpoint of an selinuxfs
-// filesystem or an empty string if no mountpoint is found.  Selinuxfs is
-// a proc-like pseudo-filesystem that exposes the selinux policy API to
-// processes.  The existence of an selinuxfs mount is used to determine
-// whether selinux is currently enabled or not.
-func getSelinuxMountPoint() string {
-	return state.getSELinuxfs()
-}
-
-// GetEnabled returns whether selinux is currently enabled.
-func GetEnabled() bool {
-	return state.getEnabled()
-}
-
-func readConfig(target string) (value string) {
-	var (
-		val, key string
-		bufin    *bufio.Reader
-	)
-
-	in, err := os.Open(selinuxConfig)
-	if err != nil {
-		return ""
-	}
-	defer in.Close()
-
-	bufin = bufio.NewReader(in)
-
-	for done := false; !done; {
-		var line string
-		if line, err = bufin.ReadString('\n'); err != nil {
-			if err != io.EOF {
-				return ""
-			}
-			done = true
-		}
-		line = strings.TrimSpace(line)
-		if len(line) == 0 {
-			// Skip blank lines
-			continue
-		}
-		if line[0] == ';' || line[0] == '#' {
-			// Skip comments
-			continue
-		}
-		if groups := assignRegex.FindStringSubmatch(line); groups != nil {
-			key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2])
-			if key == target {
-				return strings.Trim(val, "\"")
-			}
-		}
-	}
-	return ""
-}
-
-func getSELinuxPolicyRoot() string {
-	return selinuxDir + readConfig(selinuxTypeTag)
-}
-
-func readCon(name string) (string, error) {
-	var val string
-
-	in, err := os.Open(name)
-	if err != nil {
-		return "", err
-	}
-	defer in.Close()
-
-	_, err = fmt.Fscanf(in, "%s", &val)
-	return strings.Trim(val, "\x00"), err
-}
-
-// SetFileLabel sets the SELinux label for this path or returns an error.
-func SetFileLabel(path string, label string) error {
-	return lsetxattr(path, xattrNameSelinux, []byte(label), 0)
-}
-
-// FileLabel returns the SELinux label for this path or returns an error.
-func FileLabel(path string) (string, error) {
-	label, err := lgetxattr(path, xattrNameSelinux)
-	if err != nil {
-		return "", err
-	}
-	// Trim the NUL byte at the end of the byte buffer, if present.
-	if len(label) > 0 && label[len(label)-1] == '\x00' {
-		label = label[:len(label)-1]
-	}
-	return string(label), nil
-}
-
-/*
-SetFSCreateLabel tells kernel the label to create all file system objects
-created by this task. Setting label="" to return to default.
-*/
-func SetFSCreateLabel(label string) error {
-	return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", syscall.Gettid()), label)
-}
-
-/*
-FSCreateLabel returns the default label the kernel which the kernel is using
-for file system objects created by this task. "" indicates default.
-*/
-func FSCreateLabel() (string, error) {
-	return readCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", syscall.Gettid()))
-}
-
-// CurrentLabel returns the SELinux label of the current process thread, or an error.
-func CurrentLabel() (string, error) {
-	return readCon(fmt.Sprintf("/proc/self/task/%d/attr/current", syscall.Gettid()))
-}
-
-// PidLabel returns the SELinux label of the given pid, or an error.
-func PidLabel(pid int) (string, error) {
-	return readCon(fmt.Sprintf("/proc/%d/attr/current", pid))
-}
-
-/*
-ExecLabel returns the SELinux label that the kernel will use for any programs
-that are executed by the current process thread, or an error.
-*/
-func ExecLabel() (string, error) {
-	return readCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid()))
-}
-
-func writeCon(name string, val string) error {
-	out, err := os.OpenFile(name, os.O_WRONLY, 0)
-	if err != nil {
-		return err
-	}
-	defer out.Close()
-
-	if val != "" {
-		_, err = out.Write([]byte(val))
-	} else {
-		_, err = out.Write(nil)
-	}
-	return err
-}
-
-/*
-CanonicalizeContext takes a context string and writes it to the kernel
-the function then returns the context that the kernel will use.  This function
-can be used to see if two contexts are equivalent
-*/
-func CanonicalizeContext(val string) (string, error) {
-	return readWriteCon(filepath.Join(getSelinuxMountPoint(), "context"), val)
-}
-
-func readWriteCon(name string, val string) (string, error) {
-	var retval string
-	f, err := os.OpenFile(name, os.O_RDWR, 0)
-	if err != nil {
-		return "", err
-	}
-	defer f.Close()
-
-	_, err = f.Write([]byte(val))
-	if err != nil {
-		return "", err
-	}
-
-	_, err = fmt.Fscanf(f, "%s", &retval)
-	return strings.Trim(retval, "\x00"), err
-}
-
-/*
-SetExecLabel sets the SELinux label that the kernel will use for any programs
-that are executed by the current process thread, or an error.
-*/
-func SetExecLabel(label string) error {
-	return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid()), label)
-}
-
-// Get returns the Context as a string
-func (c Context) Get() string {
-	if c["level"] != "" {
-		return fmt.Sprintf("%s:%s:%s:%s", c["user"], c["role"], c["type"], c["level"])
-	}
-	return fmt.Sprintf("%s:%s:%s", c["user"], c["role"], c["type"])
-}
-
-// NewContext creates a new Context struct from the specified label
-func NewContext(label string) Context {
-	c := make(Context)
-
-	if len(label) != 0 {
-		con := strings.SplitN(label, ":", 4)
-		c["user"] = con[0]
-		c["role"] = con[1]
-		c["type"] = con[2]
-		if len(con) > 3 {
-			c["level"] = con[3]
-		}
-	}
-	return c
-}
-
-// ReserveLabel reserves the MLS/MCS level component of the specified label
-func ReserveLabel(label string) {
-	if len(label) != 0 {
-		con := strings.SplitN(label, ":", 4)
-		if len(con) > 3 {
-			mcsAdd(con[3])
-		}
-	}
-}
-
-func selinuxEnforcePath() string {
-	return fmt.Sprintf("%s/enforce", getSelinuxMountPoint())
-}
-
-// EnforceMode returns the current SELinux mode Enforcing, Permissive, Disabled
-func EnforceMode() int {
-	var enforce int
-
-	enforceS, err := readCon(selinuxEnforcePath())
-	if err != nil {
-		return -1
-	}
-
-	enforce, err = strconv.Atoi(string(enforceS))
-	if err != nil {
-		return -1
-	}
-	return enforce
-}
-
-/*
-SetEnforceMode sets the current SELinux mode Enforcing, Permissive.
-Disabled is not valid, since this needs to be set at boot time.
-*/
-func SetEnforceMode(mode int) error {
-	return writeCon(selinuxEnforcePath(), fmt.Sprintf("%d", mode))
-}
-
-/*
-DefaultEnforceMode returns the systems default SELinux mode Enforcing,
-Permissive or Disabled. Note this is is just the default at boot time.
-EnforceMode tells you the systems current mode.
-*/
-func DefaultEnforceMode() int {
-	switch readConfig(selinuxTag) {
-	case "enforcing":
-		return Enforcing
-	case "permissive":
-		return Permissive
-	}
-	return Disabled
-}
-
-func mcsAdd(mcs string) error {
-	if mcs == "" {
-		return nil
-	}
-	state.Lock()
-	defer state.Unlock()
-	if state.mcsList[mcs] {
-		return fmt.Errorf("MCS Label already exists")
-	}
-	state.mcsList[mcs] = true
-	return nil
-}
-
-func mcsDelete(mcs string) {
-	if mcs == "" {
-		return
-	}
-	state.Lock()
-	defer state.Unlock()
-	state.mcsList[mcs] = false
-}
-
-func intToMcs(id int, catRange uint32) string {
-	var (
-		SETSIZE = int(catRange)
-		TIER    = SETSIZE
-		ORD     = id
-	)
-
-	if id < 1 || id > 523776 {
-		return ""
-	}
-
-	for ORD > TIER {
-		ORD = ORD - TIER
-		TIER--
-	}
-	TIER = SETSIZE - TIER
-	ORD = ORD + TIER
-	return fmt.Sprintf("s0:c%d,c%d", TIER, ORD)
-}
-
-func uniqMcs(catRange uint32) string {
-	var (
-		n      uint32
-		c1, c2 uint32
-		mcs    string
-	)
-
-	for {
-		binary.Read(rand.Reader, binary.LittleEndian, &n)
-		c1 = n % catRange
-		binary.Read(rand.Reader, binary.LittleEndian, &n)
-		c2 = n % catRange
-		if c1 == c2 {
-			continue
-		} else {
-			if c1 > c2 {
-				c1, c2 = c2, c1
-			}
-		}
-		mcs = fmt.Sprintf("s0:c%d,c%d", c1, c2)
-		if err := mcsAdd(mcs); err != nil {
-			continue
-		}
-		break
-	}
-	return mcs
-}
-
-/*
-ReleaseLabel will unreserve the MLS/MCS Level field of the specified label.
-Allowing it to be used by another process.
-*/
-func ReleaseLabel(label string) {
-	if len(label) != 0 {
-		con := strings.SplitN(label, ":", 4)
-		if len(con) > 3 {
-			mcsDelete(con[3])
-		}
-	}
-}
-
-var roFileLabel string
-
-// ROFileLabel returns the specified SELinux readonly file label
-func ROFileLabel() (fileLabel string) {
-	return roFileLabel
-}
-
-/*
-ContainerLabels returns an allocated processLabel and fileLabel to be used for
-container labeling by the calling process.
-*/
-func ContainerLabels() (processLabel string, fileLabel string) {
-	var (
-		val, key string
-		bufin    *bufio.Reader
-	)
-
-	if !GetEnabled() {
-		return "", ""
-	}
-	lxcPath := fmt.Sprintf("%s/contexts/lxc_contexts", getSELinuxPolicyRoot())
-	in, err := os.Open(lxcPath)
-	if err != nil {
-		return "", ""
-	}
-	defer in.Close()
-
-	bufin = bufio.NewReader(in)
-
-	for done := false; !done; {
-		var line string
-		if line, err = bufin.ReadString('\n'); err != nil {
-			if err == io.EOF {
-				done = true
-			} else {
-				goto exit
-			}
-		}
-		line = strings.TrimSpace(line)
-		if len(line) == 0 {
-			// Skip blank lines
-			continue
-		}
-		if line[0] == ';' || line[0] == '#' {
-			// Skip comments
-			continue
-		}
-		if groups := assignRegex.FindStringSubmatch(line); groups != nil {
-			key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2])
-			if key == "process" {
-				processLabel = strings.Trim(val, "\"")
-			}
-			if key == "file" {
-				fileLabel = strings.Trim(val, "\"")
-			}
-			if key == "ro_file" {
-				roFileLabel = strings.Trim(val, "\"")
-			}
-		}
-	}
-
-	if processLabel == "" || fileLabel == "" {
-		return "", ""
-	}
-
-	if roFileLabel == "" {
-		roFileLabel = fileLabel
-	}
-exit:
-	scon := NewContext(processLabel)
-	if scon["level"] != "" {
-		mcs := uniqMcs(1024)
-		scon["level"] = mcs
-		processLabel = scon.Get()
-		scon = NewContext(fileLabel)
-		scon["level"] = mcs
-		fileLabel = scon.Get()
-	}
-	return processLabel, fileLabel
-}
-
-// SecurityCheckContext validates that the SELinux label is understood by the kernel
-func SecurityCheckContext(val string) error {
-	return writeCon(fmt.Sprintf("%s/context", getSelinuxMountPoint()), val)
-}
-
-/*
-CopyLevel returns a label with the MLS/MCS level from src label replaces on
-the dest label.
-*/
-func CopyLevel(src, dest string) (string, error) {
-	if src == "" {
-		return "", nil
-	}
-	if err := SecurityCheckContext(src); err != nil {
-		return "", err
-	}
-	if err := SecurityCheckContext(dest); err != nil {
-		return "", err
-	}
-	scon := NewContext(src)
-	tcon := NewContext(dest)
-	mcsDelete(tcon["level"])
-	mcsAdd(scon["level"])
-	tcon["level"] = scon["level"]
-	return tcon.Get(), nil
-}
-
-// Prevent users from relabing system files
-func badPrefix(fpath string) error {
-	var badprefixes = []string{"/usr"}
-
-	for _, prefix := range badprefixes {
-		if fpath == prefix || strings.HasPrefix(fpath, fmt.Sprintf("%s/", prefix)) {
-			return fmt.Errorf("relabeling content in %s is not allowed", prefix)
-		}
-	}
-	return nil
-}
-
-// Chcon changes the fpath file object to the SELinux label label.
-// If the fpath is a directory and recurse is true Chcon will walk the
-// directory tree setting the label
-func Chcon(fpath string, label string, recurse bool) error {
-	if label == "" {
-		return nil
-	}
-	if err := badPrefix(fpath); err != nil {
-		return err
-	}
-	callback := func(p string, info os.FileInfo, err error) error {
-		return SetFileLabel(p, label)
-	}
-
-	if recurse {
-		return filepath.Walk(fpath, callback)
-	}
-
-	return SetFileLabel(fpath, label)
-}
-
-// DupSecOpt takes an SELinux process label and returns security options that
-// can will set the SELinux Type and Level for future container processes
-func DupSecOpt(src string) []string {
-	if src == "" {
-		return nil
-	}
-	con := NewContext(src)
-	if con["user"] == "" ||
-		con["role"] == "" ||
-		con["type"] == "" {
-		return nil
-	}
-	dup := []string{"user:" + con["user"],
-		"role:" + con["role"],
-		"type:" + con["type"],
-	}
-
-	if con["level"] != "" {
-		dup = append(dup, "level:"+con["level"])
-	}
-
-	return dup
-}
-
-// DisableSecOpt returns a security opt that can be used to disabling SELinux
-// labeling support for future container processes
-func DisableSecOpt() []string {
-	return []string{"disable"}
-}
--- a/vendor/github.com/opencontainers/selinux/go-selinux/xattrs.go
+++ b/vendor/github.com/opencontainers/selinux/go-selinux/xattrs.go
@ -1,78 +0,0 @@
-// +build linux
-
-package selinux
-
-import (
-	"syscall"
-	"unsafe"
-)
-
-var _zero uintptr
-
-// Returns a []byte slice if the xattr is set and nil otherwise
-// Requires path and its attribute as arguments
-func lgetxattr(path string, attr string) ([]byte, error) {
-	var sz int
-	pathBytes, err := syscall.BytePtrFromString(path)
-	if err != nil {
-		return nil, err
-	}
-	attrBytes, err := syscall.BytePtrFromString(attr)
-	if err != nil {
-		return nil, err
-	}
-
-	// Start with a 128 length byte array
-	sz = 128
-	dest := make([]byte, sz)
-	destBytes := unsafe.Pointer(&dest[0])
-	_sz, _, errno := syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
-
-	switch {
-	case errno == syscall.ENODATA:
-		return nil, errno
-	case errno == syscall.ENOTSUP:
-		return nil, errno
-	case errno == syscall.ERANGE:
-		// 128 byte array might just not be good enough,
-		// A dummy buffer is used ``uintptr(0)`` to get real size
-		// of the xattrs on disk
-		_sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(unsafe.Pointer(nil)), uintptr(0), 0, 0)
-		sz = int(_sz)
-		if sz < 0 {
-			return nil, errno
-		}
-		dest = make([]byte, sz)
-		destBytes := unsafe.Pointer(&dest[0])
-		_sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
-		if errno != 0 {
-			return nil, errno
-		}
-	case errno != 0:
-		return nil, errno
-	}
-	sz = int(_sz)
-	return dest[:sz], nil
-}
-
-func lsetxattr(path string, attr string, data []byte, flags int) error {
-	pathBytes, err := syscall.BytePtrFromString(path)
-	if err != nil {
-		return err
-	}
-	attrBytes, err := syscall.BytePtrFromString(attr)
-	if err != nil {
-		return err
-	}
-	var dataBytes unsafe.Pointer
-	if len(data) > 0 {
-		dataBytes = unsafe.Pointer(&data[0])
-	} else {
-		dataBytes = unsafe.Pointer(&_zero)
-	}
-	_, _, errno := syscall.Syscall6(syscall.SYS_LSETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(dataBytes), uintptr(len(data)), uintptr(flags), 0)
-	if errno != 0 {
-		return errno
-	}
-	return nil
-}
--- a/vendor/github.com/seccomp/libseccomp-golang/LICENSE
+++ b/vendor/github.com/seccomp/libseccomp-golang/LICENSE
@ -0,0 +1,22 @@
+Copyright (c) 2015 Matthew Heon <mheon@redhat.com>
+Copyright (c) 2015 Paul Moore <pmoore@redhat.com>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+- Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/vendor/github.com/seccomp/libseccomp-golang/README
+++ b/vendor/github.com/seccomp/libseccomp-golang/README
@ -0,0 +1,26 @@
+libseccomp-golang: Go Language Bindings for the libseccomp Project
+===============================================================================
+https://github.com/seccomp/libseccomp-golang
+https://github.com/seccomp/libseccomp
+
+The libseccomp library provides an easy to use, platform independent, interface
+to the Linux Kernel's syscall filtering mechanism.  The libseccomp API is
+designed to abstract away the underlying BPF based syscall filter language and
+present a more conventional function-call based filtering interface that should
+be familiar to, and easily adopted by, application developers.
+
+The libseccomp-golang library provides a Go based interface to the libseccomp
+library.
+
+* Online Resources
+
+The library source repository currently lives on GitHub at the following URLs:
+
+	-> https://github.com/seccomp/libseccomp-golang
+	-> https://github.com/seccomp/libseccomp
+
+The project mailing list is currently hosted on Google Groups at the URL below,
+please note that a Google account is not required to subscribe to the mailing
+list.
+
+	-> https://groups.google.com/d/forum/libseccomp
--- a/vendor/github.com/seccomp/libseccomp-golang/seccomp.go
+++ b/vendor/github.com/seccomp/libseccomp-golang/seccomp.go
@ -0,0 +1,857 @@
+// +build linux
+
+// Public API specification for libseccomp Go bindings
+// Contains public API for the bindings
+
+// Package seccomp provides bindings for libseccomp, a library wrapping the Linux
+// seccomp syscall. Seccomp enables an application to restrict system call use
+// for itself and its children.
+package seccomp
+
+import (
+	"fmt"
+	"os"
+	"runtime"
+	"strings"
+	"sync"
+	"syscall"
+	"unsafe"
+)
+
+// C wrapping code
+
+// #cgo pkg-config: libseccomp
+// #include <stdlib.h>
+// #include <seccomp.h>
+import "C"
+
+// Exported types
+
+// ScmpArch represents a CPU architecture. Seccomp can restrict syscalls on a
+// per-architecture basis.
+type ScmpArch uint
+
+// ScmpAction represents an action to be taken on a filter rule match in
+// libseccomp
+type ScmpAction uint
+
+// ScmpCompareOp represents a comparison operator which can be used in a filter
+// rule
+type ScmpCompareOp uint
+
+// ScmpCondition represents a rule in a libseccomp filter context
+type ScmpCondition struct {
+	Argument uint          `json:"argument,omitempty"`
+	Op       ScmpCompareOp `json:"operator,omitempty"`
+	Operand1 uint64        `json:"operand_one,omitempty"`
+	Operand2 uint64        `json:"operand_two,omitempty"`
+}
+
+// ScmpSyscall represents a Linux System Call
+type ScmpSyscall int32
+
+// Exported Constants
+
+const (
+	// Valid architectures recognized by libseccomp
+	// ARM64 and all MIPS architectures are unsupported by versions of the
+	// library before v2.2 and will return errors if used
+
+	// ArchInvalid is a placeholder to ensure uninitialized ScmpArch
+	// variables are invalid
+	ArchInvalid ScmpArch = iota
+	// ArchNative is the native architecture of the kernel
+	ArchNative ScmpArch = iota
+	// ArchX86 represents 32-bit x86 syscalls
+	ArchX86 ScmpArch = iota
+	// ArchAMD64 represents 64-bit x86-64 syscalls
+	ArchAMD64 ScmpArch = iota
+	// ArchX32 represents 64-bit x86-64 syscalls (32-bit pointers)
+	ArchX32 ScmpArch = iota
+	// ArchARM represents 32-bit ARM syscalls
+	ArchARM ScmpArch = iota
+	// ArchARM64 represents 64-bit ARM syscalls
+	ArchARM64 ScmpArch = iota
+	// ArchMIPS represents 32-bit MIPS syscalls
+	ArchMIPS ScmpArch = iota
+	// ArchMIPS64 represents 64-bit MIPS syscalls
+	ArchMIPS64 ScmpArch = iota
+	// ArchMIPS64N32 represents 64-bit MIPS syscalls (32-bit pointers)
+	ArchMIPS64N32 ScmpArch = iota
+	// ArchMIPSEL represents 32-bit MIPS syscalls (little endian)
+	ArchMIPSEL ScmpArch = iota
+	// ArchMIPSEL64 represents 64-bit MIPS syscalls (little endian)
+	ArchMIPSEL64 ScmpArch = iota
+	// ArchMIPSEL64N32 represents 64-bit MIPS syscalls (little endian,
+	// 32-bit pointers)
+	ArchMIPSEL64N32 ScmpArch = iota
+	// ArchPPC represents 32-bit POWERPC syscalls
+	ArchPPC ScmpArch = iota
+	// ArchPPC64 represents 64-bit POWER syscalls (big endian)
+	ArchPPC64 ScmpArch = iota
+	// ArchPPC64LE represents 64-bit POWER syscalls (little endian)
+	ArchPPC64LE ScmpArch = iota
+	// ArchS390 represents 31-bit System z/390 syscalls
+	ArchS390 ScmpArch = iota
+	// ArchS390X represents 64-bit System z/390 syscalls
+	ArchS390X ScmpArch = iota
+)
+
+const (
+	// Supported actions on filter match
+
+	// ActInvalid is a placeholder to ensure uninitialized ScmpAction
+	// variables are invalid
+	ActInvalid ScmpAction = iota
+	// ActKill kills the process
+	ActKill ScmpAction = iota
+	// ActTrap throws SIGSYS
+	ActTrap ScmpAction = iota
+	// ActErrno causes the syscall to return a negative error code. This
+	// code can be set with the SetReturnCode method
+	ActErrno ScmpAction = iota
+	// ActTrace causes the syscall to notify tracing processes with the
+	// given error code. This code can be set with the SetReturnCode method
+	ActTrace ScmpAction = iota
+	// ActAllow permits the syscall to continue execution
+	ActAllow ScmpAction = iota
+)
+
+const (
+	// These are comparison operators used in conditional seccomp rules
+	// They are used to compare the value of a single argument of a syscall
+	// against a user-defined constant
+
+	// CompareInvalid is a placeholder to ensure uninitialized ScmpCompareOp
+	// variables are invalid
+	CompareInvalid ScmpCompareOp = iota
+	// CompareNotEqual returns true if the argument is not equal to the
+	// given value
+	CompareNotEqual ScmpCompareOp = iota
+	// CompareLess returns true if the argument is less than the given value
+	CompareLess ScmpCompareOp = iota
+	// CompareLessOrEqual returns true if the argument is less than or equal
+	// to the given value
+	CompareLessOrEqual ScmpCompareOp = iota
+	// CompareEqual returns true if the argument is equal to the given value
+	CompareEqual ScmpCompareOp = iota
+	// CompareGreaterEqual returns true if the argument is greater than or
+	// equal to the given value
+	CompareGreaterEqual ScmpCompareOp = iota
+	// CompareGreater returns true if the argument is greater than the given
+	// value
+	CompareGreater ScmpCompareOp = iota
+	// CompareMaskedEqual returns true if the argument is equal to the given
+	// value, when masked (bitwise &) against the second given value
+	CompareMaskedEqual ScmpCompareOp = iota
+)
+
+// Helpers for types
+
+// GetArchFromString returns an ScmpArch constant from a string representing an
+// architecture
+func GetArchFromString(arch string) (ScmpArch, error) {
+	switch strings.ToLower(arch) {
+	case "x86":
+		return ArchX86, nil
+	case "amd64", "x86-64", "x86_64", "x64":
+		return ArchAMD64, nil
+	case "x32":
+		return ArchX32, nil
+	case "arm":
+		return ArchARM, nil
+	case "arm64", "aarch64":
+		return ArchARM64, nil
+	case "mips":
+		return ArchMIPS, nil
+	case "mips64":
+		return ArchMIPS64, nil
+	case "mips64n32":
+		return ArchMIPS64N32, nil
+	case "mipsel":
+		return ArchMIPSEL, nil
+	case "mipsel64":
+		return ArchMIPSEL64, nil
+	case "mipsel64n32":
+		return ArchMIPSEL64N32, nil
+	case "ppc":
+		return ArchPPC, nil
+	case "ppc64":
+		return ArchPPC64, nil
+	case "ppc64le":
+		return ArchPPC64LE, nil
+	case "s390":
+		return ArchS390, nil
+	case "s390x":
+		return ArchS390X, nil
+	default:
+		return ArchInvalid, fmt.Errorf("cannot convert unrecognized string %s", arch)
+	}
+}
+
+// String returns a string representation of an architecture constant
+func (a ScmpArch) String() string {
+	switch a {
+	case ArchX86:
+		return "x86"
+	case ArchAMD64:
+		return "amd64"
+	case ArchX32:
+		return "x32"
+	case ArchARM:
+		return "arm"
+	case ArchARM64:
+		return "arm64"
+	case ArchMIPS:
+		return "mips"
+	case ArchMIPS64:
+		return "mips64"
+	case ArchMIPS64N32:
+		return "mips64n32"
+	case ArchMIPSEL:
+		return "mipsel"
+	case ArchMIPSEL64:
+		return "mipsel64"
+	case ArchMIPSEL64N32:
+		return "mipsel64n32"
+	case ArchPPC:
+		return "ppc"
+	case ArchPPC64:
+		return "ppc64"
+	case ArchPPC64LE:
+		return "ppc64le"
+	case ArchS390:
+		return "s390"
+	case ArchS390X:
+		return "s390x"
+	case ArchNative:
+		return "native"
+	case ArchInvalid:
+		return "Invalid architecture"
+	default:
+		return "Unknown architecture"
+	}
+}
+
+// String returns a string representation of a comparison operator constant
+func (a ScmpCompareOp) String() string {
+	switch a {
+	case CompareNotEqual:
+		return "Not equal"
+	case CompareLess:
+		return "Less than"
+	case CompareLessOrEqual:
+		return "Less than or equal to"
+	case CompareEqual:
+		return "Equal"
+	case CompareGreaterEqual:
+		return "Greater than or equal to"
+	case CompareGreater:
+		return "Greater than"
+	case CompareMaskedEqual:
+		return "Masked equality"
+	case CompareInvalid:
+		return "Invalid comparison operator"
+	default:
+		return "Unrecognized comparison operator"
+	}
+}
+
+// String returns a string representation of a seccomp match action
+func (a ScmpAction) String() string {
+	switch a & 0xFFFF {
+	case ActKill:
+		return "Action: Kill Process"
+	case ActTrap:
+		return "Action: Send SIGSYS"
+	case ActErrno:
+		return fmt.Sprintf("Action: Return error code %d", (a >> 16))
+	case ActTrace:
+		return fmt.Sprintf("Action: Notify tracing processes with code %d",
+			(a >> 16))
+	case ActAllow:
+		return "Action: Allow system call"
+	default:
+		return "Unrecognized Action"
+	}
+}
+
+// SetReturnCode adds a return code to a supporting ScmpAction, clearing any
+// existing code Only valid on ActErrno and ActTrace. Takes no action otherwise.
+// Accepts 16-bit return code as argument.
+// Returns a valid ScmpAction of the original type with the new error code set.
+func (a ScmpAction) SetReturnCode(code int16) ScmpAction {
+	aTmp := a & 0x0000FFFF
+	if aTmp == ActErrno || aTmp == ActTrace {
+		return (aTmp | (ScmpAction(code)&0xFFFF)<<16)
+	}
+	return a
+}
+
+// GetReturnCode returns the return code of an ScmpAction
+func (a ScmpAction) GetReturnCode() int16 {
+	return int16(a >> 16)
+}
+
+// General utility functions
+
+// GetLibraryVersion returns the version of the library the bindings are built
+// against.
+// The version is formatted as follows: Major.Minor.Micro
+func GetLibraryVersion() (major, minor, micro int) {
+	return verMajor, verMinor, verMicro
+}
+
+// Syscall functions
+
+// GetName retrieves the name of a syscall from its number.
+// Acts on any syscall number.
+// Returns either a string containing the name of the syscall, or an error.
+func (s ScmpSyscall) GetName() (string, error) {
+	return s.GetNameByArch(ArchNative)
+}
+
+// GetNameByArch looks up the name of a syscall number for the given
+// architecture.
+// Accepts a valid architecture constant; an invalid one is rejected with the
+// error from sanitizeArch.
+// Returns the syscall name, or an error if libseccomp could not resolve the
+// number.
+func (s ScmpSyscall) GetNameByArch(arch ScmpArch) (string, error) {
+	err := sanitizeArch(arch)
+	if err != nil {
+		return "", err
+	}
+
+	// The returned C string is owned by us and released once copied.
+	name := C.seccomp_syscall_resolve_num_arch(arch.toNative(), C.int(s))
+	if name == nil {
+		return "", fmt.Errorf("could not resolve syscall name")
+	}
+	defer C.free(unsafe.Pointer(name))
+
+	return C.GoString(name), nil
+}
+
+// GetSyscallFromName resolves a syscall name to its number on the kernel's
+// native architecture.
+// Returns the syscall number, or an error when libseccomp does not know a
+// syscall by that name.
+func GetSyscallFromName(name string) (ScmpSyscall, error) {
+	cName := C.CString(name)
+	defer C.free(unsafe.Pointer(cName))
+
+	number := C.seccomp_syscall_resolve_name(cName)
+	if number != scmpError {
+		return ScmpSyscall(number), nil
+	}
+
+	return 0, fmt.Errorf("could not resolve name to syscall")
+}
+
+// GetSyscallFromNameByArch resolves a syscall name to its number under the
+// ABI of the given architecture.
+// Returns the syscall number, or an error if the architecture constant is
+// rejected by sanitizeArch or no syscall with that name was found.
+func GetSyscallFromNameByArch(name string, arch ScmpArch) (ScmpSyscall, error) {
+	err := sanitizeArch(arch)
+	if err != nil {
+		return 0, err
+	}
+
+	cName := C.CString(name)
+	defer C.free(unsafe.Pointer(cName))
+
+	number := C.seccomp_syscall_resolve_name_arch(arch.toNative(), cName)
+	if number != scmpError {
+		return ScmpSyscall(number), nil
+	}
+
+	return 0, fmt.Errorf("could not resolve name to syscall")
+}
+
+// MakeCondition creates and returns a new condition to attach to a filter rule.
+// Associated rules will only match if this condition is true.
+// Accepts the number of the argument we are checking (zero-indexed, 0-5), a
+// comparison operator, and one or two values to compare against.
+// The rule will match if argument $arg of the syscall is $COMPARE_OP the
+// provided comparison value.
+// Some comparison operators accept two values. Masked equals, for example,
+// will mask $arg of the syscall with the second value provided (via bitwise
+// AND) and then compare against the first value provided.
+// For example, in the less than or equal case, if the syscall argument was
+// 0 and the value provided was 1, the condition would match, as 0 is less
+// than or equal to 1.
+// Returns a valid ScmpCondition, or an error if the operator is invalid or
+// unrecognized, the argument index is out of range, or the number of values
+// is not 1 or 2.
+func MakeCondition(arg uint, comparison ScmpCompareOp, values ...uint64) (ScmpCondition, error) {
+	var condStruct ScmpCondition
+
+	switch {
+	case comparison == CompareInvalid:
+		return condStruct, fmt.Errorf("invalid comparison operator")
+	case arg > 5:
+		return condStruct, fmt.Errorf("syscalls only have up to 6 arguments")
+	case len(values) > 2:
+		return condStruct, fmt.Errorf("conditions can have at most 2 arguments")
+	case len(values) == 0:
+		return condStruct, fmt.Errorf("must provide at least one value to compare against")
+	}
+
+	// CompareInvalid is not the only bad operator: any value outside the
+	// defined range would otherwise be accepted here and only surface later
+	// as an opaque failure when the rule is added.
+	if err := sanitizeCompareOp(comparison); err != nil {
+		return condStruct, err
+	}
+
+	condStruct.Argument = arg
+	condStruct.Op = comparison
+	condStruct.Operand1 = values[0]
+	if len(values) == 2 {
+		condStruct.Operand2 = values[1]
+	}
+	// With a single value Operand2 keeps its zero value (unused).
+
+	return condStruct, nil
+}
+
+// Utility Functions
+
+// GetNativeArch asks libseccomp for the native kernel architecture and
+// converts the answer to an ScmpArch token.
+// Returns an error if the native value does not map to a known architecture.
+func GetNativeArch() (ScmpArch, error) {
+	return archFromNative(C.seccomp_arch_native())
+}
+
+// Public Filter API
+
+// ScmpFilter represents a filter context in libseccomp.
+// A filter context is initially empty. Rules can be added to it, and it can
+// then be loaded into the kernel.
+// Instances are created with NewFilter; methods take the embedded lock before
+// touching the context.
+type ScmpFilter struct {
+	// filterCtx is the libseccomp context handle obtained from seccomp_init.
+	filterCtx C.scmp_filter_ctx
+	// valid is true while filterCtx may still be used; Release clears it, and
+	// Merge clears it on the source filter.
+	valid     bool
+	// lock guards filterCtx and valid.
+	lock      sync.Mutex
+}
+
+// NewFilter allocates a fresh filter context.
+// defaultAction is the action taken for syscalls which match no rule in the
+// filter.
+// Returns the new filter, or nil and an error if the default action is
+// rejected by sanitizeAction or libseccomp could not create the context.
+// A finalizer is registered so the context is released when the filter is
+// garbage collected.
+func NewFilter(defaultAction ScmpAction) (*ScmpFilter, error) {
+	err := sanitizeAction(defaultAction)
+	if err != nil {
+		return nil, err
+	}
+
+	ctx := C.seccomp_init(defaultAction.toNative())
+	if ctx == nil {
+		return nil, fmt.Errorf("could not create filter")
+	}
+
+	filter := &ScmpFilter{filterCtx: ctx, valid: true}
+	runtime.SetFinalizer(filter, filterFinalizer)
+
+	return filter, nil
+}
+
+// IsValid reports whether a filter context may still be used.
+// Release and Merge (for the source filter) invalidate a context, after which
+// further operations on it are refused.
+func (f *ScmpFilter) IsValid() bool {
+	f.lock.Lock()
+	usable := f.valid
+	f.lock.Unlock()
+
+	return usable
+}
+
+// Reset clears all state of a filter context and installs a new default
+// action for syscalls which do not match any rule.
+// Returns an error if the action or the filter is invalid, or if libseccomp
+// reports a failure.
+func (f *ScmpFilter) Reset(defaultAction ScmpAction) error {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	// The action is validated before the filter, as callers may rely on
+	// which error wins.
+	if err := sanitizeAction(defaultAction); err != nil {
+		return err
+	}
+	if !f.valid {
+		return errBadFilter
+	}
+
+	if retCode := C.seccomp_reset(f.filterCtx, defaultAction.toNative()); retCode != 0 {
+		return syscall.Errno(-retCode)
+	}
+
+	return nil
+}
+
+// Release frees the memory of a filter context. It should be called once the
+// filter has been loaded into the kernel and is no longer needed.
+// After this call the filter is invalid and cannot be used; releasing an
+// already-invalid filter is a no-op.
+// Release() is also run automatically when a filter context is garbage
+// collected, but may be called manually to free memory earlier.
+func (f *ScmpFilter) Release() {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	if f.valid {
+		f.valid = false
+		C.seccomp_release(f.filterCtx)
+	}
+}
+
+// Merge merges two filter contexts.
+// The source filter src will be released as part of the process, and will no
+// longer be usable or valid after this call.
+// To be merged, filters must NOT share any architectures, and all their
+// attributes (Default Action, Bad Arch Action, No New Privs and TSync bools)
+// must match.
+// The filter src will be merged into the filter this is called on.
+// The architectures of the src filter not present in the destination, and all
+// associated rules, will be added to the destination.
+// Returns an error if src is nil, if src is the receiver itself, if either
+// filter is invalid, or if merging the filters failed.
+func (f *ScmpFilter) Merge(src *ScmpFilter) error {
+	// Reject degenerate arguments before taking any lock: a nil src would
+	// panic on src.lock, and sync.Mutex is not reentrant, so merging a filter
+	// into itself would block forever on the second Lock below.
+	if src == nil {
+		return fmt.Errorf("one or more of the filter contexts is invalid or uninitialized")
+	}
+	if src == f {
+		return fmt.Errorf("a filter cannot be merged into itself")
+	}
+
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	src.lock.Lock()
+	defer src.lock.Unlock()
+
+	if !src.valid || !f.valid {
+		return fmt.Errorf("one or more of the filter contexts is invalid or uninitialized")
+	}
+
+	// Merge the filters
+	retCode := C.seccomp_merge(f.filterCtx, src.filterCtx)
+	if syscall.Errno(-1*retCode) == syscall.EINVAL {
+		return fmt.Errorf("filters could not be merged due to a mismatch in attributes or invalid filter")
+	} else if retCode != 0 {
+		return syscall.Errno(-1 * retCode)
+	}
+
+	// Only mark src invalid once the merge has actually succeeded.
+	src.valid = false
+
+	return nil
+}
+
+// IsArchPresent reports whether an architecture is present in a filter.
+// If a filter contains an architecture, it uses its default action for
+// syscalls which do not match rules in it, and its rules can match syscalls
+// for that ABI.
+// If a filter does not contain an architecture, all syscalls made to that
+// kernel ABI will fail with the filter's default Bad Architecture Action
+// (by default, killing the process).
+// Accepts an architecture constant.
+// Returns true if the architecture is present, false otherwise, and an error
+// on an invalid architecture constant, an invalid filter context, or a
+// failure reported by libseccomp.
+func (f *ScmpFilter) IsArchPresent(arch ScmpArch) (bool, error) {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	if err := sanitizeArch(arch); err != nil {
+		return false, err
+	}
+	if !f.valid {
+		return false, errBadFilter
+	}
+
+	switch retCode := C.seccomp_arch_exist(f.filterCtx, arch.toNative()); {
+	case retCode == 0:
+		return true, nil
+	case syscall.Errno(-retCode) == syscall.EEXIST:
+		// -EEXIST is "arch not present"
+		return false, nil
+	default:
+		return false, syscall.Errno(-retCode)
+	}
+}
+
+// AddArch adds an architecture to the filter.
+// Accepts an architecture constant.
+// Returns an error on an invalid architecture token or filter context, or
+// when libseccomp reports a failure. Adding an architecture that is already
+// present succeeds silently.
+func (f *ScmpFilter) AddArch(arch ScmpArch) error {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	if err := sanitizeArch(arch); err != nil {
+		return err
+	}
+	if !f.valid {
+		return errBadFilter
+	}
+
+	// Libseccomp answers -EEXIST when the architecture is already in the
+	// filter. That is not fatal - the end state is what was asked for - so
+	// it is treated as success.
+	switch retCode := C.seccomp_arch_add(f.filterCtx, arch.toNative()); {
+	case retCode == 0, syscall.Errno(-retCode) == syscall.EEXIST:
+		return nil
+	default:
+		return syscall.Errno(-retCode)
+	}
+}
+
+// RemoveArch removes an architecture from the filter.
+// Accepts an architecture constant.
+// Returns an error on an invalid architecture token or filter context, or
+// when libseccomp reports a failure. Removing an architecture that is not
+// present succeeds silently.
+func (f *ScmpFilter) RemoveArch(arch ScmpArch) error {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	if err := sanitizeArch(arch); err != nil {
+		return err
+	}
+	if !f.valid {
+		return errBadFilter
+	}
+
+	// Mirrors AddArch: -EEXIST here means the arch was not in the filter.
+	// The architecture is absent after the call either way, so this is
+	// treated as success.
+	switch retCode := C.seccomp_arch_remove(f.filterCtx, arch.toNative()); {
+	case retCode == 0, syscall.Errno(-retCode) == syscall.EEXIST:
+		return nil
+	default:
+		return syscall.Errno(-retCode)
+	}
+}
+
+// Load installs a filter context into the kernel.
+// Returns an error if the filter context is invalid or libseccomp reports a
+// failure.
+func (f *ScmpFilter) Load() error {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	if !f.valid {
+		return errBadFilter
+	}
+
+	retCode := C.seccomp_load(f.filterCtx)
+	if retCode == 0 {
+		return nil
+	}
+
+	return syscall.Errno(-retCode)
+}
+
+// GetDefaultAction reads the action the filter takes on a syscall which does
+// not match any rule.
+// Returns an error if the attribute could not be retrieved or does not map
+// to a known action.
+func (f *ScmpFilter) GetDefaultAction() (ScmpAction, error) {
+	native, err := f.getFilterAttr(filterAttrActDefault)
+	if err == nil {
+		return actionFromNative(native)
+	}
+
+	return 0x0, err
+}
+
+// GetBadArchAction reads the action the filter takes on a syscall made for
+// an architecture that is not in the filter.
+// Returns an error if the attribute could not be retrieved or does not map
+// to a known action.
+func (f *ScmpFilter) GetBadArchAction() (ScmpAction, error) {
+	native, err := f.getFilterAttr(filterAttrActBadArch)
+	if err == nil {
+		return actionFromNative(native)
+	}
+
+	return 0x0, err
+}
+
+// GetNoNewPrivsBit reports the state the No New Privileges bit will be set to
+// when the filter is loaded, or an error if the value could not be retrieved.
+// The No New Privileges bit tells the kernel that new processes run with exec()
+// cannot gain more privileges than the process that ran exec().
+// For example, a process with No New Privileges set would be unable to exec
+// setuid/setgid executables.
+func (f *ScmpFilter) GetNoNewPrivsBit() (bool, error) {
+	raw, err := f.getFilterAttr(filterAttrNNP)
+	if err != nil {
+		return false, err
+	}
+
+	// Any non-zero attribute value counts as "set".
+	return raw != 0, nil
+}
+
+// GetTsyncBit reports whether Thread Synchronization will be enabled when the
+// filter is loaded, or an error if the value could not be retrieved.
+// Thread Sync ensures that all members of the thread group of the calling
+// process will share the same Seccomp filter set.
+// Tsync is a fairly recent addition to the Linux kernel and older kernels
+// lack support. If the running kernel does not support Tsync and it is
+// requested in a filter, Libseccomp will not enable TSync support and will
+// proceed as normal.
+// This function is unavailable before v2.2 of libseccomp and will return an
+// error.
+func (f *ScmpFilter) GetTsyncBit() (bool, error) {
+	raw, err := f.getFilterAttr(filterAttrTsync)
+	if err != nil {
+		return false, err
+	}
+
+	// Any non-zero attribute value counts as "enabled".
+	return raw != 0, nil
+}
+
+// SetBadArchAction changes the action taken on a syscall made for an
+// architecture that is not in the filter.
+// Returns an error if the action is rejected by sanitizeAction or the
+// attribute could not be set.
+func (f *ScmpFilter) SetBadArchAction(action ScmpAction) error {
+	err := sanitizeAction(action)
+	if err != nil {
+		return err
+	}
+
+	native := action.toNative()
+	return f.setFilterAttr(filterAttrActBadArch, native)
+}
+
+// SetNoNewPrivsBit sets the state of the No New Privileges bit, which will be
+// applied on filter load, or an error if an issue was encountered setting the
+// value.
+// Filters with No New Privileges set to 0 can only be loaded if the process
+// has the CAP_SYS_ADMIN capability.
+func (f *ScmpFilter) SetNoNewPrivsBit(state bool) error {
+	var toSet C.uint32_t = 0x0
+
+	if state {
+		toSet = 0x1
+	}
+
+	return f.setFilterAttr(filterAttrNNP, toSet)
+}
+
+// SetTsync sets whether Thread Synchronization will be enabled on the filter
+// being loaded. Returns an error if setting Tsync failed, or the filter is
+// invalid.
+// Thread Sync ensures that all members of the thread group of the calling
+// process will share the same Seccomp filter set.
+// Tsync is a fairly recent addition to the Linux kernel and older kernels
+// lack support. If the running kernel does not support Tsync and it is
+// requested in a filter, Libseccomp will not enable TSync support and will
+// proceed as normal.
+// This function is unavailable before v2.2 of libseccomp and will return an
+// error.
+func (f *ScmpFilter) SetTsync(enable bool) error {
+	var toSet C.uint32_t = 0x0
+
+	if enable {
+		toSet = 0x1
+	}
+
+	return f.setFilterAttr(filterAttrTsync, toSet)
+}
+
+// SetSyscallPriority assigns a priority to a syscall.
+// The priority is a hint to the filter generator in libseccomp about the
+// importance of this syscall. High-priority syscalls are placed
+// first in the filter code, and incur less overhead (at the expense of
+// lower-priority syscalls).
+// Returns an error if the filter is invalid or libseccomp reports a failure.
+func (f *ScmpFilter) SetSyscallPriority(call ScmpSyscall, priority uint8) error {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	if !f.valid {
+		return errBadFilter
+	}
+
+	retCode := C.seccomp_syscall_priority(f.filterCtx, C.int(call), C.uint8_t(priority))
+	if retCode == 0 {
+		return nil
+	}
+
+	return syscall.Errno(-retCode)
+}
+
+// AddRule adds a single rule for an unconditional action on a syscall.
+// Accepts the number of the syscall and the action to be taken on the call
+// being made.
+// Returns an error if an issue was encountered adding the rule.
+func (f *ScmpFilter) AddRule(call ScmpSyscall, action ScmpAction) error {
+	return f.addRuleGeneric(call, action, false, nil)
+}
+
+// AddRuleExact adds a single rule for an unconditional action on a syscall.
+// Accepts the number of the syscall and the action to be taken on the call
+// being made.
+// No modifications will be made to the rule, and it will fail to add if it
+// cannot be applied to the current architecture without modification.
+// The rule will function exactly as described, but it may not function identically
+// (or be able to be applied to) all architectures.
+// Returns an error if an issue was encountered adding the rule.
+func (f *ScmpFilter) AddRuleExact(call ScmpSyscall, action ScmpAction) error {
+	return f.addRuleGeneric(call, action, true, nil)
+}
+
+// AddRuleConditional adds a single rule for a conditional action on a syscall.
+// Returns an error if an issue was encountered adding the rule.
+// All conditions must match for the rule to match.
+// There is a bug in library versions below v2.2.1 which can, in some cases,
+// cause conditions to be lost when more than one are used. Consequently,
+// AddRuleConditional is disabled on library versions lower than v2.2.1
+func (f *ScmpFilter) AddRuleConditional(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error {
+	return f.addRuleGeneric(call, action, false, conds)
+}
+
+// AddRuleConditionalExact adds a single rule for a conditional action on a
+// syscall.
+// No modifications will be made to the rule, and it will fail to add if it
+// cannot be applied to the current architecture without modification.
+// The rule will function exactly as described, but it may not function identically
+// (or be able to be applied to) all architectures.
+// Returns an error if an issue was encountered adding the rule.
+// There is a bug in library versions below v2.2.1 which can, in some cases,
+// cause conditions to be lost when more than one are used. Consequently,
+// AddRuleConditionalExact is disabled on library versions lower than v2.2.1
+func (f *ScmpFilter) AddRuleConditionalExact(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error {
+	return f.addRuleGeneric(call, action, true, conds)
+}
+
+// ExportPFC writes a PFC-formatted, human-readable dump of a filter context's
+// rules to a file.
+// Accepts the file to write to (must be open for writing).
+// Returns an error if the filter is invalid or the export fails.
+func (f *ScmpFilter) ExportPFC(file *os.File) error {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	// The descriptor is fetched before the validity check, matching the
+	// established call order.
+	descriptor := C.int(file.Fd())
+
+	if !f.valid {
+		return errBadFilter
+	}
+
+	retCode := C.seccomp_export_pfc(f.filterCtx, descriptor)
+	if retCode == 0 {
+		return nil
+	}
+
+	return syscall.Errno(-retCode)
+}
+
+// ExportBPF writes a Berkeley Packet Filter-formatted, kernel-readable dump
+// of a filter context's rules to a file.
+// Accepts the file to write to (must be open for writing).
+// Returns an error if the filter is invalid or the export fails.
+func (f *ScmpFilter) ExportBPF(file *os.File) error {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	// The descriptor is fetched before the validity check, matching the
+	// established call order.
+	descriptor := C.int(file.Fd())
+
+	if !f.valid {
+		return errBadFilter
+	}
+
+	retCode := C.seccomp_export_bpf(f.filterCtx, descriptor)
+	if retCode == 0 {
+		return nil
+	}
+
+	return syscall.Errno(-retCode)
+}
--- a/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go
+++ b/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go
@ -0,0 +1,506 @@
+// +build linux
+
+// Internal functions for libseccomp Go bindings
+// No exported functions
+
+package seccomp
+
+import (
+	"fmt"
+	"os"
+	"syscall"
+)
+
+// Unexported C wrapping code - provides the C-Golang interface
+// Get the seccomp header in scope
+// Need stdlib.h for free() on cstrings
+
+// #cgo pkg-config: libseccomp
+/*
+#include <stdlib.h>
+#include <seccomp.h>
+
+#if SCMP_VER_MAJOR < 2
+#error Minimum supported version of Libseccomp is v2.1.0
+#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 1
+#error Minimum supported version of Libseccomp is v2.1.0
+#endif
+
+#define ARCH_BAD ~0
+
+const uint32_t C_ARCH_BAD = ARCH_BAD;
+
+#ifndef SCMP_ARCH_AARCH64
+#define SCMP_ARCH_AARCH64 ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_MIPS
+#define SCMP_ARCH_MIPS ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_MIPS64
+#define SCMP_ARCH_MIPS64 ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_MIPS64N32
+#define SCMP_ARCH_MIPS64N32 ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_MIPSEL
+#define SCMP_ARCH_MIPSEL ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_MIPSEL64
+#define SCMP_ARCH_MIPSEL64 ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_MIPSEL64N32
+#define SCMP_ARCH_MIPSEL64N32 ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_PPC
+#define SCMP_ARCH_PPC ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_PPC64
+#define SCMP_ARCH_PPC64 ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_PPC64LE
+#define SCMP_ARCH_PPC64LE ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_S390
+#define SCMP_ARCH_S390 ARCH_BAD
+#endif
+
+#ifndef SCMP_ARCH_S390X
+#define SCMP_ARCH_S390X ARCH_BAD
+#endif
+
+const uint32_t C_ARCH_NATIVE       = SCMP_ARCH_NATIVE;
+const uint32_t C_ARCH_X86          = SCMP_ARCH_X86;
+const uint32_t C_ARCH_X86_64       = SCMP_ARCH_X86_64;
+const uint32_t C_ARCH_X32          = SCMP_ARCH_X32;
+const uint32_t C_ARCH_ARM          = SCMP_ARCH_ARM;
+const uint32_t C_ARCH_AARCH64      = SCMP_ARCH_AARCH64;
+const uint32_t C_ARCH_MIPS         = SCMP_ARCH_MIPS;
+const uint32_t C_ARCH_MIPS64       = SCMP_ARCH_MIPS64;
+const uint32_t C_ARCH_MIPS64N32    = SCMP_ARCH_MIPS64N32;
+const uint32_t C_ARCH_MIPSEL       = SCMP_ARCH_MIPSEL;
+const uint32_t C_ARCH_MIPSEL64     = SCMP_ARCH_MIPSEL64;
+const uint32_t C_ARCH_MIPSEL64N32  = SCMP_ARCH_MIPSEL64N32;
+const uint32_t C_ARCH_PPC          = SCMP_ARCH_PPC;
+const uint32_t C_ARCH_PPC64        = SCMP_ARCH_PPC64;
+const uint32_t C_ARCH_PPC64LE      = SCMP_ARCH_PPC64LE;
+const uint32_t C_ARCH_S390         = SCMP_ARCH_S390;
+const uint32_t C_ARCH_S390X        = SCMP_ARCH_S390X;
+
+const uint32_t C_ACT_KILL          = SCMP_ACT_KILL;
+const uint32_t C_ACT_TRAP          = SCMP_ACT_TRAP;
+const uint32_t C_ACT_ERRNO         = SCMP_ACT_ERRNO(0);
+const uint32_t C_ACT_TRACE         = SCMP_ACT_TRACE(0);
+const uint32_t C_ACT_ALLOW         = SCMP_ACT_ALLOW;
+
+// If TSync is not supported, make sure it doesn't map to a supported filter attribute
+// Don't worry about major version < 2, the minimum version checks should catch that case
+#if SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 2
+#define SCMP_FLTATR_CTL_TSYNC _SCMP_CMP_MIN
+#endif
+
+const uint32_t C_ATTRIBUTE_DEFAULT = (uint32_t)SCMP_FLTATR_ACT_DEFAULT;
+const uint32_t C_ATTRIBUTE_BADARCH = (uint32_t)SCMP_FLTATR_ACT_BADARCH;
+const uint32_t C_ATTRIBUTE_NNP     = (uint32_t)SCMP_FLTATR_CTL_NNP;
+const uint32_t C_ATTRIBUTE_TSYNC   = (uint32_t)SCMP_FLTATR_CTL_TSYNC;
+
+const int      C_CMP_NE            = (int)SCMP_CMP_NE;
+const int      C_CMP_LT            = (int)SCMP_CMP_LT;
+const int      C_CMP_LE            = (int)SCMP_CMP_LE;
+const int      C_CMP_EQ            = (int)SCMP_CMP_EQ;
+const int      C_CMP_GE            = (int)SCMP_CMP_GE;
+const int      C_CMP_GT            = (int)SCMP_CMP_GT;
+const int      C_CMP_MASKED_EQ     = (int)SCMP_CMP_MASKED_EQ;
+
+const int      C_VERSION_MAJOR     = SCMP_VER_MAJOR;
+const int      C_VERSION_MINOR     = SCMP_VER_MINOR;
+const int      C_VERSION_MICRO     = SCMP_VER_MICRO;
+
+typedef struct scmp_arg_cmp* scmp_cast_t;
+
+// Wrapper to create an scmp_arg_cmp struct
+// Allocates the struct with malloc() and fills in the argument index, the
+// comparison operator and both operands. The result is returned as a void
+// pointer; the caller owns it and must release it with free().
+// NOTE(review): the malloc() result is written to without a NULL check.
+void*
+make_struct_arg_cmp(
+                    unsigned int arg,
+                    int compare,
+                    uint64_t a,
+                    uint64_t b
+                   )
+{
+	struct scmp_arg_cmp *s = malloc(sizeof(struct scmp_arg_cmp));
+
+	s->arg = arg;
+	s->op = compare;
+	s->datum_a = a;
+	s->datum_b = b;
+
+	return s;
+}
+*/
+import "C"
+
+// Nonexported types
+
+// scmpFilterAttr identifies a filter attribute on the Go side; toNative maps
+// it to the corresponding libseccomp attribute value.
+type scmpFilterAttr uint32
+
+// Nonexported constants
+
+// Filter attributes known to the bindings, numbered consecutively from zero.
+const (
+	filterAttrActDefault scmpFilterAttr = iota
+	filterAttrActBadArch
+	filterAttrNNP
+	filterAttrTsync
+)
+
+// Sentinel value and inclusive range boundaries used by the sanitize helpers
+// and the syscall-resolution functions.
+const (
+	// An error return from certain libseccomp functions
+	scmpError C.int = -1
+	// Comparison boundaries to check for architecture validity
+	// (inclusive: sanitizeArch rejects values below archStart or above archEnd)
+	archStart ScmpArch = ArchNative
+	archEnd   ScmpArch = ArchS390X
+	// Comparison boundaries to check for action validity
+	// (inclusive, applied to the low 16 bits of an action by sanitizeAction)
+	actionStart ScmpAction = ActKill
+	actionEnd   ScmpAction = ActAllow
+	// Comparison boundaries to check for comparison operator validity
+	// (inclusive: used by sanitizeCompareOp)
+	compareOpStart ScmpCompareOp = CompareNotEqual
+	compareOpEnd   ScmpCompareOp = CompareMaskedEqual
+)
+
+// Package-level values shared by the bindings.
+var (
+	// Error returned when an operation is attempted on a filter context that
+	// is no longer (or was never) valid
+	errBadFilter = fmt.Errorf("filter is invalid or uninitialized")
+	// Library major, minor, and micro versions, taken from the SCMP_VER_*
+	// macros of the libseccomp header via the C_VERSION_* constants in the
+	// cgo preamble
+	verMajor = int(C.C_VERSION_MAJOR)
+	verMinor = int(C.C_VERSION_MINOR)
+	verMicro = int(C.C_VERSION_MICRO)
+)
+
+// Nonexported functions
+
+// checkVersionAbove reports whether the library version is greater than or
+// equal to major.minor.micro, comparing the most significant differing
+// component.
+func checkVersionAbove(major, minor, micro int) bool {
+	switch {
+	case verMajor != major:
+		return verMajor > major
+	case verMinor != minor:
+		return verMinor > minor
+	default:
+		return verMicro >= micro
+	}
+}
+
+// Init function: refuse to run against a library older than v2.1.0.
+// On an unsupported version a message is written to stderr and the process
+// exits.
+func init() {
+	if checkVersionAbove(2, 1, 0) {
+		return
+	}
+
+	fmt.Fprintf(os.Stderr, "Libseccomp version too low: minimum supported is 2.1.0, detected %d.%d.%d", C.C_VERSION_MAJOR, C.C_VERSION_MINOR, C.C_VERSION_MICRO)
+	os.Exit(-1)
+}
+
+// Filter helpers
+
+// filterFinalizer is registered by NewFilter as the garbage-collection
+// finalizer of a filter; it releases the underlying context if that has not
+// happened yet.
+func filterFinalizer(f *ScmpFilter) {
+	f.Release()
+}
+
+// getFilterAttr reads the raw value of a filter attribute.
+// Takes the filter lock. Returns an error if the filter is invalid, if the
+// Tsync attribute is requested on a library older than v2.2.0, or if
+// libseccomp reports a failure.
+func (f *ScmpFilter) getFilterAttr(attr scmpFilterAttr) (C.uint32_t, error) {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	if !f.valid {
+		return 0x0, errBadFilter
+	}
+
+	if attr == filterAttrTsync && !checkVersionAbove(2, 2, 0) {
+		return 0x0, fmt.Errorf("the thread synchronization attribute is not supported in this version of the library")
+	}
+
+	var value C.uint32_t
+	if retCode := C.seccomp_attr_get(f.filterCtx, attr.toNative(), &value); retCode != 0 {
+		return 0x0, syscall.Errno(-retCode)
+	}
+
+	return value, nil
+}
+
+// setFilterAttr writes the raw value of a filter attribute.
+// Takes the filter lock. Returns an error if the filter is invalid, if the
+// Tsync attribute is targeted on a library older than v2.2.0, or if
+// libseccomp reports a failure.
+func (f *ScmpFilter) setFilterAttr(attr scmpFilterAttr, value C.uint32_t) error {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	if !f.valid {
+		return errBadFilter
+	}
+
+	if attr == filterAttrTsync && !checkVersionAbove(2, 2, 0) {
+		return fmt.Errorf("the thread synchronization attribute is not supported in this version of the library")
+	}
+
+	if retCode := C.seccomp_attr_set(f.filterCtx, attr.toNative(), value); retCode != 0 {
+		return syscall.Errno(-retCode)
+	}
+
+	return nil
+}
+
+// addRuleArray adds ONE rule to the filter through the
+// seccomp_rule_add_array family, passing length conditions that start at
+// conds. All conditions of a single rule must hold for the rule to match.
+// DOES NOT LOCK OR CHECK VALIDITY
+// Assumes caller has already done this
+func (f *ScmpFilter) addRuleArray(call ScmpSyscall, action ScmpAction, exact bool, length C.uint, conds C.scmp_cast_t) error {
+	if length != 0 && conds == nil {
+		return fmt.Errorf("null conditions list, but length is nonzero")
+	}
+
+	var retCode C.int
+	if exact {
+		retCode = C.seccomp_rule_add_exact_array(f.filterCtx, action.toNative(), C.int(call), length, conds)
+	} else {
+		retCode = C.seccomp_rule_add_array(f.filterCtx, action.toNative(), C.int(call), length, conds)
+	}
+
+	if syscall.Errno(-1*retCode) == syscall.EFAULT {
+		return fmt.Errorf("unrecognized syscall")
+	} else if syscall.Errno(-1*retCode) == syscall.EPERM {
+		return fmt.Errorf("requested action matches default action of filter")
+	} else if retCode != 0 {
+		return syscall.Errno(-1 * retCode)
+	}
+
+	return nil
+}
+
+// DOES NOT LOCK OR CHECK VALIDITY
+// Assumes caller has already done this
+// Wrapper for seccomp_rule_add_... functions taking zero (cond == nil) or
+// exactly one condition.
+func (f *ScmpFilter) addRuleWrapper(call ScmpSyscall, action ScmpAction, exact bool, cond C.scmp_cast_t) error {
+	var length C.uint
+	if cond != nil {
+		length = 1
+	}
+
+	return f.addRuleArray(call, action, exact, length, cond)
+}
+
+// Generic add function for filter rules
+// With no conditions an unconditional rule is added. Otherwise ALL conditions
+// are attached to a single rule, so the rule only matches when every one of
+// them holds. Adding one rule per condition, as was done previously, made the
+// conditions alternatives (OR) and let a syscall through as soon as any
+// single condition matched.
+// NOTE(review): libseccomp itself validates the combined list and may reject
+// lists it accepted piecewise before (e.g. several comparisons on the same
+// argument) - confirm against the library version in use.
+func (f *ScmpFilter) addRuleGeneric(call ScmpSyscall, action ScmpAction, exact bool, conds []ScmpCondition) error {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	if !f.valid {
+		return errBadFilter
+	}
+
+	if len(conds) == 0 {
+		return f.addRuleWrapper(call, action, exact, nil)
+	}
+
+	// We don't support conditional filtering in library version v2.1
+	if !checkVersionAbove(2, 2, 1) {
+		return fmt.Errorf("conditional filtering requires libseccomp version >= 2.2.1")
+	}
+
+	// Collect the conditions into one contiguous array. The slice holds plain
+	// C structs (no Go pointers), so it may be handed to C for the duration
+	// of the call.
+	cmps := make([]C.struct_scmp_arg_cmp, 0, len(conds))
+	for _, cond := range conds {
+		raw := C.make_struct_arg_cmp(C.uint(cond.Argument), cond.Op.toNative(), C.uint64_t(cond.Operand1), C.uint64_t(cond.Operand2))
+		if raw == nil {
+			return fmt.Errorf("error allocating memory for conditions")
+		}
+		cmps = append(cmps, *C.scmp_cast_t(raw))
+		// Free right away instead of deferring inside the loop.
+		C.free(raw)
+	}
+
+	return f.addRuleArray(call, action, exact, C.uint(len(cmps)), C.scmp_cast_t(&cmps[0]))
+}
+
+// Generic Helpers
+
+// sanitizeArch validates an architecture token.
+// Returns an error when the token lies outside [archStart, archEnd] or maps
+// to the ARCH_BAD placeholder, i.e. the library headers do not define it.
+func sanitizeArch(in ScmpArch) error {
+	inRange := in >= archStart && in <= archEnd
+	if !inRange {
+		return fmt.Errorf("unrecognized architecture")
+	}
+
+	if native := in.toNative(); native == C.C_ARCH_BAD {
+		return fmt.Errorf("architecture is not supported on this version of the library")
+	}
+
+	return nil
+}
+
+// sanitizeAction validates an action value.
+// The low 16 bits must name an action within [actionStart, actionEnd]; bits
+// 16-31 may only be non-zero for ActTrace and ActErrno.
+func sanitizeAction(in ScmpAction) error {
+	base := in & 0x0000FFFF
+	data := in & 0xFFFF0000
+
+	if base < actionStart || base > actionEnd {
+		return fmt.Errorf("unrecognized action")
+	}
+
+	carriesData := base == ActTrace || base == ActErrno
+	if data != 0 && !carriesData {
+		return fmt.Errorf("highest 16 bits must be zeroed except for Trace and Errno")
+	}
+
+	return nil
+}
+
+// sanitizeCompareOp validates a comparison operator, rejecting any value
+// outside [compareOpStart, compareOpEnd].
+func sanitizeCompareOp(in ScmpCompareOp) error {
+	if in >= compareOpStart && in <= compareOpEnd {
+		return nil
+	}
+
+	return fmt.Errorf("unrecognized comparison operator")
+}
+
+// archFromNative converts a libseccomp architecture value to its ScmpArch
+// token, or returns an error for a value that matches none of them.
+func archFromNative(a C.uint32_t) (ScmpArch, error) {
+	// Checked in order, first match wins. Architectures missing from the
+	// library headers all share the ARCH_BAD placeholder value, so the order
+	// of this table is significant.
+	known := []struct {
+		native C.uint32_t
+		arch   ScmpArch
+	}{
+		{C.C_ARCH_X86, ArchX86},
+		{C.C_ARCH_X86_64, ArchAMD64},
+		{C.C_ARCH_X32, ArchX32},
+		{C.C_ARCH_ARM, ArchARM},
+		{C.C_ARCH_NATIVE, ArchNative},
+		{C.C_ARCH_AARCH64, ArchARM64},
+		{C.C_ARCH_MIPS, ArchMIPS},
+		{C.C_ARCH_MIPS64, ArchMIPS64},
+		{C.C_ARCH_MIPS64N32, ArchMIPS64N32},
+		{C.C_ARCH_MIPSEL, ArchMIPSEL},
+		{C.C_ARCH_MIPSEL64, ArchMIPSEL64},
+		{C.C_ARCH_MIPSEL64N32, ArchMIPSEL64N32},
+		{C.C_ARCH_PPC, ArchPPC},
+		{C.C_ARCH_PPC64, ArchPPC64},
+		{C.C_ARCH_PPC64LE, ArchPPC64LE},
+		{C.C_ARCH_S390, ArchS390},
+		{C.C_ARCH_S390X, ArchS390X},
+	}
+
+	for _, entry := range known {
+		if a == entry.native {
+			return entry.arch, nil
+		}
+	}
+
+	return 0x0, fmt.Errorf("unrecognized architecture")
+}
+
+// toNative maps an architecture token to its libseccomp value.
+// Only use with sanitized arches, no error handling: an unknown token
+// silently yields 0x0.
+func (a ScmpArch) toNative() C.uint32_t {
+	switch a {
+	case ArchNative:
+		return C.C_ARCH_NATIVE
+
+	// x86 family
+	case ArchX86:
+		return C.C_ARCH_X86
+	case ArchAMD64:
+		return C.C_ARCH_X86_64
+	case ArchX32:
+		return C.C_ARCH_X32
+
+	// ARM family
+	case ArchARM:
+		return C.C_ARCH_ARM
+	case ArchARM64:
+		return C.C_ARCH_AARCH64
+
+	// MIPS family
+	case ArchMIPS:
+		return C.C_ARCH_MIPS
+	case ArchMIPS64:
+		return C.C_ARCH_MIPS64
+	case ArchMIPS64N32:
+		return C.C_ARCH_MIPS64N32
+	case ArchMIPSEL:
+		return C.C_ARCH_MIPSEL
+	case ArchMIPSEL64:
+		return C.C_ARCH_MIPSEL64
+	case ArchMIPSEL64N32:
+		return C.C_ARCH_MIPSEL64N32
+
+	// PowerPC family
+	case ArchPPC:
+		return C.C_ARCH_PPC
+	case ArchPPC64:
+		return C.C_ARCH_PPC64
+	case ArchPPC64LE:
+		return C.C_ARCH_PPC64LE
+
+	// s390 family
+	case ArchS390:
+		return C.C_ARCH_S390
+	case ArchS390X:
+		return C.C_ARCH_S390X
+	}
+
+	return 0x0
+}
+
+// toNative maps a comparison operator to its libseccomp value.
+// Only use with sanitized ops, no error handling: an unknown operator
+// silently yields 0x0.
+func (a ScmpCompareOp) toNative() C.int {
+	switch a {
+	case CompareEqual:
+		return C.C_CMP_EQ
+	case CompareNotEqual:
+		return C.C_CMP_NE
+	case CompareMaskedEqual:
+		return C.C_CMP_MASKED_EQ
+	case CompareLess:
+		return C.C_CMP_LT
+	case CompareLessOrEqual:
+		return C.C_CMP_LE
+	case CompareGreater:
+		return C.C_CMP_GT
+	case CompareGreaterEqual:
+		return C.C_CMP_GE
+	}
+
+	return 0x0
+}
+
+// actionFromNative converts a libseccomp action value to an ScmpAction.
+// The native value keeps the action type in its upper 16 bits and, for Errno
+// and Trace, a return code in the lower 16; the code is carried over with
+// SetReturnCode. Returns an error for an action type that matches none of the
+// known ones.
+func actionFromNative(a C.uint32_t) (ScmpAction, error) {
+	code := int16(a & 0xFFFF)
+
+	switch kind := a & 0xFFFF0000; kind {
+	case C.C_ACT_KILL:
+		return ActKill, nil
+	case C.C_ACT_TRAP:
+		return ActTrap, nil
+	case C.C_ACT_ERRNO:
+		return ActErrno.SetReturnCode(code), nil
+	case C.C_ACT_TRACE:
+		return ActTrace.SetReturnCode(code), nil
+	case C.C_ACT_ALLOW:
+		return ActAllow, nil
+	}
+
+	return 0x0, fmt.Errorf("unrecognized action")
+}
+
+// toNative maps an action (and, for Errno and Trace, its return code) to the
+// libseccomp representation.
+// Only use with sanitized actions, no error handling: an unknown action
+// silently yields 0x0.
+func (a ScmpAction) toNative() C.uint32_t {
+	// Return code stored above the low 16 bits; only used by Errno and Trace.
+	code := C.uint32_t(a) >> 16
+
+	switch a & 0xFFFF {
+	case ActErrno:
+		return C.C_ACT_ERRNO | code
+	case ActTrace:
+		return C.C_ACT_TRACE | code
+	case ActKill:
+		return C.C_ACT_KILL
+	case ActTrap:
+		return C.C_ACT_TRAP
+	case ActAllow:
+		return C.C_ACT_ALLOW
+	}
+
+	return 0x0
+}
+
+// Internal only, assumes safe attribute
+func (a scmpFilterAttr) toNative() uint32 {
+	switch a {
+	case filterAttrActDefault:
+		return uint32(C.C_ATTRIBUTE_DEFAULT)
+	case filterAttrActBadArch:
+		return uint32(C.C_ATTRIBUTE_BADARCH)
+	case filterAttrNNP:
+		return uint32(C.C_ATTRIBUTE_NNP)
+	case filterAttrTsync:
+		return uint32(C.C_ATTRIBUTE_TSYNC)
+	default:
+		return 0x0
+	}
+}
				`@ -1 +0,0 @@`
				`Temporary forked from https://github.com/opencontainers/runc/pull/1692`