Merge pull request #486 from AkihiroSuda/update-rootless

rootless: refactor libcontainer_specconv & add integration tests
docker-18.09
Tõnis Tiigi 2018-07-05 11:26:26 -07:00 committed by GitHub
commit cda0a64eca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 1645 additions and 2295 deletions

View File

@ -2,7 +2,7 @@
Requirements:
- runc `ecd55a4135e0a26de884ce436442914f945b1e76` (May 30, 2018) or later
- Some distros such as Debian and Arch Linux require `echo 1 > /proc/sys/kernel/unprivileged_userns_clone`
- Some distros such as Debian (excluding Ubuntu) and Arch Linux require `echo 1 > /proc/sys/kernel/unprivileged_userns_clone`
- `newuidmap` and `newgidmap` need to be installed on the host. These commands are provided by the `uidmap` package.
- `/etc/subuid` and `/etc/subgid` should contain >= 65536 sub-IDs. e.g. `penguin:231072:65536`.
- To run in a Docker container with non-root `USER`, `docker run --privileged` is still required. See also Jessie's blog: https://blog.jessfraz.com/post/building-container-images-securely-on-kubernetes/
@ -10,7 +10,7 @@ Requirements:
## Set up
Setting up rootless mode also requires some bothersome steps as follows, but you can also use [`rootlesskit`](https://github.com/AkihiroSuda/rootlesskit) for automating these steps.
Setting up rootless mode also requires some bothersome steps as follows, but you can also use [`rootlesskit`](https://github.com/rootless-containers/rootlesskit) for automating these steps.
### Terminal 1:

View File

@ -21,7 +21,7 @@ import (
"github.com/moby/buildkit/executor"
"github.com/moby/buildkit/executor/oci"
"github.com/moby/buildkit/identity"
"github.com/moby/buildkit/util/libcontainer_specconv"
rootlessspecconv "github.com/moby/buildkit/util/rootless/specconv"
"github.com/moby/buildkit/util/system"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
@ -84,6 +84,8 @@ func New(opt Opt) (executor.Executor, error) {
LogFormat: runc.JSON,
PdeathSignal: syscall.SIGKILL,
Setpgid: true,
// we don't execute runc with --rootless=(true|false) explicitly,
// so as to support non-runc runtimes
}
w := &runcExecutor{
@ -169,13 +171,11 @@ func (w *runcExecutor) Exec(ctx context.Context, meta executor.Meta, root cache.
return errors.Wrapf(err, "failed to create working directory %s", newp)
}
if err := setOOMScoreAdj(spec); err != nil {
return err
}
if w.rootless {
specconv.ToRootless(spec, nil)
// TODO(AkihiroSuda): keep Cgroups enabled if /sys/fs/cgroup/cpuset/buildkit exists and writable
spec.Linux.CgroupsPath = ""
// TODO(AkihiroSuda): ToRootless removes netns, but we should readd netns here
// if either SUID or userspace NAT is configured on the host.
if err := setOOMScoreAdj(spec); err != nil {
if err := rootlessspecconv.ToRootless(spec); err != nil {
return err
}
}

View File

@ -1115,6 +1115,10 @@ RUN ["ls"]
}
func testUser(t *testing.T, sb integration.Sandbox) {
if sb.Rootless() {
t.Skip("only for rootful worker, due to lack of support for additional gids (https://github.com/opencontainers/runc/issues/1835)")
}
t.Parallel()
dockerfile := []byte(`

View File

@ -5,7 +5,7 @@ ARG CONTAINERD10_VERSION=v1.0.3
# available targets: buildkitd, buildkitd.oci_only, buildkitd.containerd_only
ARG BUILDKIT_TARGET=buildkitd
ARG REGISTRY_VERSION=2.6
ARG ROOTLESSKIT_VERSION=1e79dc31d71ea8c1a27f15086be2be2b1d99acaa
ARG ROOTLESSKIT_VERSION=20b0fc24b305b031a61ef1a1ca456aadafaf5e77
# The `buildkitd` stage and the `buildctl` stage are placed here
# so that they can be built quickly with legacy DAG-unaware `docker build --target=...`
@ -75,11 +75,29 @@ RUN go build -ldflags "$(cat .tmp/ldflags) -d" -o /usr/bin/buildkitd.containerd
FROM registry:$REGISTRY_VERSION AS registry
FROM gobuild-base AS rootlesskit-base
RUN git clone https://github.com/rootless-containers/rootlesskit.git /go/src/github.com/rootless-containers/rootlesskit
WORKDIR /go/src/github.com/rootless-containers/rootlesskit
FROM rootlesskit-base as rootlesskit
ARG ROOTLESSKIT_VERSION
# mitigate https://github.com/moby/moby/pull/35456
ENV GOOS=linux
RUN git checkout -q "$ROOTLESSKIT_VERSION" \
&& go build -o /rootlesskit ./cmd/rootlesskit
FROM unit-tests AS integration-tests
ENV BUILDKIT_INTEGRATION_ROOTLESS_IDPAIR="1000:1000"
RUN apk add --no-cache shadow shadow-uidmap sudo \
&& useradd --create-home --home-dir /home/user --uid 1000 -s /bin/sh user \
&& echo "XDG_RUNTIME_DIR=/run/user/1000; export XDG_RUNTIME_DIR" >> /home/user/.profile \
&& mkdir -m 0700 -p /run/user/1000 \
&& chown -R user /run/user/1000 /home/user
COPY --from=containerd10 /go/src/github.com/containerd/containerd/bin/containerd* /opt/containerd-1.0/bin/
COPY --from=buildctl /usr/bin/buildctl /usr/bin/
COPY --from=buildkitd /usr/bin/buildkitd /usr/bin
COPY --from=registry /bin/registry /usr/bin
COPY --from=rootlesskit /rootlesskit /usr/bin/
FROM buildkit-base AS cross-windows
ENV GOOS=windows
@ -123,31 +141,18 @@ VOLUME /var/lib/containerd
VOLUME /run/containerd
ENTRYPOINT ["containerd"]
FROM gobuild-base AS rootlesskit-base
RUN git clone https://github.com/AkihiroSuda/rootlesskit.git /go/src/github.com/AkihiroSuda/rootlesskit
WORKDIR /go/src/github.com/AkihiroSuda/rootlesskit
FROM rootlesskit-base as rootlesskit
ARG ROOTLESSKIT_VERSION
# mitigate https://github.com/moby/moby/pull/35456
ENV GOOS=linux
RUN git checkout -q "$ROOTLESSKIT_VERSION" \
&& go build -o /rootlesskit ./cmd/rootlesskit
# Rootless mode.
# Still requires `--privileged`.
FROM buildkit-buildkitd AS rootless
RUN apk add --no-cache shadow shadow-uidmap \
&& useradd --create-home --home-dir /home/user --uid 1000 user \
&& mkdir -p /home/user/.local/run /home/user/.local/tmp /home/user/.local/share/buildkit \
&& chown -R user /home/user
&& mkdir -p /run/user/1000 /home/user/.local/tmp /home/user/.local/share/buildkit \
&& chown -R user /run/user/1000 /home/user
COPY --from=rootlesskit /rootlesskit /usr/bin/
USER user
ENV HOME /home/user
ENV USER user
# WORKAROUND: this should be typically /run/user/1000,
# but mkdir under /run is not captured when built using BuildKit. (#429)
ENV XDG_RUNTIME_DIR=/home/user/.local/run
ENV XDG_RUNTIME_DIR=/run/user/1000
ENV TMPDIR=/home/user/.local/tmp
VOLUME /home/user/.local/share/buildkit
ENTRYPOINT ["rootlesskit", "buildkitd"]

View File

@ -1 +0,0 @@
Temporary forked from https://github.com/opencontainers/runc/pull/1692

View File

@ -1,190 +0,0 @@
package specconv
import (
"os"
"sort"
"strings"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/user"
"github.com/opencontainers/runtime-spec/specs-go"
)
// RootlessOpts is an optional spec for ToRootless
type RootlessOpts struct {
// Add sub{u,g}id to spec.Linux.{U,G}IDMappings.
// Requires newuidmap(1) and newgidmap(1) with suid bit.
// Ignored when running in userns.
MapSubUIDGID bool
}
// Run-time context for ToRootless.
type RootlessContext struct {
EUID uint32
EGID uint32
SubUIDs []user.SubID
SubGIDs []user.SubID
UIDMap []user.IDMap
GIDMap []user.IDMap
InUserNS bool
}
// ToRootless converts the given spec file into one that should work with
// rootless containers, by removing incompatible options and adding others that
// are needed.
func ToRootless(spec *specs.Spec, opts *RootlessOpts) error {
var err error
ctx := RootlessContext{}
ctx.EUID = uint32(os.Geteuid())
ctx.EGID = uint32(os.Getegid())
ctx.SubUIDs, err = user.CurrentUserSubUIDs()
if err != nil && !os.IsNotExist(err) {
return err
}
ctx.SubGIDs, err = user.CurrentGroupSubGIDs()
if err != nil && !os.IsNotExist(err) {
return err
}
ctx.UIDMap, err = user.CurrentProcessUIDMap()
if err != nil && !os.IsNotExist(err) {
return err
}
uidMapExists := !os.IsNotExist(err)
ctx.GIDMap, err = user.CurrentProcessUIDMap()
if err != nil && !os.IsNotExist(err) {
return err
}
ctx.InUserNS = uidMapExists && system.UIDMapInUserNS(ctx.UIDMap)
return ToRootlessWithContext(ctx, spec, opts)
}
// ToRootlessWithContext converts the spec with the run-time context.
// ctx can be internally modified for sorting.
func ToRootlessWithContext(ctx RootlessContext, spec *specs.Spec, opts *RootlessOpts) error {
if opts == nil {
opts = &RootlessOpts{}
}
var namespaces []specs.LinuxNamespace
// Remove networkns from the spec.
for _, ns := range spec.Linux.Namespaces {
switch ns.Type {
case specs.NetworkNamespace, specs.UserNamespace:
// Do nothing.
default:
namespaces = append(namespaces, ns)
}
}
// Add userns to the spec.
namespaces = append(namespaces, specs.LinuxNamespace{
Type: specs.UserNamespace,
})
spec.Linux.Namespaces = namespaces
// Add mappings for the current user.
if ctx.InUserNS {
uNextContainerID := int64(0)
sort.Sort(idmapSorter(ctx.UIDMap))
for _, uidmap := range ctx.UIDMap {
spec.Linux.UIDMappings = append(spec.Linux.UIDMappings,
specs.LinuxIDMapping{
HostID: uint32(uidmap.ID),
ContainerID: uint32(uNextContainerID),
Size: uint32(uidmap.Count),
})
uNextContainerID += uidmap.Count
}
gNextContainerID := int64(0)
sort.Sort(idmapSorter(ctx.GIDMap))
for _, gidmap := range ctx.GIDMap {
spec.Linux.GIDMappings = append(spec.Linux.GIDMappings,
specs.LinuxIDMapping{
HostID: uint32(gidmap.ID),
ContainerID: uint32(gNextContainerID),
Size: uint32(gidmap.Count),
})
gNextContainerID += gidmap.Count
}
// opts.MapSubUIDGID is ignored in userns
} else {
spec.Linux.UIDMappings = []specs.LinuxIDMapping{{
HostID: ctx.EUID,
ContainerID: 0,
Size: 1,
}}
spec.Linux.GIDMappings = []specs.LinuxIDMapping{{
HostID: ctx.EGID,
ContainerID: 0,
Size: 1,
}}
if opts.MapSubUIDGID {
uNextContainerID := int64(1)
sort.Sort(subIDSorter(ctx.SubUIDs))
for _, subuid := range ctx.SubUIDs {
spec.Linux.UIDMappings = append(spec.Linux.UIDMappings,
specs.LinuxIDMapping{
HostID: uint32(subuid.SubID),
ContainerID: uint32(uNextContainerID),
Size: uint32(subuid.Count),
})
uNextContainerID += subuid.Count
}
gNextContainerID := int64(1)
sort.Sort(subIDSorter(ctx.SubGIDs))
for _, subgid := range ctx.SubGIDs {
spec.Linux.GIDMappings = append(spec.Linux.GIDMappings,
specs.LinuxIDMapping{
HostID: uint32(subgid.SubID),
ContainerID: uint32(gNextContainerID),
Size: uint32(subgid.Count),
})
gNextContainerID += subgid.Count
}
}
}
// Fix up mounts.
var mounts []specs.Mount
for _, mount := range spec.Mounts {
// Ignore all mounts that are under /sys.
if strings.HasPrefix(mount.Destination, "/sys") {
continue
}
// Remove all gid= and uid= mappings.
var options []string
for _, option := range mount.Options {
if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") {
options = append(options, option)
}
}
mount.Options = options
mounts = append(mounts, mount)
}
// Add the sysfs mount as an rbind.
mounts = append(mounts, specs.Mount{
Source: "/sys",
Destination: "/sys",
Type: "none",
Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
})
spec.Mounts = mounts
// Remove cgroup settings.
spec.Linux.Resources = nil
return nil
}
// subIDSorter is required for Go <= 1.7
type subIDSorter []user.SubID
func (x subIDSorter) Len() int { return len(x) }
func (x subIDSorter) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
func (x subIDSorter) Less(i, j int) bool { return x[i].SubID < x[j].SubID }
type idmapSorter []user.IDMap
func (x idmapSorter) Len() int { return len(x) }
func (x idmapSorter) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
func (x idmapSorter) Less(i, j int) bool { return x[i].ID < x[j].ID }

View File

@ -1,190 +0,0 @@
// +build linux
package specconv
import (
"reflect"
"testing"
"github.com/opencontainers/runc/libcontainer/configs/validate"
"github.com/opencontainers/runc/libcontainer/specconv"
"github.com/opencontainers/runc/libcontainer/user"
"github.com/opencontainers/runtime-spec/specs-go"
)
func TestRootlessSpecconvValidate(t *testing.T) {
specGen := func() *specs.Spec {
spec := specconv.Example()
spec.Root.Path = "/"
return spec
}
cases := []struct {
ctx RootlessContext
opts *RootlessOpts
additionalValidator func(t *testing.T, s *specs.Spec)
}{
{
ctx: RootlessContext{
EUID: 0,
EGID: 0,
},
},
{
ctx: RootlessContext{
EUID: 4242,
EGID: 4242,
},
},
{
ctx: RootlessContext{
EUID: 4242,
EGID: 4242,
// empty subuid / subgid
},
opts: &RootlessOpts{
MapSubUIDGID: true,
},
},
{
ctx: RootlessContext{
EUID: 4242,
EGID: 4242,
SubUIDs: []user.SubID{
{
Name: "dummy",
SubID: 14242,
Count: 65536,
},
{
Name: "dummy",
SubID: 114242,
Count: 65536,
},
},
SubGIDs: []user.SubID{
{
Name: "dummy",
SubID: 14242,
Count: 65536,
},
{
Name: "dummy",
SubID: 114242,
Count: 65536,
},
},
},
opts: &RootlessOpts{
MapSubUIDGID: true,
},
additionalValidator: func(t *testing.T, s *specs.Spec) {
expectedUIDMappings := []specs.LinuxIDMapping{
{
HostID: 4242,
ContainerID: 0,
Size: 1,
},
{
HostID: 14242,
ContainerID: 1,
Size: 65536,
},
{
HostID: 114242,
ContainerID: 65537,
Size: 65536,
},
}
if !reflect.DeepEqual(expectedUIDMappings, s.Linux.UIDMappings) {
t.Errorf("expected %#v, got %#v", expectedUIDMappings, s.Linux.UIDMappings)
}
expectedGIDMappings := expectedUIDMappings
if !reflect.DeepEqual(expectedGIDMappings, s.Linux.GIDMappings) {
t.Errorf("expected %#v, got %#v", expectedGIDMappings, s.Linux.GIDMappings)
}
},
},
{
ctx: RootlessContext{
EUID: 0,
EGID: 0,
UIDMap: []user.IDMap{
{
ID: 0,
ParentID: 4242,
Count: 1,
},
{
ID: 1,
ParentID: 231072,
Count: 65536,
},
},
GIDMap: []user.IDMap{
{
ID: 0,
ParentID: 4242,
Count: 1,
},
{
ID: 1,
ParentID: 231072,
Count: 65536,
},
},
InUserNS: true,
},
additionalValidator: func(t *testing.T, s *specs.Spec) {
expectedUIDMappings := []specs.LinuxIDMapping{
{
HostID: 0,
ContainerID: 0,
Size: 1,
},
{
HostID: 1,
ContainerID: 1,
Size: 65536,
},
}
if !reflect.DeepEqual(expectedUIDMappings, s.Linux.UIDMappings) {
t.Errorf("expected %#v, got %#v", expectedUIDMappings, s.Linux.UIDMappings)
}
expectedGIDMappings := expectedUIDMappings
if !reflect.DeepEqual(expectedGIDMappings, s.Linux.GIDMappings) {
t.Errorf("expected %#v, got %#v", expectedGIDMappings, s.Linux.GIDMappings)
}
},
},
}
for _, c := range cases {
spec := specGen()
err := ToRootlessWithContext(c.ctx, spec, c.opts)
if err != nil {
t.Errorf("Couldn't convert a rootful spec to rootless: %v", err)
}
// t.Logf("%#v", spec)
if c.additionalValidator != nil {
c.additionalValidator(t, spec)
}
opts := &specconv.CreateOpts{
CgroupName: "ContainerID",
UseSystemdCgroup: false,
Spec: spec,
Rootless: true,
}
config, err := specconv.CreateLibcontainerConfig(opts)
if err != nil {
t.Errorf("Couldn't create libcontainer config: %v", err)
}
validator := validate.New()
if err := validator.Validate(config); err != nil {
t.Errorf("Expected specconv to produce valid rootless container config: %v", err)
}
}
}

View File

@ -0,0 +1,113 @@
package specconv
import (
"os"
"sort"
"strings"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/user"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
)
// ToRootless converts spec to be compatible with "rootless" runc.
// * Adds userns (Note: since we are already in userns, ideally we should not need to do this. runc-side issue is tracked at https://github.com/opencontainers/runc/issues/1837)
// * Fix up mount flags (same as above)
// * Replace /sys with bind-mount (FIXME: we don't need to do this if netns is unshared)
func ToRootless(spec *specs.Spec) error {
if !system.RunningInUserNS() {
return errors.New("needs to be in user namespace")
}
uidMap, err := user.CurrentProcessUIDMap()
if err != nil && !os.IsNotExist(err) {
return err
}
gidMap, err := user.CurrentProcessUIDMap()
if err != nil && !os.IsNotExist(err) {
return err
}
return toRootless(spec, uidMap, gidMap)
}
// toRootless was forked from github.com/opencontainers/runc/libcontainer/specconv
func toRootless(spec *specs.Spec, uidMap, gidMap []user.IDMap) error {
if err := configureUserNS(spec, uidMap, gidMap); err != nil {
return err
}
if err := configureMounts(spec); err != nil {
return err
}
// Remove cgroup settings.
spec.Linux.Resources = nil
spec.Linux.CgroupsPath = ""
return nil
}
// configureUserNS add suserns and the current ID map to the spec.
// Since we are already in userns, ideally we should not need to add userns.
// However, currently rootless runc always requires userns to be added.
// https://github.com/opencontainers/runc/issues/1837
func configureUserNS(spec *specs.Spec, uidMap, gidMap []user.IDMap) error {
spec.Linux.Namespaces = append(spec.Linux.Namespaces, specs.LinuxNamespace{
Type: specs.UserNamespace,
})
sort.Slice(uidMap, func(i, j int) bool { return uidMap[i].ID < uidMap[j].ID })
uNextContainerID := int64(0)
for _, u := range uidMap {
spec.Linux.UIDMappings = append(spec.Linux.UIDMappings,
specs.LinuxIDMapping{
HostID: uint32(u.ID),
ContainerID: uint32(uNextContainerID),
Size: uint32(u.Count),
})
uNextContainerID += u.Count
}
sort.Slice(gidMap, func(i, j int) bool { return gidMap[i].ID < gidMap[j].ID })
gNextContainerID := int64(0)
for _, g := range gidMap {
spec.Linux.GIDMappings = append(spec.Linux.GIDMappings,
specs.LinuxIDMapping{
HostID: uint32(g.ID),
ContainerID: uint32(gNextContainerID),
Size: uint32(g.Count),
})
gNextContainerID += g.Count
}
return nil
}
func configureMounts(spec *specs.Spec) error {
var mounts []specs.Mount
for _, mount := range spec.Mounts {
// Ignore all mounts that are under /sys, because we add /sys later.
if strings.HasPrefix(mount.Destination, "/sys") {
continue
}
// Remove all gid= and uid= mappings.
// Since we are already in userns, ideally we should not need to do this.
// https://github.com/opencontainers/runc/issues/1837
var options []string
for _, option := range mount.Options {
if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") {
options = append(options, option)
}
}
mount.Options = options
mounts = append(mounts, mount)
}
// Add the sysfs mount as an rbind, because we can't mount /sys unless we have netns.
// TODO: keep original /sys mount when we have netns.
mounts = append(mounts, specs.Mount{
Source: "/sys",
Destination: "/sys",
Type: "none",
Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
})
spec.Mounts = mounts
return nil
}

View File

@ -0,0 +1,42 @@
package specconv
import (
"testing"
"github.com/opencontainers/runc/libcontainer/specconv"
"github.com/opencontainers/runc/libcontainer/user"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/require"
)
func TestToRootless(t *testing.T) {
spec := specconv.Example()
uidMap := []user.IDMap{
{
ID: 0,
ParentID: 4242,
Count: 1,
},
{
ID: 1,
ParentID: 231072,
Count: 65536,
},
}
gidMap := uidMap
expectedUIDMappings := []specs.LinuxIDMapping{
{
HostID: 0,
ContainerID: 0,
Size: 1,
},
{
HostID: 1,
ContainerID: 1,
Size: 65536,
},
}
err := toRootless(spec, uidMap, gidMap)
require.NoError(t, err)
require.EqualValues(t, expectedUIDMappings, spec.Linux.UIDMappings)
}

View File

@ -96,13 +96,13 @@ state = %q
buildkitdSock, stop, err := runBuildkitd([]string{"buildkitd",
"--oci-worker=false",
"--containerd-worker=true",
"--containerd-worker-addr", address}, logs)
"--containerd-worker-addr", address}, logs, 0, 0)
if err != nil {
return nil, nil, err
}
deferF.append(stop)
return &cdsandbox{address: address, sandbox: sandbox{address: buildkitdSock, logs: logs, cleanup: deferF}}, cl, nil
return &cdsandbox{address: address, sandbox: sandbox{address: buildkitdSock, logs: logs, cleanup: deferF, rootless: false}}, cl, nil
}
type cdsandbox struct {

View File

@ -3,6 +3,7 @@ package integration
import (
"bufio"
"bytes"
"fmt"
"io/ioutil"
"os"
"os/exec"
@ -12,16 +13,31 @@ import (
"time"
"github.com/google/shlex"
"github.com/pkg/errors"
)
func init() {
register(&oci{})
// the rootless uid is defined in hack/dockerfiles/test.Dockerfile
if s := os.Getenv("BUILDKIT_INTEGRATION_ROOTLESS_IDPAIR"); s != "" {
var uid, gid int
if _, err := fmt.Sscanf(s, "%d:%d", &uid, &gid); err != nil {
panic(errors.Errorf("unexpected BUILDKIT_INTEGRATION_ROOTLESS_IDPAIR: %q", s))
}
register(&oci{uid: uid, gid: gid})
}
}
type oci struct {
uid int
gid int
}
func (s *oci) Name() string {
if s.uid != 0 {
return "oci-rootless"
}
return "oci"
}
@ -33,7 +49,15 @@ func (s *oci) New() (Sandbox, func() error, error) {
return nil, nil, err
}
logs := map[string]*bytes.Buffer{}
buildkitdSock, stop, err := runBuildkitd([]string{"buildkitd", "--oci-worker=true", "--containerd-worker=false"}, logs)
buildkitdArgs := []string{"buildkitd", "--oci-worker=true", "--containerd-worker=false"}
if s.uid != 0 {
if s.gid == 0 {
return nil, nil, errors.Errorf("unsupported id pair: uid=%d, gid=%d", s.uid, s.gid)
}
// TODO: make sure the user exists and subuid/subgid are configured.
buildkitdArgs = append([]string{"sudo", "-u", fmt.Sprintf("#%d", s.uid), "-i", "--", "rootlesskit"}, buildkitdArgs...)
}
buildkitdSock, stop, err := runBuildkitd(buildkitdArgs, logs, s.uid, s.gid)
if err != nil {
return nil, nil, err
}
@ -41,13 +65,14 @@ func (s *oci) New() (Sandbox, func() error, error) {
deferF := &multiCloser{}
deferF.append(stop)
return &sandbox{address: buildkitdSock, logs: logs, cleanup: deferF}, deferF.F(), nil
return &sandbox{address: buildkitdSock, logs: logs, cleanup: deferF, rootless: s.uid != 0}, deferF.F(), nil
}
type sandbox struct {
address string
logs map[string]*bytes.Buffer
cleanup *multiCloser
address string
logs map[string]*bytes.Buffer
cleanup *multiCloser
rootless bool
}
func (sb *sandbox) Address() string {
@ -85,7 +110,11 @@ func (sb *sandbox) Cmd(args ...string) *exec.Cmd {
return cmd
}
func runBuildkitd(args []string, logs map[string]*bytes.Buffer) (address string, cl func() error, err error) {
func (sb *sandbox) Rootless() bool {
return sb.rootless
}
func runBuildkitd(args []string, logs map[string]*bytes.Buffer, uid, gid int) (address string, cl func() error, err error) {
deferF := &multiCloser{}
cl = deferF.F()
@ -100,6 +129,9 @@ func runBuildkitd(args []string, logs map[string]*bytes.Buffer) (address string,
if err != nil {
return "", nil, err
}
if err := os.Chown(tmpdir, uid, gid); err != nil {
return "", nil, err
}
deferF.append(func() error { return os.RemoveAll(tmpdir) })
address = "unix://" + filepath.Join(tmpdir, "buildkitd.sock")

View File

@ -17,6 +17,7 @@ type Sandbox interface {
PrintLogs(*testing.T)
Cmd(...string) *exec.Cmd
NewRegistry() (string, error)
Rootless() bool
}
type Worker interface {

View File

@ -60,8 +60,10 @@ github.com/uber/jaeger-lib c48167d9cae5887393dd5e61efd06a4a48b7fbb3
github.com/codahale/hdrhistogram f8ad88b59a584afeee9d334eff879b104439117b
github.com/opentracing-contrib/go-stdlib b1a47cfbdd7543e70e9ef3e73d0802ad306cc1cc
github.com/opencontainers/selinux 74a747aeaf2d66097b6908f572794f49f07dda2c
# used by dockerfile tests
gotest.tools v2.1.0
github.com/google/go-cmp v0.2.0
# used by rootless spec conv test
github.com/seccomp/libseccomp-golang 32f571b70023028bd57d9288c20efbcb237f3ce0

View File

@ -1,116 +0,0 @@
package validate
import (
"fmt"
"os"
"reflect"
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
)
var (
geteuid = os.Geteuid
getegid = os.Getegid
)
func (v *ConfigValidator) rootless(config *configs.Config) error {
if err := rootlessMappings(config); err != nil {
return err
}
if err := rootlessMount(config); err != nil {
return err
}
// XXX: We currently can't verify the user config at all, because
// configs.Config doesn't store the user-related configs. So this
// has to be verified by setupUser() in init_linux.go.
return nil
}
func hasIDMapping(id int, mappings []configs.IDMap) bool {
for _, m := range mappings {
if id >= m.ContainerID && id < m.ContainerID+m.Size {
return true
}
}
return false
}
func rootlessMappings(config *configs.Config) error {
if euid := geteuid(); euid != 0 {
if !config.Namespaces.Contains(configs.NEWUSER) {
return fmt.Errorf("rootless containers require user namespaces")
}
if len(config.UidMappings) == 0 {
return fmt.Errorf("rootless containers requires at least one UID mapping")
}
if len(config.GidMappings) == 0 {
return fmt.Errorf("rootless containers requires at least one GID mapping")
}
}
return nil
}
// cgroup verifies that the user isn't trying to set any cgroup limits or paths.
func rootlessCgroup(config *configs.Config) error {
// Nothing set at all.
if config.Cgroups == nil || config.Cgroups.Resources == nil {
return nil
}
// Used for comparing to the zero value.
left := reflect.ValueOf(*config.Cgroups.Resources)
right := reflect.Zero(left.Type())
// This is all we need to do, since specconv won't add cgroup options in
// rootless mode.
if !reflect.DeepEqual(left.Interface(), right.Interface()) {
return fmt.Errorf("cannot specify resource limits in rootless container")
}
return nil
}
// mount verifies that the user isn't trying to set up any mounts they don't have
// the rights to do. In addition, it makes sure that no mount has a `uid=` or
// `gid=` option that doesn't resolve to root.
func rootlessMount(config *configs.Config) error {
// XXX: We could whitelist allowed devices at this point, but I'm not
// convinced that's a good idea. The kernel is the best arbiter of
// access control.
for _, mount := range config.Mounts {
// Check that the options list doesn't contain any uid= or gid= entries
// that don't resolve to root.
for _, opt := range strings.Split(mount.Data, ",") {
if strings.HasPrefix(opt, "uid=") {
var uid int
n, err := fmt.Sscanf(opt, "uid=%d", &uid)
if n != 1 || err != nil {
// Ignore unknown mount options.
continue
}
if !hasIDMapping(uid, config.UidMappings) {
return fmt.Errorf("cannot specify uid= mount options for unmapped uid in rootless containers")
}
}
if strings.HasPrefix(opt, "gid=") {
var gid int
n, err := fmt.Sscanf(opt, "gid=%d", &gid)
if n != 1 || err != nil {
// Ignore unknown mount options.
continue
}
if !hasIDMapping(gid, config.GidMappings) {
return fmt.Errorf("cannot specify gid= mount options for unmapped gid in rootless containers")
}
}
}
}
return nil
}

View File

@ -1,212 +0,0 @@
package validate
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
selinux "github.com/opencontainers/selinux/go-selinux"
)
type Validator interface {
Validate(*configs.Config) error
}
func New() Validator {
return &ConfigValidator{}
}
type ConfigValidator struct {
}
func (v *ConfigValidator) Validate(config *configs.Config) error {
if err := v.rootfs(config); err != nil {
return err
}
if err := v.network(config); err != nil {
return err
}
if err := v.hostname(config); err != nil {
return err
}
if err := v.security(config); err != nil {
return err
}
if err := v.usernamespace(config); err != nil {
return err
}
if err := v.sysctl(config); err != nil {
return err
}
if err := v.intelrdt(config); err != nil {
return err
}
if config.Rootless {
if err := v.rootless(config); err != nil {
return err
}
}
return nil
}
// rootfs validates if the rootfs is an absolute path and is not a symlink
// to the container's root filesystem.
func (v *ConfigValidator) rootfs(config *configs.Config) error {
if _, err := os.Stat(config.Rootfs); err != nil {
if os.IsNotExist(err) {
return fmt.Errorf("rootfs (%s) does not exist", config.Rootfs)
}
return err
}
cleaned, err := filepath.Abs(config.Rootfs)
if err != nil {
return err
}
if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil {
return err
}
if filepath.Clean(config.Rootfs) != cleaned {
return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs)
}
return nil
}
func (v *ConfigValidator) network(config *configs.Config) error {
if !config.Namespaces.Contains(configs.NEWNET) {
if len(config.Networks) > 0 || len(config.Routes) > 0 {
return fmt.Errorf("unable to apply network settings without a private NET namespace")
}
}
return nil
}
func (v *ConfigValidator) hostname(config *configs.Config) error {
if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) {
return fmt.Errorf("unable to set hostname without a private UTS namespace")
}
return nil
}
func (v *ConfigValidator) security(config *configs.Config) error {
// restrict sys without mount namespace
if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) &&
!config.Namespaces.Contains(configs.NEWNS) {
return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
}
if config.ProcessLabel != "" && !selinux.GetEnabled() {
return fmt.Errorf("selinux label is specified in config, but selinux is disabled or not supported")
}
return nil
}
func (v *ConfigValidator) usernamespace(config *configs.Config) error {
if config.Namespaces.Contains(configs.NEWUSER) {
if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
return fmt.Errorf("USER namespaces aren't enabled in the kernel")
}
} else {
if config.UidMappings != nil || config.GidMappings != nil {
return fmt.Errorf("User namespace mappings specified, but USER namespace isn't enabled in the config")
}
}
return nil
}
// sysctl validates that the specified sysctl keys are valid or not.
// /proc/sys isn't completely namespaced and depending on which namespaces
// are specified, a subset of sysctls are permitted.
func (v *ConfigValidator) sysctl(config *configs.Config) error {
validSysctlMap := map[string]bool{
"kernel.msgmax": true,
"kernel.msgmnb": true,
"kernel.msgmni": true,
"kernel.sem": true,
"kernel.shmall": true,
"kernel.shmmax": true,
"kernel.shmmni": true,
"kernel.shm_rmid_forced": true,
}
for s := range config.Sysctl {
if validSysctlMap[s] || strings.HasPrefix(s, "fs.mqueue.") {
if config.Namespaces.Contains(configs.NEWIPC) {
continue
} else {
return fmt.Errorf("sysctl %q is not allowed in the hosts ipc namespace", s)
}
}
if strings.HasPrefix(s, "net.") {
if config.Namespaces.Contains(configs.NEWNET) {
if path := config.Namespaces.PathOf(configs.NEWNET); path != "" {
if err := checkHostNs(s, path); err != nil {
return err
}
}
continue
} else {
return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", s)
}
}
return fmt.Errorf("sysctl %q is not in a separate kernel namespace", s)
}
return nil
}
func (v *ConfigValidator) intelrdt(config *configs.Config) error {
if config.IntelRdt != nil {
if !intelrdt.IsEnabled() {
return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled")
}
if config.IntelRdt.L3CacheSchema == "" {
return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
}
}
return nil
}
func isSymbolicLink(path string) (bool, error) {
fi, err := os.Lstat(path)
if err != nil {
return false, err
}
return fi.Mode()&os.ModeSymlink == os.ModeSymlink, nil
}
// checkHostNs checks whether network sysctl is used in host namespace.
func checkHostNs(sysctlConfig string, path string) error {
var currentProcessNetns = "/proc/self/ns/net"
// readlink on the current processes network namespace
destOfCurrentProcess, err := os.Readlink(currentProcessNetns)
if err != nil {
return fmt.Errorf("read soft link %q error", currentProcessNetns)
}
// First check if the provided path is a symbolic link
symLink, err := isSymbolicLink(path)
if err != nil {
return fmt.Errorf("could not check that %q is a symlink: %v", path, err)
}
if symLink == false {
// The provided namespace is not a symbolic link,
// it is not the host namespace.
return nil
}
// readlink on the path provided in the struct
destOfContainer, err := os.Readlink(path)
if err != nil {
return fmt.Errorf("read soft link %q error", path)
}
if destOfContainer == destOfCurrentProcess {
return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", sysctlConfig)
}
return nil
}

View File

@ -1,553 +0,0 @@
// +build linux
package intelrdt
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"github.com/opencontainers/runc/libcontainer/configs"
)
/*
* About Intel RDT/CAT feature:
* Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
* Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3
* Cache is the only resource that is supported in RDT.
*
* This feature provides a way for the software to restrict cache allocation to a
* defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
* The different subsets are identified by class of service (CLOS) and each CLOS
* has a capacity bitmask (CBM).
*
* For more information about Intel RDT/CAT can be found in the section 17.17
* of Intel Software Developer Manual.
*
* About Intel RDT/CAT kernel interface:
* In Linux 4.10 kernel or newer, the interface is defined and exposed via
* "resource control" filesystem, which is a "cgroup-like" interface.
*
* Comparing with cgroups, it has similar process management lifecycle and
* interfaces in a container. But unlike cgroups' hierarchy, it has single level
* filesystem layout.
*
* Intel RDT "resource control" filesystem hierarchy:
* mount -t resctrl resctrl /sys/fs/resctrl
* tree /sys/fs/resctrl
* /sys/fs/resctrl/
* |-- info
* | |-- L3
* | |-- cbm_mask
* | |-- min_cbm_bits
* | |-- num_closids
* |-- cpus
* |-- schemata
* |-- tasks
* |-- <container_id>
* |-- cpus
* |-- schemata
* |-- tasks
*
* For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
* resource constraints.
*
* The file `tasks` has a list of tasks that belongs to this group (e.g.,
* <container_id>" group). Tasks can be added to a group by writing the task ID
* to the "tasks" file (which will automatically remove them from the previous
* group to which they belonged). New tasks created by fork(2) and clone(2) are
* added to the same group as their parent. If a pid is not in any sub group, it is
* in root group.
*
* The file `schemata` has allocation bitmasks/values for L3 cache on each socket,
* which contains L3 cache id and capacity bitmask (CBM).
* Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
* For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
* which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
*
* The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
* be set is less than the max bit. The max bits in the CBM is varied among
* supported Intel Xeon platforms. In Intel RDT "resource control" filesystem
* layout, the CBM in a group should be a subset of the CBM in root. Kernel will
* check if it is valid when writing. e.g., 0xfffff in root indicates the max bits
* of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM
* values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
*
* For more information about Intel RDT/CAT kernel interface:
* https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
*
* An example for runc:
* Consider a two-socket machine with two L3 caches where the default CBM is
* 0xfffff and the max CBM length is 20 bits. With this configuration, tasks
* inside the container only have access to the "upper" 80% of L3 cache id 0 and
* the "lower" 50% L3 cache id 1:
*
* "linux": {
* "intelRdt": {
* "l3CacheSchema": "L3:0=ffff0;1=3ff"
* }
* }
*/
type Manager interface {
// Applies Intel RDT configuration to the process with the specified pid
Apply(pid int) error
// Returns statistics for Intel RDT
GetStats() (*Stats, error)
// Destroys the Intel RDT 'container_id' group
Destroy() error
// Returns Intel RDT path to save in a state file and to be able to
// restore the object later
GetPath() string
// Set Intel RDT "resource control" filesystem as configured.
Set(container *configs.Config) error
}
// This implements interface Manager
type IntelRdtManager struct {
mu sync.Mutex
Config *configs.Config
Id string
Path string
}
const (
IntelRdtTasks = "tasks"
)
var (
// The absolute root path of the Intel RDT "resource control" filesystem
intelRdtRoot string
intelRdtRootLock sync.Mutex
// The flag to indicate if Intel RDT is supported
isEnabled bool
)
type intelRdtData struct {
root string
config *configs.Config
pid int
}
// Check if Intel RDT is enabled in init()
func init() {
// 1. Check if hardware and kernel support Intel RDT/CAT feature
// "cat_l3" flag is set if supported
isFlagSet, err := parseCpuInfoFile("/proc/cpuinfo")
if !isFlagSet || err != nil {
isEnabled = false
return
}
// 2. Check if Intel RDT "resource control" filesystem is mounted
// The user guarantees to mount the filesystem
isEnabled = isIntelRdtMounted()
}
// Return the mount point path of Intel RDT "resource control" filesysem
func findIntelRdtMountpointDir() (string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
}
defer f.Close()
s := bufio.NewScanner(f)
for s.Scan() {
text := s.Text()
fields := strings.Split(text, " ")
// Safe as mountinfo encodes mountpoints with spaces as \040.
index := strings.Index(text, " - ")
postSeparatorFields := strings.Fields(text[index+3:])
numPostFields := len(postSeparatorFields)
// This is an error as we can't detect if the mount is for "Intel RDT"
if numPostFields == 0 {
return "", fmt.Errorf("Found no fields post '-' in %q", text)
}
if postSeparatorFields[0] == "resctrl" {
// Check that the mount is properly formated.
if numPostFields < 3 {
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
}
return fields[4], nil
}
}
if err := s.Err(); err != nil {
return "", err
}
return "", NewNotFoundError("Intel RDT")
}
// Gets the root path of Intel RDT "resource control" filesystem
func getIntelRdtRoot() (string, error) {
intelRdtRootLock.Lock()
defer intelRdtRootLock.Unlock()
if intelRdtRoot != "" {
return intelRdtRoot, nil
}
root, err := findIntelRdtMountpointDir()
if err != nil {
return "", err
}
if _, err := os.Stat(root); err != nil {
return "", err
}
intelRdtRoot = root
return intelRdtRoot, nil
}
func isIntelRdtMounted() bool {
_, err := getIntelRdtRoot()
if err != nil {
return false
}
return true
}
func parseCpuInfoFile(path string) (bool, error) {
f, err := os.Open(path)
if err != nil {
return false, err
}
defer f.Close()
s := bufio.NewScanner(f)
for s.Scan() {
if err := s.Err(); err != nil {
return false, err
}
text := s.Text()
flags := strings.Split(text, " ")
// "cat_l3" flag is set if Intel RDT/CAT is supported
for _, flag := range flags {
if flag == "cat_l3" {
return true, nil
}
}
}
return false, nil
}
func parseUint(s string, base, bitSize int) (uint64, error) {
value, err := strconv.ParseUint(s, base, bitSize)
if err != nil {
intValue, intErr := strconv.ParseInt(s, base, bitSize)
// 1. Handle negative values greater than MinInt64 (and)
// 2. Handle negative values lesser than MinInt64
if intErr == nil && intValue < 0 {
return 0, nil
} else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 {
return 0, nil
}
return value, err
}
return value, nil
}
// Gets a single uint64 value from the specified file.
func getIntelRdtParamUint(path, file string) (uint64, error) {
fileName := filepath.Join(path, file)
contents, err := ioutil.ReadFile(fileName)
if err != nil {
return 0, err
}
res, err := parseUint(strings.TrimSpace(string(contents)), 10, 64)
if err != nil {
return res, fmt.Errorf("unable to parse %q as a uint from file %q", string(contents), fileName)
}
return res, nil
}
// Gets a string value from the specified file
func getIntelRdtParamString(path, file string) (string, error) {
contents, err := ioutil.ReadFile(filepath.Join(path, file))
if err != nil {
return "", err
}
return strings.TrimSpace(string(contents)), nil
}
func readTasksFile(dir string) ([]int, error) {
f, err := os.Open(filepath.Join(dir, IntelRdtTasks))
if err != nil {
return nil, err
}
defer f.Close()
var (
s = bufio.NewScanner(f)
out = []int{}
)
for s.Scan() {
if t := s.Text(); t != "" {
pid, err := strconv.Atoi(t)
if err != nil {
return nil, err
}
out = append(out, pid)
}
}
return out, nil
}
func writeFile(dir, file, data string) error {
if dir == "" {
return fmt.Errorf("no such directory for %s", file)
}
if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data+"\n"), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
}
return nil
}
func getIntelRdtData(c *configs.Config, pid int) (*intelRdtData, error) {
rootPath, err := getIntelRdtRoot()
if err != nil {
return nil, err
}
return &intelRdtData{
root: rootPath,
config: c,
pid: pid,
}, nil
}
// Get the read-only L3 cache information
func getL3CacheInfo() (*L3CacheInfo, error) {
l3CacheInfo := &L3CacheInfo{}
rootPath, err := getIntelRdtRoot()
if err != nil {
return l3CacheInfo, err
}
path := filepath.Join(rootPath, "info", "L3")
cbmMask, err := getIntelRdtParamString(path, "cbm_mask")
if err != nil {
return l3CacheInfo, err
}
minCbmBits, err := getIntelRdtParamUint(path, "min_cbm_bits")
if err != nil {
return l3CacheInfo, err
}
numClosids, err := getIntelRdtParamUint(path, "num_closids")
if err != nil {
return l3CacheInfo, err
}
l3CacheInfo.CbmMask = cbmMask
l3CacheInfo.MinCbmBits = minCbmBits
l3CacheInfo.NumClosids = numClosids
return l3CacheInfo, nil
}
// WriteIntelRdtTasks writes the specified pid into the "tasks" file
func WriteIntelRdtTasks(dir string, pid int) error {
if dir == "" {
return fmt.Errorf("no such directory for %s", IntelRdtTasks)
}
// Dont attach any pid if -1 is specified as a pid
if pid != -1 {
if err := ioutil.WriteFile(filepath.Join(dir, IntelRdtTasks), []byte(strconv.Itoa(pid)), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, IntelRdtTasks, err)
}
}
return nil
}
// Check if Intel RDT is enabled
func IsEnabled() bool {
return isEnabled
}
// Get the 'container_id' path in Intel RDT "resource control" filesystem
func GetIntelRdtPath(id string) (string, error) {
rootPath, err := getIntelRdtRoot()
if err != nil {
return "", err
}
path := filepath.Join(rootPath, id)
return path, nil
}
// Applies Intel RDT configuration to the process with the specified pid
func (m *IntelRdtManager) Apply(pid int) (err error) {
// If intelRdt is not specified in config, we do nothing
if m.Config.IntelRdt == nil {
return nil
}
d, err := getIntelRdtData(m.Config, pid)
if err != nil && !IsNotFound(err) {
return err
}
m.mu.Lock()
defer m.mu.Unlock()
path, err := d.join(m.Id)
if err != nil {
return err
}
m.Path = path
return nil
}
// Destroys the Intel RDT 'container_id' group
func (m *IntelRdtManager) Destroy() error {
m.mu.Lock()
defer m.mu.Unlock()
if err := os.RemoveAll(m.Path); err != nil {
return err
}
m.Path = ""
return nil
}
// Returns Intel RDT path to save in a state file and to be able to
// restore the object later
func (m *IntelRdtManager) GetPath() string {
if m.Path == "" {
m.Path, _ = GetIntelRdtPath(m.Id)
}
return m.Path
}
// Returns statistics for Intel RDT
func (m *IntelRdtManager) GetStats() (*Stats, error) {
// If intelRdt is not specified in config
if m.Config.IntelRdt == nil {
return nil, nil
}
m.mu.Lock()
defer m.mu.Unlock()
stats := NewStats()
// The read-only L3 cache information
l3CacheInfo, err := getL3CacheInfo()
if err != nil {
return nil, err
}
stats.L3CacheInfo = l3CacheInfo
// The read-only L3 cache schema in root
rootPath, err := getIntelRdtRoot()
if err != nil {
return nil, err
}
tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata")
if err != nil {
return nil, err
}
// L3 cache schema is in the first line
schemaRootStrings := strings.Split(tmpRootStrings, "\n")
stats.L3CacheSchemaRoot = schemaRootStrings[0]
// The L3 cache schema in 'container_id' group
tmpStrings, err := getIntelRdtParamString(m.GetPath(), "schemata")
if err != nil {
return nil, err
}
// L3 cache schema is in the first line
schemaStrings := strings.Split(tmpStrings, "\n")
stats.L3CacheSchema = schemaStrings[0]
return stats, nil
}
// Set Intel RDT "resource control" filesystem as configured.
func (m *IntelRdtManager) Set(container *configs.Config) error {
path := m.GetPath()
// About L3 cache schema file:
// The schema has allocation masks/values for L3 cache on each socket,
// which contains L3 cache id and capacity bitmask (CBM).
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
// For example, on a two-socket machine, L3's schema line could be:
// L3:0=ff;1=c0
// Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
//
// About L3 cache CBM validity:
// The valid L3 cache CBM is a *contiguous bits set* and number of
// bits that can be set is less than the max bit. The max bits in the
// CBM is varied among supported Intel Xeon platforms. In Intel RDT
// "resource control" filesystem layout, the CBM in a group should
// be a subset of the CBM in root. Kernel will check if it is valid
// when writing.
// e.g., 0xfffff in root indicates the max bits of CBM is 20 bits,
// which mapping to entire L3 cache capacity. Some valid CBM values
// to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
if container.IntelRdt != nil {
l3CacheSchema := container.IntelRdt.L3CacheSchema
if l3CacheSchema != "" {
if err := writeFile(path, "schemata", l3CacheSchema); err != nil {
return err
}
}
}
return nil
}
func (raw *intelRdtData) join(id string) (string, error) {
path := filepath.Join(raw.root, id)
if err := os.MkdirAll(path, 0755); err != nil {
return "", err
}
if err := WriteIntelRdtTasks(path, raw.pid); err != nil {
return "", err
}
return path, nil
}
type NotFoundError struct {
ResourceControl string
}
func (e *NotFoundError) Error() string {
return fmt.Sprintf("mountpoint for %s not found", e.ResourceControl)
}
func NewNotFoundError(res string) error {
return &NotFoundError{
ResourceControl: res,
}
}
func IsNotFound(err error) bool {
if err == nil {
return false
}
_, ok := err.(*NotFoundError)
return ok
}

View File

@ -1,24 +0,0 @@
// +build linux
package intelrdt
type L3CacheInfo struct {
CbmMask string `json:"cbm_mask,omitempty"`
MinCbmBits uint64 `json:"min_cbm_bits,omitempty"`
NumClosids uint64 `json:"num_closids,omitempty"`
}
type Stats struct {
// The read-only L3 cache information
L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"`
// The read-only L3 cache schema in root
L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"`
// The L3 cache schema in 'container_id' group
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
}
func NewStats() *Stats {
return &Stats{}
}

View File

@ -1,201 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright {yyyy} {name of copyright owner}
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -1,7 +0,0 @@
# selinux
[![GoDoc](https://godoc.org/github.com/opencontainers/selinux?status.svg)](https://godoc.org/github.com/opencontainers/selinux) [![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/selinux)](https://goreportcard.com/report/github.com/opencontainers/selinux) [![Build Status](https://travis-ci.org/opencontainers/selinux.svg?branch=master)](https://travis-ci.org/opencontainers/selinux)
Common SELinux package used across the container ecosystem.
Please see the [godoc](https://godoc.org/github.com/opencontainers/selinux) for more information.

View File

@ -1,688 +0,0 @@
// +build linux
package selinux
import (
"bufio"
"bytes"
"crypto/rand"
"encoding/binary"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"sync"
"syscall"
)
const (
// Enforcing constant indicate SELinux is in enforcing mode
Enforcing = 1
// Permissive constant to indicate SELinux is in permissive mode
Permissive = 0
// Disabled constant to indicate SELinux is disabled
Disabled = -1
selinuxDir = "/etc/selinux/"
selinuxConfig = selinuxDir + "config"
selinuxfsMount = "/sys/fs/selinux"
selinuxTypeTag = "SELINUXTYPE"
selinuxTag = "SELINUX"
xattrNameSelinux = "security.selinux"
stRdOnly = 0x01
selinuxfsMagic = 0xf97cff8c
)
type selinuxState struct {
enabledSet bool
enabled bool
selinuxfsSet bool
selinuxfs string
mcsList map[string]bool
sync.Mutex
}
var (
assignRegex = regexp.MustCompile(`^([^=]+)=(.*)$`)
state = selinuxState{
mcsList: make(map[string]bool),
}
)
// Context is a representation of the SELinux label broken into 4 parts
type Context map[string]string
func (s *selinuxState) setEnable(enabled bool) bool {
s.Lock()
defer s.Unlock()
s.enabledSet = true
s.enabled = enabled
return s.enabled
}
func (s *selinuxState) getEnabled() bool {
s.Lock()
enabled := s.enabled
enabledSet := s.enabledSet
s.Unlock()
if enabledSet {
return enabled
}
enabled = false
if fs := getSelinuxMountPoint(); fs != "" {
if con, _ := CurrentLabel(); con != "kernel" {
enabled = true
}
}
return s.setEnable(enabled)
}
// SetDisabled disables selinux support for the package
func SetDisabled() {
state.setEnable(false)
}
func (s *selinuxState) setSELinuxfs(selinuxfs string) string {
s.Lock()
defer s.Unlock()
s.selinuxfsSet = true
s.selinuxfs = selinuxfs
return s.selinuxfs
}
func verifySELinuxfsMount(mnt string) bool {
var buf syscall.Statfs_t
for {
err := syscall.Statfs(mnt, &buf)
if err == nil {
break
}
if err == syscall.EAGAIN {
continue
}
return false
}
if uint32(buf.Type) != uint32(selinuxfsMagic) {
return false
}
if (buf.Flags & stRdOnly) != 0 {
return false
}
return true
}
func findSELinuxfs() string {
// fast path: check the default mount first
if verifySELinuxfsMount(selinuxfsMount) {
return selinuxfsMount
}
// check if selinuxfs is available before going the slow path
fs, err := ioutil.ReadFile("/proc/filesystems")
if err != nil {
return ""
}
if !bytes.Contains(fs, []byte("\tselinuxfs\n")) {
return ""
}
// slow path: try to find among the mounts
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return ""
}
defer f.Close()
scanner := bufio.NewScanner(f)
for {
mnt := findSELinuxfsMount(scanner)
if mnt == "" { // error or not found
return ""
}
if verifySELinuxfsMount(mnt) {
return mnt
}
}
}
// findSELinuxfsMount returns a next selinuxfs mount point found,
// if there is one, or an empty string in case of EOF or error.
func findSELinuxfsMount(s *bufio.Scanner) string {
for s.Scan() {
txt := s.Text()
// The first field after - is fs type.
// Safe as spaces in mountpoints are encoded as \040
if !strings.Contains(txt, " - selinuxfs ") {
continue
}
const mPos = 5 // mount point is 5th field
fields := strings.SplitN(txt, " ", mPos+1)
if len(fields) < mPos+1 {
continue
}
return fields[mPos-1]
}
return ""
}
func (s *selinuxState) getSELinuxfs() string {
s.Lock()
selinuxfs := s.selinuxfs
selinuxfsSet := s.selinuxfsSet
s.Unlock()
if selinuxfsSet {
return selinuxfs
}
return s.setSELinuxfs(findSELinuxfs())
}
// getSelinuxMountPoint returns the path to the mountpoint of an selinuxfs
// filesystem or an empty string if no mountpoint is found. Selinuxfs is
// a proc-like pseudo-filesystem that exposes the selinux policy API to
// processes. The existence of an selinuxfs mount is used to determine
// whether selinux is currently enabled or not.
func getSelinuxMountPoint() string {
return state.getSELinuxfs()
}
// GetEnabled returns whether selinux is currently enabled.
func GetEnabled() bool {
return state.getEnabled()
}
func readConfig(target string) (value string) {
var (
val, key string
bufin *bufio.Reader
)
in, err := os.Open(selinuxConfig)
if err != nil {
return ""
}
defer in.Close()
bufin = bufio.NewReader(in)
for done := false; !done; {
var line string
if line, err = bufin.ReadString('\n'); err != nil {
if err != io.EOF {
return ""
}
done = true
}
line = strings.TrimSpace(line)
if len(line) == 0 {
// Skip blank lines
continue
}
if line[0] == ';' || line[0] == '#' {
// Skip comments
continue
}
if groups := assignRegex.FindStringSubmatch(line); groups != nil {
key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2])
if key == target {
return strings.Trim(val, "\"")
}
}
}
return ""
}
func getSELinuxPolicyRoot() string {
return selinuxDir + readConfig(selinuxTypeTag)
}
func readCon(name string) (string, error) {
var val string
in, err := os.Open(name)
if err != nil {
return "", err
}
defer in.Close()
_, err = fmt.Fscanf(in, "%s", &val)
return strings.Trim(val, "\x00"), err
}
// SetFileLabel sets the SELinux label for this path or returns an error.
func SetFileLabel(path string, label string) error {
return lsetxattr(path, xattrNameSelinux, []byte(label), 0)
}
// FileLabel returns the SELinux label for this path or returns an error.
func FileLabel(path string) (string, error) {
label, err := lgetxattr(path, xattrNameSelinux)
if err != nil {
return "", err
}
// Trim the NUL byte at the end of the byte buffer, if present.
if len(label) > 0 && label[len(label)-1] == '\x00' {
label = label[:len(label)-1]
}
return string(label), nil
}
/*
SetFSCreateLabel tells kernel the label to create all file system objects
created by this task. Setting label="" to return to default.
*/
func SetFSCreateLabel(label string) error {
return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", syscall.Gettid()), label)
}
/*
FSCreateLabel returns the default label the kernel which the kernel is using
for file system objects created by this task. "" indicates default.
*/
func FSCreateLabel() (string, error) {
return readCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", syscall.Gettid()))
}
// CurrentLabel returns the SELinux label of the current process thread, or an error.
func CurrentLabel() (string, error) {
return readCon(fmt.Sprintf("/proc/self/task/%d/attr/current", syscall.Gettid()))
}
// PidLabel returns the SELinux label of the given pid, or an error.
func PidLabel(pid int) (string, error) {
return readCon(fmt.Sprintf("/proc/%d/attr/current", pid))
}
/*
ExecLabel returns the SELinux label that the kernel will use for any programs
that are executed by the current process thread, or an error.
*/
func ExecLabel() (string, error) {
return readCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid()))
}
func writeCon(name string, val string) error {
out, err := os.OpenFile(name, os.O_WRONLY, 0)
if err != nil {
return err
}
defer out.Close()
if val != "" {
_, err = out.Write([]byte(val))
} else {
_, err = out.Write(nil)
}
return err
}
/*
CanonicalizeContext takes a context string and writes it to the kernel
the function then returns the context that the kernel will use. This function
can be used to see if two contexts are equivalent
*/
func CanonicalizeContext(val string) (string, error) {
return readWriteCon(filepath.Join(getSelinuxMountPoint(), "context"), val)
}
func readWriteCon(name string, val string) (string, error) {
var retval string
f, err := os.OpenFile(name, os.O_RDWR, 0)
if err != nil {
return "", err
}
defer f.Close()
_, err = f.Write([]byte(val))
if err != nil {
return "", err
}
_, err = fmt.Fscanf(f, "%s", &retval)
return strings.Trim(retval, "\x00"), err
}
/*
SetExecLabel sets the SELinux label that the kernel will use for any programs
that are executed by the current process thread, or an error.
*/
func SetExecLabel(label string) error {
return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid()), label)
}
// Get returns the Context as a string
func (c Context) Get() string {
if c["level"] != "" {
return fmt.Sprintf("%s:%s:%s:%s", c["user"], c["role"], c["type"], c["level"])
}
return fmt.Sprintf("%s:%s:%s", c["user"], c["role"], c["type"])
}
// NewContext creates a new Context struct from the specified label
func NewContext(label string) Context {
c := make(Context)
if len(label) != 0 {
con := strings.SplitN(label, ":", 4)
c["user"] = con[0]
c["role"] = con[1]
c["type"] = con[2]
if len(con) > 3 {
c["level"] = con[3]
}
}
return c
}
// ReserveLabel reserves the MLS/MCS level component of the specified label
func ReserveLabel(label string) {
if len(label) != 0 {
con := strings.SplitN(label, ":", 4)
if len(con) > 3 {
mcsAdd(con[3])
}
}
}
func selinuxEnforcePath() string {
return fmt.Sprintf("%s/enforce", getSelinuxMountPoint())
}
// EnforceMode returns the current SELinux mode Enforcing, Permissive, Disabled
func EnforceMode() int {
var enforce int
enforceS, err := readCon(selinuxEnforcePath())
if err != nil {
return -1
}
enforce, err = strconv.Atoi(string(enforceS))
if err != nil {
return -1
}
return enforce
}
/*
SetEnforceMode sets the current SELinux mode Enforcing, Permissive.
Disabled is not valid, since this needs to be set at boot time.
*/
func SetEnforceMode(mode int) error {
return writeCon(selinuxEnforcePath(), fmt.Sprintf("%d", mode))
}
/*
DefaultEnforceMode returns the systems default SELinux mode Enforcing,
Permissive or Disabled. Note this is is just the default at boot time.
EnforceMode tells you the systems current mode.
*/
func DefaultEnforceMode() int {
switch readConfig(selinuxTag) {
case "enforcing":
return Enforcing
case "permissive":
return Permissive
}
return Disabled
}
func mcsAdd(mcs string) error {
if mcs == "" {
return nil
}
state.Lock()
defer state.Unlock()
if state.mcsList[mcs] {
return fmt.Errorf("MCS Label already exists")
}
state.mcsList[mcs] = true
return nil
}
func mcsDelete(mcs string) {
if mcs == "" {
return
}
state.Lock()
defer state.Unlock()
state.mcsList[mcs] = false
}
func intToMcs(id int, catRange uint32) string {
var (
SETSIZE = int(catRange)
TIER = SETSIZE
ORD = id
)
if id < 1 || id > 523776 {
return ""
}
for ORD > TIER {
ORD = ORD - TIER
TIER--
}
TIER = SETSIZE - TIER
ORD = ORD + TIER
return fmt.Sprintf("s0:c%d,c%d", TIER, ORD)
}
func uniqMcs(catRange uint32) string {
var (
n uint32
c1, c2 uint32
mcs string
)
for {
binary.Read(rand.Reader, binary.LittleEndian, &n)
c1 = n % catRange
binary.Read(rand.Reader, binary.LittleEndian, &n)
c2 = n % catRange
if c1 == c2 {
continue
} else {
if c1 > c2 {
c1, c2 = c2, c1
}
}
mcs = fmt.Sprintf("s0:c%d,c%d", c1, c2)
if err := mcsAdd(mcs); err != nil {
continue
}
break
}
return mcs
}
/*
ReleaseLabel will unreserve the MLS/MCS Level field of the specified label.
Allowing it to be used by another process.
*/
func ReleaseLabel(label string) {
if len(label) != 0 {
con := strings.SplitN(label, ":", 4)
if len(con) > 3 {
mcsDelete(con[3])
}
}
}
var roFileLabel string
// ROFileLabel returns the specified SELinux readonly file label
func ROFileLabel() (fileLabel string) {
return roFileLabel
}
/*
ContainerLabels returns an allocated processLabel and fileLabel to be used for
container labeling by the calling process.
*/
func ContainerLabels() (processLabel string, fileLabel string) {
var (
val, key string
bufin *bufio.Reader
)
if !GetEnabled() {
return "", ""
}
lxcPath := fmt.Sprintf("%s/contexts/lxc_contexts", getSELinuxPolicyRoot())
in, err := os.Open(lxcPath)
if err != nil {
return "", ""
}
defer in.Close()
bufin = bufio.NewReader(in)
for done := false; !done; {
var line string
if line, err = bufin.ReadString('\n'); err != nil {
if err == io.EOF {
done = true
} else {
goto exit
}
}
line = strings.TrimSpace(line)
if len(line) == 0 {
// Skip blank lines
continue
}
if line[0] == ';' || line[0] == '#' {
// Skip comments
continue
}
if groups := assignRegex.FindStringSubmatch(line); groups != nil {
key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2])
if key == "process" {
processLabel = strings.Trim(val, "\"")
}
if key == "file" {
fileLabel = strings.Trim(val, "\"")
}
if key == "ro_file" {
roFileLabel = strings.Trim(val, "\"")
}
}
}
if processLabel == "" || fileLabel == "" {
return "", ""
}
if roFileLabel == "" {
roFileLabel = fileLabel
}
exit:
scon := NewContext(processLabel)
if scon["level"] != "" {
mcs := uniqMcs(1024)
scon["level"] = mcs
processLabel = scon.Get()
scon = NewContext(fileLabel)
scon["level"] = mcs
fileLabel = scon.Get()
}
return processLabel, fileLabel
}
// SecurityCheckContext validates that the SELinux label is understood by the kernel
func SecurityCheckContext(val string) error {
return writeCon(fmt.Sprintf("%s/context", getSelinuxMountPoint()), val)
}
/*
CopyLevel returns a label with the MLS/MCS level from src label replaces on
the dest label.
*/
func CopyLevel(src, dest string) (string, error) {
if src == "" {
return "", nil
}
if err := SecurityCheckContext(src); err != nil {
return "", err
}
if err := SecurityCheckContext(dest); err != nil {
return "", err
}
scon := NewContext(src)
tcon := NewContext(dest)
mcsDelete(tcon["level"])
mcsAdd(scon["level"])
tcon["level"] = scon["level"]
return tcon.Get(), nil
}
// Prevent users from relabing system files
func badPrefix(fpath string) error {
var badprefixes = []string{"/usr"}
for _, prefix := range badprefixes {
if fpath == prefix || strings.HasPrefix(fpath, fmt.Sprintf("%s/", prefix)) {
return fmt.Errorf("relabeling content in %s is not allowed", prefix)
}
}
return nil
}
// Chcon changes the fpath file object to the SELinux label label.
// If the fpath is a directory and recurse is true Chcon will walk the
// directory tree setting the label
func Chcon(fpath string, label string, recurse bool) error {
if label == "" {
return nil
}
if err := badPrefix(fpath); err != nil {
return err
}
callback := func(p string, info os.FileInfo, err error) error {
return SetFileLabel(p, label)
}
if recurse {
return filepath.Walk(fpath, callback)
}
return SetFileLabel(fpath, label)
}
// DupSecOpt takes an SELinux process label and returns security options that
// can will set the SELinux Type and Level for future container processes
func DupSecOpt(src string) []string {
if src == "" {
return nil
}
con := NewContext(src)
if con["user"] == "" ||
con["role"] == "" ||
con["type"] == "" {
return nil
}
dup := []string{"user:" + con["user"],
"role:" + con["role"],
"type:" + con["type"],
}
if con["level"] != "" {
dup = append(dup, "level:"+con["level"])
}
return dup
}
// DisableSecOpt returns a security opt that can be used to disabling SELinux
// labeling support for future container processes
func DisableSecOpt() []string {
return []string{"disable"}
}

View File

@ -1,78 +0,0 @@
// +build linux
package selinux
import (
"syscall"
"unsafe"
)
var _zero uintptr
// Returns a []byte slice if the xattr is set and nil otherwise
// Requires path and its attribute as arguments
func lgetxattr(path string, attr string) ([]byte, error) {
var sz int
pathBytes, err := syscall.BytePtrFromString(path)
if err != nil {
return nil, err
}
attrBytes, err := syscall.BytePtrFromString(attr)
if err != nil {
return nil, err
}
// Start with a 128 length byte array
sz = 128
dest := make([]byte, sz)
destBytes := unsafe.Pointer(&dest[0])
_sz, _, errno := syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
switch {
case errno == syscall.ENODATA:
return nil, errno
case errno == syscall.ENOTSUP:
return nil, errno
case errno == syscall.ERANGE:
// 128 byte array might just not be good enough,
// A dummy buffer is used ``uintptr(0)`` to get real size
// of the xattrs on disk
_sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(unsafe.Pointer(nil)), uintptr(0), 0, 0)
sz = int(_sz)
if sz < 0 {
return nil, errno
}
dest = make([]byte, sz)
destBytes := unsafe.Pointer(&dest[0])
_sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
if errno != 0 {
return nil, errno
}
case errno != 0:
return nil, errno
}
sz = int(_sz)
return dest[:sz], nil
}
func lsetxattr(path string, attr string, data []byte, flags int) error {
pathBytes, err := syscall.BytePtrFromString(path)
if err != nil {
return err
}
attrBytes, err := syscall.BytePtrFromString(attr)
if err != nil {
return err
}
var dataBytes unsafe.Pointer
if len(data) > 0 {
dataBytes = unsafe.Pointer(&data[0])
} else {
dataBytes = unsafe.Pointer(&_zero)
}
_, _, errno := syscall.Syscall6(syscall.SYS_LSETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(dataBytes), uintptr(len(data)), uintptr(flags), 0)
if errno != 0 {
return errno
}
return nil
}

22
vendor/github.com/seccomp/libseccomp-golang/LICENSE generated vendored Normal file
View File

@ -0,0 +1,22 @@
Copyright (c) 2015 Matthew Heon <mheon@redhat.com>
Copyright (c) 2015 Paul Moore <pmoore@redhat.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

26
vendor/github.com/seccomp/libseccomp-golang/README generated vendored Normal file
View File

@ -0,0 +1,26 @@
libseccomp-golang: Go Language Bindings for the libseccomp Project
===============================================================================
https://github.com/seccomp/libseccomp-golang
https://github.com/seccomp/libseccomp
The libseccomp library provides an easy to use, platform independent, interface
to the Linux Kernel's syscall filtering mechanism. The libseccomp API is
designed to abstract away the underlying BPF based syscall filter language and
present a more conventional function-call based filtering interface that should
be familiar to, and easily adopted by, application developers.
The libseccomp-golang library provides a Go based interface to the libseccomp
library.
* Online Resources
The library source repository currently lives on GitHub at the following URLs:
-> https://github.com/seccomp/libseccomp-golang
-> https://github.com/seccomp/libseccomp
The project mailing list is currently hosted on Google Groups at the URL below,
please note that a Google account is not required to subscribe to the mailing
list.
-> https://groups.google.com/d/forum/libseccomp

857
vendor/github.com/seccomp/libseccomp-golang/seccomp.go generated vendored Normal file
View File

@ -0,0 +1,857 @@
// +build linux
// Public API specification for libseccomp Go bindings
// Contains public API for the bindings
// Package seccomp provides bindings for libseccomp, a library wrapping the Linux
// seccomp syscall. Seccomp enables an application to restrict system call use
// for itself and its children.
package seccomp
import (
"fmt"
"os"
"runtime"
"strings"
"sync"
"syscall"
"unsafe"
)
// C wrapping code
// #cgo pkg-config: libseccomp
// #include <stdlib.h>
// #include <seccomp.h>
import "C"
// Exported types
// ScmpArch represents a CPU architecture. Seccomp can restrict syscalls on a
// per-architecture basis.
type ScmpArch uint
// ScmpAction represents an action to be taken on a filter rule match in
// libseccomp
type ScmpAction uint
// ScmpCompareOp represents a comparison operator which can be used in a filter
// rule
type ScmpCompareOp uint
// ScmpCondition represents a rule in a libseccomp filter context
type ScmpCondition struct {
Argument uint `json:"argument,omitempty"`
Op ScmpCompareOp `json:"operator,omitempty"`
Operand1 uint64 `json:"operand_one,omitempty"`
Operand2 uint64 `json:"operand_two,omitempty"`
}
// ScmpSyscall represents a Linux System Call
type ScmpSyscall int32
// Exported Constants
const (
// Valid architectures recognized by libseccomp
// ARM64 and all MIPS architectures are unsupported by versions of the
// library before v2.2 and will return errors if used
// ArchInvalid is a placeholder to ensure uninitialized ScmpArch
// variables are invalid
ArchInvalid ScmpArch = iota
// ArchNative is the native architecture of the kernel
ArchNative ScmpArch = iota
// ArchX86 represents 32-bit x86 syscalls
ArchX86 ScmpArch = iota
// ArchAMD64 represents 64-bit x86-64 syscalls
ArchAMD64 ScmpArch = iota
// ArchX32 represents 64-bit x86-64 syscalls (32-bit pointers)
ArchX32 ScmpArch = iota
// ArchARM represents 32-bit ARM syscalls
ArchARM ScmpArch = iota
// ArchARM64 represents 64-bit ARM syscalls
ArchARM64 ScmpArch = iota
// ArchMIPS represents 32-bit MIPS syscalls
ArchMIPS ScmpArch = iota
// ArchMIPS64 represents 64-bit MIPS syscalls
ArchMIPS64 ScmpArch = iota
// ArchMIPS64N32 represents 64-bit MIPS syscalls (32-bit pointers)
ArchMIPS64N32 ScmpArch = iota
// ArchMIPSEL represents 32-bit MIPS syscalls (little endian)
ArchMIPSEL ScmpArch = iota
// ArchMIPSEL64 represents 64-bit MIPS syscalls (little endian)
ArchMIPSEL64 ScmpArch = iota
// ArchMIPSEL64N32 represents 64-bit MIPS syscalls (little endian,
// 32-bit pointers)
ArchMIPSEL64N32 ScmpArch = iota
// ArchPPC represents 32-bit POWERPC syscalls
ArchPPC ScmpArch = iota
// ArchPPC64 represents 64-bit POWER syscalls (big endian)
ArchPPC64 ScmpArch = iota
// ArchPPC64LE represents 64-bit POWER syscalls (little endian)
ArchPPC64LE ScmpArch = iota
// ArchS390 represents 31-bit System z/390 syscalls
ArchS390 ScmpArch = iota
// ArchS390X represents 64-bit System z/390 syscalls
ArchS390X ScmpArch = iota
)
const (
// Supported actions on filter match
// ActInvalid is a placeholder to ensure uninitialized ScmpAction
// variables are invalid
ActInvalid ScmpAction = iota
// ActKill kills the process
ActKill ScmpAction = iota
// ActTrap throws SIGSYS
ActTrap ScmpAction = iota
// ActErrno causes the syscall to return a negative error code. This
// code can be set with the SetReturnCode method
ActErrno ScmpAction = iota
// ActTrace causes the syscall to notify tracing processes with the
// given error code. This code can be set with the SetReturnCode method
ActTrace ScmpAction = iota
// ActAllow permits the syscall to continue execution
ActAllow ScmpAction = iota
)
const (
// These are comparison operators used in conditional seccomp rules
// They are used to compare the value of a single argument of a syscall
// against a user-defined constant
// CompareInvalid is a placeholder to ensure uninitialized ScmpCompareOp
// variables are invalid
CompareInvalid ScmpCompareOp = iota
// CompareNotEqual returns true if the argument is not equal to the
// given value
CompareNotEqual ScmpCompareOp = iota
// CompareLess returns true if the argument is less than the given value
CompareLess ScmpCompareOp = iota
// CompareLessOrEqual returns true if the argument is less than or equal
// to the given value
CompareLessOrEqual ScmpCompareOp = iota
// CompareEqual returns true if the argument is equal to the given value
CompareEqual ScmpCompareOp = iota
// CompareGreaterEqual returns true if the argument is greater than or
// equal to the given value
CompareGreaterEqual ScmpCompareOp = iota
// CompareGreater returns true if the argument is greater than the given
// value
CompareGreater ScmpCompareOp = iota
// CompareMaskedEqual returns true if the argument is equal to the given
// value, when masked (bitwise &) against the second given value
CompareMaskedEqual ScmpCompareOp = iota
)
// Helpers for types
// GetArchFromString returns an ScmpArch constant from a string representing an
// architecture
func GetArchFromString(arch string) (ScmpArch, error) {
switch strings.ToLower(arch) {
case "x86":
return ArchX86, nil
case "amd64", "x86-64", "x86_64", "x64":
return ArchAMD64, nil
case "x32":
return ArchX32, nil
case "arm":
return ArchARM, nil
case "arm64", "aarch64":
return ArchARM64, nil
case "mips":
return ArchMIPS, nil
case "mips64":
return ArchMIPS64, nil
case "mips64n32":
return ArchMIPS64N32, nil
case "mipsel":
return ArchMIPSEL, nil
case "mipsel64":
return ArchMIPSEL64, nil
case "mipsel64n32":
return ArchMIPSEL64N32, nil
case "ppc":
return ArchPPC, nil
case "ppc64":
return ArchPPC64, nil
case "ppc64le":
return ArchPPC64LE, nil
case "s390":
return ArchS390, nil
case "s390x":
return ArchS390X, nil
default:
return ArchInvalid, fmt.Errorf("cannot convert unrecognized string %s", arch)
}
}
// String returns a string representation of an architecture constant
func (a ScmpArch) String() string {
switch a {
case ArchX86:
return "x86"
case ArchAMD64:
return "amd64"
case ArchX32:
return "x32"
case ArchARM:
return "arm"
case ArchARM64:
return "arm64"
case ArchMIPS:
return "mips"
case ArchMIPS64:
return "mips64"
case ArchMIPS64N32:
return "mips64n32"
case ArchMIPSEL:
return "mipsel"
case ArchMIPSEL64:
return "mipsel64"
case ArchMIPSEL64N32:
return "mipsel64n32"
case ArchPPC:
return "ppc"
case ArchPPC64:
return "ppc64"
case ArchPPC64LE:
return "ppc64le"
case ArchS390:
return "s390"
case ArchS390X:
return "s390x"
case ArchNative:
return "native"
case ArchInvalid:
return "Invalid architecture"
default:
return "Unknown architecture"
}
}
// String returns a string representation of a comparison operator constant
func (a ScmpCompareOp) String() string {
switch a {
case CompareNotEqual:
return "Not equal"
case CompareLess:
return "Less than"
case CompareLessOrEqual:
return "Less than or equal to"
case CompareEqual:
return "Equal"
case CompareGreaterEqual:
return "Greater than or equal to"
case CompareGreater:
return "Greater than"
case CompareMaskedEqual:
return "Masked equality"
case CompareInvalid:
return "Invalid comparison operator"
default:
return "Unrecognized comparison operator"
}
}
// String returns a string representation of a seccomp match action
func (a ScmpAction) String() string {
switch a & 0xFFFF {
case ActKill:
return "Action: Kill Process"
case ActTrap:
return "Action: Send SIGSYS"
case ActErrno:
return fmt.Sprintf("Action: Return error code %d", (a >> 16))
case ActTrace:
return fmt.Sprintf("Action: Notify tracing processes with code %d",
(a >> 16))
case ActAllow:
return "Action: Allow system call"
default:
return "Unrecognized Action"
}
}
// SetReturnCode adds a return code to a supporting ScmpAction, clearing any
// existing code Only valid on ActErrno and ActTrace. Takes no action otherwise.
// Accepts 16-bit return code as argument.
// Returns a valid ScmpAction of the original type with the new error code set.
func (a ScmpAction) SetReturnCode(code int16) ScmpAction {
aTmp := a & 0x0000FFFF
if aTmp == ActErrno || aTmp == ActTrace {
return (aTmp | (ScmpAction(code)&0xFFFF)<<16)
}
return a
}
// GetReturnCode returns the return code of an ScmpAction
func (a ScmpAction) GetReturnCode() int16 {
return int16(a >> 16)
}
// General utility functions
// GetLibraryVersion returns the version of the library the bindings are built
// against.
// The version is formatted as follows: Major.Minor.Micro
func GetLibraryVersion() (major, minor, micro int) {
return verMajor, verMinor, verMicro
}
// Syscall functions
// GetName retrieves the name of a syscall from its number.
// Acts on any syscall number.
// Returns either a string containing the name of the syscall, or an error.
func (s ScmpSyscall) GetName() (string, error) {
return s.GetNameByArch(ArchNative)
}
// GetNameByArch retrieves the name of a syscall from its number for a given
// architecture.
// Acts on any syscall number.
// Accepts a valid architecture constant.
// Returns either a string containing the name of the syscall, or an error.
// if the syscall is unrecognized or an issue occurred.
func (s ScmpSyscall) GetNameByArch(arch ScmpArch) (string, error) {
if err := sanitizeArch(arch); err != nil {
return "", err
}
cString := C.seccomp_syscall_resolve_num_arch(arch.toNative(), C.int(s))
if cString == nil {
return "", fmt.Errorf("could not resolve syscall name")
}
defer C.free(unsafe.Pointer(cString))
finalStr := C.GoString(cString)
return finalStr, nil
}
// GetSyscallFromName returns the number of a syscall by name on the kernel's
// native architecture.
// Accepts a string containing the name of a syscall.
// Returns the number of the syscall, or an error if no syscall with that name
// was found.
func GetSyscallFromName(name string) (ScmpSyscall, error) {
cString := C.CString(name)
defer C.free(unsafe.Pointer(cString))
result := C.seccomp_syscall_resolve_name(cString)
if result == scmpError {
return 0, fmt.Errorf("could not resolve name to syscall")
}
return ScmpSyscall(result), nil
}
// GetSyscallFromNameByArch returns the number of a syscall by name for a given
// architecture's ABI.
// Accepts the name of a syscall and an architecture constant.
// Returns the number of the syscall, or an error if an invalid architecture is
// passed or a syscall with that name was not found.
func GetSyscallFromNameByArch(name string, arch ScmpArch) (ScmpSyscall, error) {
if err := sanitizeArch(arch); err != nil {
return 0, err
}
cString := C.CString(name)
defer C.free(unsafe.Pointer(cString))
result := C.seccomp_syscall_resolve_name_arch(arch.toNative(), cString)
if result == scmpError {
return 0, fmt.Errorf("could not resolve name to syscall")
}
return ScmpSyscall(result), nil
}
// MakeCondition creates and returns a new condition to attach to a filter rule.
// Associated rules will only match if this condition is true.
// Accepts the number the argument we are checking, and a comparison operator
// and value to compare to.
// The rule will match if argument $arg (zero-indexed) of the syscall is
// $COMPARE_OP the provided comparison value.
// Some comparison operators accept two values. Masked equals, for example,
// will mask $arg of the syscall with the second value provided (via bitwise
// AND) and then compare against the first value provided.
// For example, in the less than or equal case, if the syscall argument was
// 0 and the value provided was 1, the condition would match, as 0 is less
// than or equal to 1.
// Return either an error on bad argument or a valid ScmpCondition struct.
func MakeCondition(arg uint, comparison ScmpCompareOp, values ...uint64) (ScmpCondition, error) {
var condStruct ScmpCondition
if comparison == CompareInvalid {
return condStruct, fmt.Errorf("invalid comparison operator")
} else if arg > 5 {
return condStruct, fmt.Errorf("syscalls only have up to 6 arguments")
} else if len(values) > 2 {
return condStruct, fmt.Errorf("conditions can have at most 2 arguments")
} else if len(values) == 0 {
return condStruct, fmt.Errorf("must provide at least one value to compare against")
}
condStruct.Argument = arg
condStruct.Op = comparison
condStruct.Operand1 = values[0]
if len(values) == 2 {
condStruct.Operand2 = values[1]
} else {
condStruct.Operand2 = 0 // Unused
}
return condStruct, nil
}
// Utility Functions
// GetNativeArch returns architecture token representing the native kernel
// architecture
func GetNativeArch() (ScmpArch, error) {
arch := C.seccomp_arch_native()
return archFromNative(arch)
}
// Public Filter API
// ScmpFilter represents a filter context in libseccomp.
// A filter context is initially empty. Rules can be added to it, and it can
// then be loaded into the kernel.
type ScmpFilter struct {
filterCtx C.scmp_filter_ctx
valid bool
lock sync.Mutex
}
// NewFilter creates and returns a new filter context.
// Accepts a default action to be taken for syscalls which match no rules in
// the filter.
// Returns a reference to a valid filter context, or nil and an error if the
// filter context could not be created or an invalid default action was given.
func NewFilter(defaultAction ScmpAction) (*ScmpFilter, error) {
if err := sanitizeAction(defaultAction); err != nil {
return nil, err
}
fPtr := C.seccomp_init(defaultAction.toNative())
if fPtr == nil {
return nil, fmt.Errorf("could not create filter")
}
filter := new(ScmpFilter)
filter.filterCtx = fPtr
filter.valid = true
runtime.SetFinalizer(filter, filterFinalizer)
return filter, nil
}
// IsValid determines whether a filter context is valid to use.
// Some operations (Release and Merge) render filter contexts invalid and
// consequently prevent further use.
func (f *ScmpFilter) IsValid() bool {
f.lock.Lock()
defer f.lock.Unlock()
return f.valid
}
// Reset resets a filter context, removing all its existing state.
// Accepts a new default action to be taken for syscalls which do not match.
// Returns an error if the filter or action provided are invalid.
func (f *ScmpFilter) Reset(defaultAction ScmpAction) error {
f.lock.Lock()
defer f.lock.Unlock()
if err := sanitizeAction(defaultAction); err != nil {
return err
} else if !f.valid {
return errBadFilter
}
retCode := C.seccomp_reset(f.filterCtx, defaultAction.toNative())
if retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}
// Release releases a filter context, freeing its memory. Should be called after
// loading into the kernel, when the filter is no longer needed.
// After calling this function, the given filter is no longer valid and cannot
// be used.
// Release() will be invoked automatically when a filter context is garbage
// collected, but can also be called manually to free memory.
func (f *ScmpFilter) Release() {
f.lock.Lock()
defer f.lock.Unlock()
if !f.valid {
return
}
f.valid = false
C.seccomp_release(f.filterCtx)
}
// Merge merges two filter contexts.
// The source filter src will be released as part of the process, and will no
// longer be usable or valid after this call.
// To be merged, filters must NOT share any architectures, and all their
// attributes (Default Action, Bad Arch Action, No New Privs and TSync bools)
// must match.
// The filter src will be merged into the filter this is called on.
// The architectures of the src filter not present in the destination, and all
// associated rules, will be added to the destination.
// Returns an error if merging the filters failed.
func (f *ScmpFilter) Merge(src *ScmpFilter) error {
f.lock.Lock()
defer f.lock.Unlock()
src.lock.Lock()
defer src.lock.Unlock()
if !src.valid || !f.valid {
return fmt.Errorf("one or more of the filter contexts is invalid or uninitialized")
}
// Merge the filters
retCode := C.seccomp_merge(f.filterCtx, src.filterCtx)
if syscall.Errno(-1*retCode) == syscall.EINVAL {
return fmt.Errorf("filters could not be merged due to a mismatch in attributes or invalid filter")
} else if retCode != 0 {
return syscall.Errno(-1 * retCode)
}
src.valid = false
return nil
}
// IsArchPresent checks if an architecture is present in a filter.
// If a filter contains an architecture, it uses its default action for
// syscalls which do not match rules in it, and its rules can match syscalls
// for that ABI.
// If a filter does not contain an architecture, all syscalls made to that
// kernel ABI will fail with the filter's default Bad Architecture Action
// (by default, killing the process).
// Accepts an architecture constant.
// Returns true if the architecture is present in the filter, false otherwise,
// and an error on an invalid filter context, architecture constant, or an
// issue with the call to libseccomp.
func (f *ScmpFilter) IsArchPresent(arch ScmpArch) (bool, error) {
f.lock.Lock()
defer f.lock.Unlock()
if err := sanitizeArch(arch); err != nil {
return false, err
} else if !f.valid {
return false, errBadFilter
}
retCode := C.seccomp_arch_exist(f.filterCtx, arch.toNative())
if syscall.Errno(-1*retCode) == syscall.EEXIST {
// -EEXIST is "arch not present"
return false, nil
} else if retCode != 0 {
return false, syscall.Errno(-1 * retCode)
}
return true, nil
}
// AddArch adds an architecture to the filter.
// Accepts an architecture constant.
// Returns an error on invalid filter context or architecture token, or an
// issue with the call to libseccomp.
func (f *ScmpFilter) AddArch(arch ScmpArch) error {
f.lock.Lock()
defer f.lock.Unlock()
if err := sanitizeArch(arch); err != nil {
return err
} else if !f.valid {
return errBadFilter
}
// Libseccomp returns -EEXIST if the specified architecture is already
// present. Succeed silently in this case, as it's not fatal, and the
// architecture is present already.
retCode := C.seccomp_arch_add(f.filterCtx, arch.toNative())
if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST {
return syscall.Errno(-1 * retCode)
}
return nil
}
// RemoveArch removes an architecture from the filter.
// Accepts an architecture constant.
// Returns an error on invalid filter context or architecture token, or an
// issue with the call to libseccomp.
func (f *ScmpFilter) RemoveArch(arch ScmpArch) error {
f.lock.Lock()
defer f.lock.Unlock()
if err := sanitizeArch(arch); err != nil {
return err
} else if !f.valid {
return errBadFilter
}
// Similar to AddArch, -EEXIST is returned if the arch is not present
// Succeed silently in that case, this is not fatal and the architecture
// is not present in the filter after RemoveArch
retCode := C.seccomp_arch_remove(f.filterCtx, arch.toNative())
if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST {
return syscall.Errno(-1 * retCode)
}
return nil
}
// Load loads a filter context into the kernel.
// Returns an error if the filter context is invalid or the syscall failed.
func (f *ScmpFilter) Load() error {
f.lock.Lock()
defer f.lock.Unlock()
if !f.valid {
return errBadFilter
}
if retCode := C.seccomp_load(f.filterCtx); retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}
// GetDefaultAction returns the default action taken on a syscall which does not
// match a rule in the filter, or an error if an issue was encountered
// retrieving the value.
func (f *ScmpFilter) GetDefaultAction() (ScmpAction, error) {
action, err := f.getFilterAttr(filterAttrActDefault)
if err != nil {
return 0x0, err
}
return actionFromNative(action)
}
// GetBadArchAction returns the default action taken on a syscall for an
// architecture not in the filter, or an error if an issue was encountered
// retrieving the value.
func (f *ScmpFilter) GetBadArchAction() (ScmpAction, error) {
action, err := f.getFilterAttr(filterAttrActBadArch)
if err != nil {
return 0x0, err
}
return actionFromNative(action)
}
// GetNoNewPrivsBit returns the current state the No New Privileges bit will be set
// to on the filter being loaded, or an error if an issue was encountered
// retrieving the value.
// The No New Privileges bit tells the kernel that new processes run with exec()
// cannot gain more privileges than the process that ran exec().
// For example, a process with No New Privileges set would be unable to exec
// setuid/setgid executables.
func (f *ScmpFilter) GetNoNewPrivsBit() (bool, error) {
noNewPrivs, err := f.getFilterAttr(filterAttrNNP)
if err != nil {
return false, err
}
if noNewPrivs == 0 {
return false, nil
}
return true, nil
}
// GetTsyncBit returns whether Thread Synchronization will be enabled on the
// filter being loaded, or an error if an issue was encountered retrieving the
// value.
// Thread Sync ensures that all members of the thread group of the calling
// process will share the same Seccomp filter set.
// Tsync is a fairly recent addition to the Linux kernel and older kernels
// lack support. If the running kernel does not support Tsync and it is
// requested in a filter, Libseccomp will not enable TSync support and will
// proceed as normal.
// This function is unavailable before v2.2 of libseccomp and will return an
// error.
func (f *ScmpFilter) GetTsyncBit() (bool, error) {
tSync, err := f.getFilterAttr(filterAttrTsync)
if err != nil {
return false, err
}
if tSync == 0 {
return false, nil
}
return true, nil
}
// SetBadArchAction sets the default action taken on a syscall for an
// architecture not in the filter, or an error if an issue was encountered
// setting the value.
func (f *ScmpFilter) SetBadArchAction(action ScmpAction) error {
if err := sanitizeAction(action); err != nil {
return err
}
return f.setFilterAttr(filterAttrActBadArch, action.toNative())
}
// SetNoNewPrivsBit sets the state of the No New Privileges bit, which will be
// applied on filter load, or an error if an issue was encountered setting the
// value.
// Filters with No New Privileges set to 0 can only be loaded if the process
// has the CAP_SYS_ADMIN capability.
func (f *ScmpFilter) SetNoNewPrivsBit(state bool) error {
var toSet C.uint32_t = 0x0
if state {
toSet = 0x1
}
return f.setFilterAttr(filterAttrNNP, toSet)
}
// SetTsync sets whether Thread Synchronization will be enabled on the filter
// being loaded. Returns an error if setting Tsync failed, or the filter is
// invalid.
// Thread Sync ensures that all members of the thread group of the calling
// process will share the same Seccomp filter set.
// Tsync is a fairly recent addition to the Linux kernel and older kernels
// lack support. If the running kernel does not support Tsync and it is
// requested in a filter, Libseccomp will not enable TSync support and will
// proceed as normal.
// This function is unavailable before v2.2 of libseccomp and will return an
// error.
func (f *ScmpFilter) SetTsync(enable bool) error {
var toSet C.uint32_t = 0x0
if enable {
toSet = 0x1
}
return f.setFilterAttr(filterAttrTsync, toSet)
}
// SetSyscallPriority sets a syscall's priority.
// This provides a hint to the filter generator in libseccomp about the
// importance of this syscall. High-priority syscalls are placed
// first in the filter code, and incur less overhead (at the expense of
// lower-priority syscalls).
func (f *ScmpFilter) SetSyscallPriority(call ScmpSyscall, priority uint8) error {
f.lock.Lock()
defer f.lock.Unlock()
if !f.valid {
return errBadFilter
}
if retCode := C.seccomp_syscall_priority(f.filterCtx, C.int(call),
C.uint8_t(priority)); retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}
// AddRule adds a single rule for an unconditional action on a syscall.
// Accepts the number of the syscall and the action to be taken on the call
// being made.
// Returns an error if an issue was encountered adding the rule.
func (f *ScmpFilter) AddRule(call ScmpSyscall, action ScmpAction) error {
return f.addRuleGeneric(call, action, false, nil)
}
// AddRuleExact adds a single rule for an unconditional action on a syscall.
// Accepts the number of the syscall and the action to be taken on the call
// being made.
// No modifications will be made to the rule, and it will fail to add if it
// cannot be applied to the current architecture without modification.
// The rule will function exactly as described, but it may not function identically
// (or be able to be applied to) all architectures.
// Returns an error if an issue was encountered adding the rule.
func (f *ScmpFilter) AddRuleExact(call ScmpSyscall, action ScmpAction) error {
return f.addRuleGeneric(call, action, true, nil)
}
// AddRuleConditional adds a single rule for a conditional action on a syscall.
// Returns an error if an issue was encountered adding the rule.
// All conditions must match for the rule to match.
// There is a bug in library versions below v2.2.1 which can, in some cases,
// cause conditions to be lost when more than one are used. Consequently,
// AddRuleConditional is disabled on library versions lower than v2.2.1
func (f *ScmpFilter) AddRuleConditional(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error {
return f.addRuleGeneric(call, action, false, conds)
}
// AddRuleConditionalExact adds a single rule for a conditional action on a
// syscall.
// No modifications will be made to the rule, and it will fail to add if it
// cannot be applied to the current architecture without modification.
// The rule will function exactly as described, but it may not function identically
// (or be able to be applied to) all architectures.
// Returns an error if an issue was encountered adding the rule.
// There is a bug in library versions below v2.2.1 which can, in some cases,
// cause conditions to be lost when more than one are used. Consequently,
// AddRuleConditionalExact is disabled on library versions lower than v2.2.1
func (f *ScmpFilter) AddRuleConditionalExact(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error {
return f.addRuleGeneric(call, action, true, conds)
}
// ExportPFC output PFC-formatted, human-readable dump of a filter context's
// rules to a file.
// Accepts file to write to (must be open for writing).
// Returns an error if writing to the file fails.
func (f *ScmpFilter) ExportPFC(file *os.File) error {
f.lock.Lock()
defer f.lock.Unlock()
fd := file.Fd()
if !f.valid {
return errBadFilter
}
if retCode := C.seccomp_export_pfc(f.filterCtx, C.int(fd)); retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}
// ExportBPF outputs Berkeley Packet Filter-formatted, kernel-readable dump of a
// filter context's rules to a file.
// Accepts file to write to (must be open for writing).
// Returns an error if writing to the file fails.
func (f *ScmpFilter) ExportBPF(file *os.File) error {
f.lock.Lock()
defer f.lock.Unlock()
fd := file.Fd()
if !f.valid {
return errBadFilter
}
if retCode := C.seccomp_export_bpf(f.filterCtx, C.int(fd)); retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}

View File

@ -0,0 +1,506 @@
// +build linux
// Internal functions for libseccomp Go bindings
// No exported functions
package seccomp
import (
"fmt"
"os"
"syscall"
)
// Unexported C wrapping code - provides the C-Golang interface
// Get the seccomp header in scope
// Need stdlib.h for free() on cstrings
// #cgo pkg-config: libseccomp
/*
#include <stdlib.h>
#include <seccomp.h>
#if SCMP_VER_MAJOR < 2
#error Minimum supported version of Libseccomp is v2.1.0
#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 1
#error Minimum supported version of Libseccomp is v2.1.0
#endif
#define ARCH_BAD ~0
const uint32_t C_ARCH_BAD = ARCH_BAD;
#ifndef SCMP_ARCH_AARCH64
#define SCMP_ARCH_AARCH64 ARCH_BAD
#endif
#ifndef SCMP_ARCH_MIPS
#define SCMP_ARCH_MIPS ARCH_BAD
#endif
#ifndef SCMP_ARCH_MIPS64
#define SCMP_ARCH_MIPS64 ARCH_BAD
#endif
#ifndef SCMP_ARCH_MIPS64N32
#define SCMP_ARCH_MIPS64N32 ARCH_BAD
#endif
#ifndef SCMP_ARCH_MIPSEL
#define SCMP_ARCH_MIPSEL ARCH_BAD
#endif
#ifndef SCMP_ARCH_MIPSEL64
#define SCMP_ARCH_MIPSEL64 ARCH_BAD
#endif
#ifndef SCMP_ARCH_MIPSEL64N32
#define SCMP_ARCH_MIPSEL64N32 ARCH_BAD
#endif
#ifndef SCMP_ARCH_PPC
#define SCMP_ARCH_PPC ARCH_BAD
#endif
#ifndef SCMP_ARCH_PPC64
#define SCMP_ARCH_PPC64 ARCH_BAD
#endif
#ifndef SCMP_ARCH_PPC64LE
#define SCMP_ARCH_PPC64LE ARCH_BAD
#endif
#ifndef SCMP_ARCH_S390
#define SCMP_ARCH_S390 ARCH_BAD
#endif
#ifndef SCMP_ARCH_S390X
#define SCMP_ARCH_S390X ARCH_BAD
#endif
const uint32_t C_ARCH_NATIVE = SCMP_ARCH_NATIVE;
const uint32_t C_ARCH_X86 = SCMP_ARCH_X86;
const uint32_t C_ARCH_X86_64 = SCMP_ARCH_X86_64;
const uint32_t C_ARCH_X32 = SCMP_ARCH_X32;
const uint32_t C_ARCH_ARM = SCMP_ARCH_ARM;
const uint32_t C_ARCH_AARCH64 = SCMP_ARCH_AARCH64;
const uint32_t C_ARCH_MIPS = SCMP_ARCH_MIPS;
const uint32_t C_ARCH_MIPS64 = SCMP_ARCH_MIPS64;
const uint32_t C_ARCH_MIPS64N32 = SCMP_ARCH_MIPS64N32;
const uint32_t C_ARCH_MIPSEL = SCMP_ARCH_MIPSEL;
const uint32_t C_ARCH_MIPSEL64 = SCMP_ARCH_MIPSEL64;
const uint32_t C_ARCH_MIPSEL64N32 = SCMP_ARCH_MIPSEL64N32;
const uint32_t C_ARCH_PPC = SCMP_ARCH_PPC;
const uint32_t C_ARCH_PPC64 = SCMP_ARCH_PPC64;
const uint32_t C_ARCH_PPC64LE = SCMP_ARCH_PPC64LE;
const uint32_t C_ARCH_S390 = SCMP_ARCH_S390;
const uint32_t C_ARCH_S390X = SCMP_ARCH_S390X;
const uint32_t C_ACT_KILL = SCMP_ACT_KILL;
const uint32_t C_ACT_TRAP = SCMP_ACT_TRAP;
const uint32_t C_ACT_ERRNO = SCMP_ACT_ERRNO(0);
const uint32_t C_ACT_TRACE = SCMP_ACT_TRACE(0);
const uint32_t C_ACT_ALLOW = SCMP_ACT_ALLOW;
// If TSync is not supported, make sure it doesn't map to a supported filter attribute
// Don't worry about major version < 2, the minimum version checks should catch that case
#if SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 2
#define SCMP_FLTATR_CTL_TSYNC _SCMP_CMP_MIN
#endif
const uint32_t C_ATTRIBUTE_DEFAULT = (uint32_t)SCMP_FLTATR_ACT_DEFAULT;
const uint32_t C_ATTRIBUTE_BADARCH = (uint32_t)SCMP_FLTATR_ACT_BADARCH;
const uint32_t C_ATTRIBUTE_NNP = (uint32_t)SCMP_FLTATR_CTL_NNP;
const uint32_t C_ATTRIBUTE_TSYNC = (uint32_t)SCMP_FLTATR_CTL_TSYNC;
const int C_CMP_NE = (int)SCMP_CMP_NE;
const int C_CMP_LT = (int)SCMP_CMP_LT;
const int C_CMP_LE = (int)SCMP_CMP_LE;
const int C_CMP_EQ = (int)SCMP_CMP_EQ;
const int C_CMP_GE = (int)SCMP_CMP_GE;
const int C_CMP_GT = (int)SCMP_CMP_GT;
const int C_CMP_MASKED_EQ = (int)SCMP_CMP_MASKED_EQ;
const int C_VERSION_MAJOR = SCMP_VER_MAJOR;
const int C_VERSION_MINOR = SCMP_VER_MINOR;
const int C_VERSION_MICRO = SCMP_VER_MICRO;
typedef struct scmp_arg_cmp* scmp_cast_t;
// Wrapper to create an scmp_arg_cmp struct
void*
make_struct_arg_cmp(
unsigned int arg,
int compare,
uint64_t a,
uint64_t b
)
{
struct scmp_arg_cmp *s = malloc(sizeof(struct scmp_arg_cmp));
s->arg = arg;
s->op = compare;
s->datum_a = a;
s->datum_b = b;
return s;
}
*/
import "C"
// Nonexported types
type scmpFilterAttr uint32
// Nonexported constants
const (
filterAttrActDefault scmpFilterAttr = iota
filterAttrActBadArch scmpFilterAttr = iota
filterAttrNNP scmpFilterAttr = iota
filterAttrTsync scmpFilterAttr = iota
)
const (
// An error return from certain libseccomp functions
scmpError C.int = -1
// Comparison boundaries to check for architecture validity
archStart ScmpArch = ArchNative
archEnd ScmpArch = ArchS390X
// Comparison boundaries to check for action validity
actionStart ScmpAction = ActKill
actionEnd ScmpAction = ActAllow
// Comparison boundaries to check for comparison operator validity
compareOpStart ScmpCompareOp = CompareNotEqual
compareOpEnd ScmpCompareOp = CompareMaskedEqual
)
var (
// Error thrown on bad filter context
errBadFilter = fmt.Errorf("filter is invalid or uninitialized")
// Constants representing library major, minor, and micro versions
verMajor = int(C.C_VERSION_MAJOR)
verMinor = int(C.C_VERSION_MINOR)
verMicro = int(C.C_VERSION_MICRO)
)
// Nonexported functions
// Check if library version is greater than or equal to the given one
func checkVersionAbove(major, minor, micro int) bool {
return (verMajor > major) ||
(verMajor == major && verMinor > minor) ||
(verMajor == major && verMinor == minor && verMicro >= micro)
}
// Init function: Verify library version is appropriate
func init() {
if !checkVersionAbove(2, 1, 0) {
fmt.Fprintf(os.Stderr, "Libseccomp version too low: minimum supported is 2.1.0, detected %d.%d.%d", C.C_VERSION_MAJOR, C.C_VERSION_MINOR, C.C_VERSION_MICRO)
os.Exit(-1)
}
}
// Filter helpers
// Filter finalizer - ensure that kernel context for filters is freed
func filterFinalizer(f *ScmpFilter) {
f.Release()
}
// Get a raw filter attribute
func (f *ScmpFilter) getFilterAttr(attr scmpFilterAttr) (C.uint32_t, error) {
f.lock.Lock()
defer f.lock.Unlock()
if !f.valid {
return 0x0, errBadFilter
}
if !checkVersionAbove(2, 2, 0) && attr == filterAttrTsync {
return 0x0, fmt.Errorf("the thread synchronization attribute is not supported in this version of the library")
}
var attribute C.uint32_t
retCode := C.seccomp_attr_get(f.filterCtx, attr.toNative(), &attribute)
if retCode != 0 {
return 0x0, syscall.Errno(-1 * retCode)
}
return attribute, nil
}
// Set a raw filter attribute
func (f *ScmpFilter) setFilterAttr(attr scmpFilterAttr, value C.uint32_t) error {
f.lock.Lock()
defer f.lock.Unlock()
if !f.valid {
return errBadFilter
}
if !checkVersionAbove(2, 2, 0) && attr == filterAttrTsync {
return fmt.Errorf("the thread synchronization attribute is not supported in this version of the library")
}
retCode := C.seccomp_attr_set(f.filterCtx, attr.toNative(), value)
if retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}
// DOES NOT LOCK OR CHECK VALIDITY
// Assumes caller has already done this
// Wrapper for seccomp_rule_add_... functions
func (f *ScmpFilter) addRuleWrapper(call ScmpSyscall, action ScmpAction, exact bool, cond C.scmp_cast_t) error {
var length C.uint
if cond != nil {
length = 1
} else {
length = 0
}
var retCode C.int
if exact {
retCode = C.seccomp_rule_add_exact_array(f.filterCtx, action.toNative(), C.int(call), length, cond)
} else {
retCode = C.seccomp_rule_add_array(f.filterCtx, action.toNative(), C.int(call), length, cond)
}
if syscall.Errno(-1*retCode) == syscall.EFAULT {
return fmt.Errorf("unrecognized syscall")
} else if syscall.Errno(-1*retCode) == syscall.EPERM {
return fmt.Errorf("requested action matches default action of filter")
} else if retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}
// Generic add function for filter rules
func (f *ScmpFilter) addRuleGeneric(call ScmpSyscall, action ScmpAction, exact bool, conds []ScmpCondition) error {
f.lock.Lock()
defer f.lock.Unlock()
if !f.valid {
return errBadFilter
}
if len(conds) == 0 {
if err := f.addRuleWrapper(call, action, exact, nil); err != nil {
return err
}
} else {
// We don't support conditional filtering in library version v2.1
if !checkVersionAbove(2, 2, 1) {
return fmt.Errorf("conditional filtering requires libseccomp version >= 2.2.1")
}
for _, cond := range conds {
cmpStruct := C.make_struct_arg_cmp(C.uint(cond.Argument), cond.Op.toNative(), C.uint64_t(cond.Operand1), C.uint64_t(cond.Operand2))
defer C.free(cmpStruct)
if err := f.addRuleWrapper(call, action, exact, C.scmp_cast_t(cmpStruct)); err != nil {
return err
}
}
}
return nil
}
// Generic Helpers
// Helper - Sanitize Arch token input
func sanitizeArch(in ScmpArch) error {
if in < archStart || in > archEnd {
return fmt.Errorf("unrecognized architecture")
}
if in.toNative() == C.C_ARCH_BAD {
return fmt.Errorf("architecture is not supported on this version of the library")
}
return nil
}
func sanitizeAction(in ScmpAction) error {
inTmp := in & 0x0000FFFF
if inTmp < actionStart || inTmp > actionEnd {
return fmt.Errorf("unrecognized action")
}
if inTmp != ActTrace && inTmp != ActErrno && (in&0xFFFF0000) != 0 {
return fmt.Errorf("highest 16 bits must be zeroed except for Trace and Errno")
}
return nil
}
func sanitizeCompareOp(in ScmpCompareOp) error {
if in < compareOpStart || in > compareOpEnd {
return fmt.Errorf("unrecognized comparison operator")
}
return nil
}
func archFromNative(a C.uint32_t) (ScmpArch, error) {
switch a {
case C.C_ARCH_X86:
return ArchX86, nil
case C.C_ARCH_X86_64:
return ArchAMD64, nil
case C.C_ARCH_X32:
return ArchX32, nil
case C.C_ARCH_ARM:
return ArchARM, nil
case C.C_ARCH_NATIVE:
return ArchNative, nil
case C.C_ARCH_AARCH64:
return ArchARM64, nil
case C.C_ARCH_MIPS:
return ArchMIPS, nil
case C.C_ARCH_MIPS64:
return ArchMIPS64, nil
case C.C_ARCH_MIPS64N32:
return ArchMIPS64N32, nil
case C.C_ARCH_MIPSEL:
return ArchMIPSEL, nil
case C.C_ARCH_MIPSEL64:
return ArchMIPSEL64, nil
case C.C_ARCH_MIPSEL64N32:
return ArchMIPSEL64N32, nil
case C.C_ARCH_PPC:
return ArchPPC, nil
case C.C_ARCH_PPC64:
return ArchPPC64, nil
case C.C_ARCH_PPC64LE:
return ArchPPC64LE, nil
case C.C_ARCH_S390:
return ArchS390, nil
case C.C_ARCH_S390X:
return ArchS390X, nil
default:
return 0x0, fmt.Errorf("unrecognized architecture")
}
}
// Only use with sanitized arches, no error handling
func (a ScmpArch) toNative() C.uint32_t {
switch a {
case ArchX86:
return C.C_ARCH_X86
case ArchAMD64:
return C.C_ARCH_X86_64
case ArchX32:
return C.C_ARCH_X32
case ArchARM:
return C.C_ARCH_ARM
case ArchARM64:
return C.C_ARCH_AARCH64
case ArchMIPS:
return C.C_ARCH_MIPS
case ArchMIPS64:
return C.C_ARCH_MIPS64
case ArchMIPS64N32:
return C.C_ARCH_MIPS64N32
case ArchMIPSEL:
return C.C_ARCH_MIPSEL
case ArchMIPSEL64:
return C.C_ARCH_MIPSEL64
case ArchMIPSEL64N32:
return C.C_ARCH_MIPSEL64N32
case ArchPPC:
return C.C_ARCH_PPC
case ArchPPC64:
return C.C_ARCH_PPC64
case ArchPPC64LE:
return C.C_ARCH_PPC64LE
case ArchS390:
return C.C_ARCH_S390
case ArchS390X:
return C.C_ARCH_S390X
case ArchNative:
return C.C_ARCH_NATIVE
default:
return 0x0
}
}
// Only use with sanitized ops, no error handling
func (a ScmpCompareOp) toNative() C.int {
switch a {
case CompareNotEqual:
return C.C_CMP_NE
case CompareLess:
return C.C_CMP_LT
case CompareLessOrEqual:
return C.C_CMP_LE
case CompareEqual:
return C.C_CMP_EQ
case CompareGreaterEqual:
return C.C_CMP_GE
case CompareGreater:
return C.C_CMP_GT
case CompareMaskedEqual:
return C.C_CMP_MASKED_EQ
default:
return 0x0
}
}
func actionFromNative(a C.uint32_t) (ScmpAction, error) {
aTmp := a & 0xFFFF
switch a & 0xFFFF0000 {
case C.C_ACT_KILL:
return ActKill, nil
case C.C_ACT_TRAP:
return ActTrap, nil
case C.C_ACT_ERRNO:
return ActErrno.SetReturnCode(int16(aTmp)), nil
case C.C_ACT_TRACE:
return ActTrace.SetReturnCode(int16(aTmp)), nil
case C.C_ACT_ALLOW:
return ActAllow, nil
default:
return 0x0, fmt.Errorf("unrecognized action")
}
}
// Only use with sanitized actions, no error handling
func (a ScmpAction) toNative() C.uint32_t {
switch a & 0xFFFF {
case ActKill:
return C.C_ACT_KILL
case ActTrap:
return C.C_ACT_TRAP
case ActErrno:
return C.C_ACT_ERRNO | (C.uint32_t(a) >> 16)
case ActTrace:
return C.C_ACT_TRACE | (C.uint32_t(a) >> 16)
case ActAllow:
return C.C_ACT_ALLOW
default:
return 0x0
}
}
// Internal only, assumes safe attribute
func (a scmpFilterAttr) toNative() uint32 {
switch a {
case filterAttrActDefault:
return uint32(C.C_ATTRIBUTE_DEFAULT)
case filterAttrActBadArch:
return uint32(C.C_ATTRIBUTE_BADARCH)
case filterAttrNNP:
return uint32(C.C_ATTRIBUTE_NNP)
case filterAttrTsync:
return uint32(C.C_ATTRIBUTE_TSYNC)
default:
return 0x0
}
}