Merge pull request #486 from AkihiroSuda/update-rootless
rootless: refactor libcontainer_specconv & add integration testsdocker-18.09
commit
cda0a64eca
|
@ -2,7 +2,7 @@
|
|||
|
||||
Requirements:
|
||||
- runc `ecd55a4135e0a26de884ce436442914f945b1e76` (May 30, 2018) or later
|
||||
- Some distros such as Debian and Arch Linux require `echo 1 > /proc/sys/kernel/unprivileged_userns_clone`
|
||||
- Some distros such as Debian (excluding Ubuntu) and Arch Linux require `echo 1 > /proc/sys/kernel/unprivileged_userns_clone`
|
||||
- `newuidmap` and `newgidmap` need to be installed on the host. These commands are provided by the `uidmap` package.
|
||||
- `/etc/subuid` and `/etc/subgid` should contain >= 65536 sub-IDs. e.g. `penguin:231072:65536`.
|
||||
- To run in a Docker container with non-root `USER`, `docker run --privileged` is still required. See also Jessie's blog: https://blog.jessfraz.com/post/building-container-images-securely-on-kubernetes/
|
||||
|
@ -10,7 +10,7 @@ Requirements:
|
|||
|
||||
## Set up
|
||||
|
||||
Setting up rootless mode also requires some bothersome steps as follows, but you can also use [`rootlesskit`](https://github.com/AkihiroSuda/rootlesskit) for automating these steps.
|
||||
Setting up rootless mode also requires some bothersome steps as follows, but you can also use [`rootlesskit`](https://github.com/rootless-containers/rootlesskit) for automating these steps.
|
||||
|
||||
### Terminal 1:
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ import (
|
|||
"github.com/moby/buildkit/executor"
|
||||
"github.com/moby/buildkit/executor/oci"
|
||||
"github.com/moby/buildkit/identity"
|
||||
"github.com/moby/buildkit/util/libcontainer_specconv"
|
||||
rootlessspecconv "github.com/moby/buildkit/util/rootless/specconv"
|
||||
"github.com/moby/buildkit/util/system"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/pkg/errors"
|
||||
|
@ -84,6 +84,8 @@ func New(opt Opt) (executor.Executor, error) {
|
|||
LogFormat: runc.JSON,
|
||||
PdeathSignal: syscall.SIGKILL,
|
||||
Setpgid: true,
|
||||
// we don't execute runc with --rootless=(true|false) explicitly,
|
||||
// so as to support non-runc runtimes
|
||||
}
|
||||
|
||||
w := &runcExecutor{
|
||||
|
@ -169,13 +171,11 @@ func (w *runcExecutor) Exec(ctx context.Context, meta executor.Meta, root cache.
|
|||
return errors.Wrapf(err, "failed to create working directory %s", newp)
|
||||
}
|
||||
|
||||
if err := setOOMScoreAdj(spec); err != nil {
|
||||
return err
|
||||
}
|
||||
if w.rootless {
|
||||
specconv.ToRootless(spec, nil)
|
||||
// TODO(AkihiroSuda): keep Cgroups enabled if /sys/fs/cgroup/cpuset/buildkit exists and writable
|
||||
spec.Linux.CgroupsPath = ""
|
||||
// TODO(AkihiroSuda): ToRootless removes netns, but we should readd netns here
|
||||
// if either SUID or userspace NAT is configured on the host.
|
||||
if err := setOOMScoreAdj(spec); err != nil {
|
||||
if err := rootlessspecconv.ToRootless(spec); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1115,6 +1115,10 @@ RUN ["ls"]
|
|||
}
|
||||
|
||||
func testUser(t *testing.T, sb integration.Sandbox) {
|
||||
if sb.Rootless() {
|
||||
t.Skip("only for rootful worker, due to lack of support for additional gids (https://github.com/opencontainers/runc/issues/1835)")
|
||||
}
|
||||
|
||||
t.Parallel()
|
||||
|
||||
dockerfile := []byte(`
|
||||
|
|
|
@ -5,7 +5,7 @@ ARG CONTAINERD10_VERSION=v1.0.3
|
|||
# available targets: buildkitd, buildkitd.oci_only, buildkitd.containerd_only
|
||||
ARG BUILDKIT_TARGET=buildkitd
|
||||
ARG REGISTRY_VERSION=2.6
|
||||
ARG ROOTLESSKIT_VERSION=1e79dc31d71ea8c1a27f15086be2be2b1d99acaa
|
||||
ARG ROOTLESSKIT_VERSION=20b0fc24b305b031a61ef1a1ca456aadafaf5e77
|
||||
|
||||
# The `buildkitd` stage and the `buildctl` stage are placed here
|
||||
# so that they can be built quickly with legacy DAG-unaware `docker build --target=...`
|
||||
|
@ -75,11 +75,29 @@ RUN go build -ldflags "$(cat .tmp/ldflags) -d" -o /usr/bin/buildkitd.containerd
|
|||
|
||||
FROM registry:$REGISTRY_VERSION AS registry
|
||||
|
||||
FROM gobuild-base AS rootlesskit-base
|
||||
RUN git clone https://github.com/rootless-containers/rootlesskit.git /go/src/github.com/rootless-containers/rootlesskit
|
||||
WORKDIR /go/src/github.com/rootless-containers/rootlesskit
|
||||
|
||||
FROM rootlesskit-base as rootlesskit
|
||||
ARG ROOTLESSKIT_VERSION
|
||||
# mitigate https://github.com/moby/moby/pull/35456
|
||||
ENV GOOS=linux
|
||||
RUN git checkout -q "$ROOTLESSKIT_VERSION" \
|
||||
&& go build -o /rootlesskit ./cmd/rootlesskit
|
||||
|
||||
FROM unit-tests AS integration-tests
|
||||
ENV BUILDKIT_INTEGRATION_ROOTLESS_IDPAIR="1000:1000"
|
||||
RUN apk add --no-cache shadow shadow-uidmap sudo \
|
||||
&& useradd --create-home --home-dir /home/user --uid 1000 -s /bin/sh user \
|
||||
&& echo "XDG_RUNTIME_DIR=/run/user/1000; export XDG_RUNTIME_DIR" >> /home/user/.profile \
|
||||
&& mkdir -m 0700 -p /run/user/1000 \
|
||||
&& chown -R user /run/user/1000 /home/user
|
||||
COPY --from=containerd10 /go/src/github.com/containerd/containerd/bin/containerd* /opt/containerd-1.0/bin/
|
||||
COPY --from=buildctl /usr/bin/buildctl /usr/bin/
|
||||
COPY --from=buildkitd /usr/bin/buildkitd /usr/bin
|
||||
COPY --from=registry /bin/registry /usr/bin
|
||||
COPY --from=rootlesskit /rootlesskit /usr/bin/
|
||||
|
||||
FROM buildkit-base AS cross-windows
|
||||
ENV GOOS=windows
|
||||
|
@ -123,31 +141,18 @@ VOLUME /var/lib/containerd
|
|||
VOLUME /run/containerd
|
||||
ENTRYPOINT ["containerd"]
|
||||
|
||||
FROM gobuild-base AS rootlesskit-base
|
||||
RUN git clone https://github.com/AkihiroSuda/rootlesskit.git /go/src/github.com/AkihiroSuda/rootlesskit
|
||||
WORKDIR /go/src/github.com/AkihiroSuda/rootlesskit
|
||||
|
||||
FROM rootlesskit-base as rootlesskit
|
||||
ARG ROOTLESSKIT_VERSION
|
||||
# mitigate https://github.com/moby/moby/pull/35456
|
||||
ENV GOOS=linux
|
||||
RUN git checkout -q "$ROOTLESSKIT_VERSION" \
|
||||
&& go build -o /rootlesskit ./cmd/rootlesskit
|
||||
|
||||
# Rootless mode.
|
||||
# Still requires `--privileged`.
|
||||
FROM buildkit-buildkitd AS rootless
|
||||
RUN apk add --no-cache shadow shadow-uidmap \
|
||||
&& useradd --create-home --home-dir /home/user --uid 1000 user \
|
||||
&& mkdir -p /home/user/.local/run /home/user/.local/tmp /home/user/.local/share/buildkit \
|
||||
&& chown -R user /home/user
|
||||
&& mkdir -p /run/user/1000 /home/user/.local/tmp /home/user/.local/share/buildkit \
|
||||
&& chown -R user /run/user/1000 /home/user
|
||||
COPY --from=rootlesskit /rootlesskit /usr/bin/
|
||||
USER user
|
||||
ENV HOME /home/user
|
||||
ENV USER user
|
||||
# WORKAROUND: this should be typically /run/user/1000,
|
||||
# but mkdir under /run is not captured when built using BuildKit. (#429)
|
||||
ENV XDG_RUNTIME_DIR=/home/user/.local/run
|
||||
ENV XDG_RUNTIME_DIR=/run/user/1000
|
||||
ENV TMPDIR=/home/user/.local/tmp
|
||||
VOLUME /home/user/.local/share/buildkit
|
||||
ENTRYPOINT ["rootlesskit", "buildkitd"]
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
Temporary forked from https://github.com/opencontainers/runc/pull/1692
|
|
@ -1,190 +0,0 @@
|
|||
package specconv
|
||||
|
||||
import (
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
// RootlessOpts is an optional spec for ToRootless
|
||||
type RootlessOpts struct {
|
||||
// Add sub{u,g}id to spec.Linux.{U,G}IDMappings.
|
||||
// Requires newuidmap(1) and newgidmap(1) with suid bit.
|
||||
// Ignored when running in userns.
|
||||
MapSubUIDGID bool
|
||||
}
|
||||
|
||||
// Run-time context for ToRootless.
|
||||
type RootlessContext struct {
|
||||
EUID uint32
|
||||
EGID uint32
|
||||
SubUIDs []user.SubID
|
||||
SubGIDs []user.SubID
|
||||
UIDMap []user.IDMap
|
||||
GIDMap []user.IDMap
|
||||
InUserNS bool
|
||||
}
|
||||
|
||||
// ToRootless converts the given spec file into one that should work with
|
||||
// rootless containers, by removing incompatible options and adding others that
|
||||
// are needed.
|
||||
func ToRootless(spec *specs.Spec, opts *RootlessOpts) error {
|
||||
var err error
|
||||
ctx := RootlessContext{}
|
||||
ctx.EUID = uint32(os.Geteuid())
|
||||
ctx.EGID = uint32(os.Getegid())
|
||||
ctx.SubUIDs, err = user.CurrentUserSubUIDs()
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
ctx.SubGIDs, err = user.CurrentGroupSubGIDs()
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
ctx.UIDMap, err = user.CurrentProcessUIDMap()
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
uidMapExists := !os.IsNotExist(err)
|
||||
ctx.GIDMap, err = user.CurrentProcessUIDMap()
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
ctx.InUserNS = uidMapExists && system.UIDMapInUserNS(ctx.UIDMap)
|
||||
return ToRootlessWithContext(ctx, spec, opts)
|
||||
}
|
||||
|
||||
// ToRootlessWithContext converts the spec with the run-time context.
|
||||
// ctx can be internally modified for sorting.
|
||||
func ToRootlessWithContext(ctx RootlessContext, spec *specs.Spec, opts *RootlessOpts) error {
|
||||
if opts == nil {
|
||||
opts = &RootlessOpts{}
|
||||
}
|
||||
var namespaces []specs.LinuxNamespace
|
||||
|
||||
// Remove networkns from the spec.
|
||||
for _, ns := range spec.Linux.Namespaces {
|
||||
switch ns.Type {
|
||||
case specs.NetworkNamespace, specs.UserNamespace:
|
||||
// Do nothing.
|
||||
default:
|
||||
namespaces = append(namespaces, ns)
|
||||
}
|
||||
}
|
||||
// Add userns to the spec.
|
||||
namespaces = append(namespaces, specs.LinuxNamespace{
|
||||
Type: specs.UserNamespace,
|
||||
})
|
||||
spec.Linux.Namespaces = namespaces
|
||||
|
||||
// Add mappings for the current user.
|
||||
if ctx.InUserNS {
|
||||
uNextContainerID := int64(0)
|
||||
sort.Sort(idmapSorter(ctx.UIDMap))
|
||||
for _, uidmap := range ctx.UIDMap {
|
||||
spec.Linux.UIDMappings = append(spec.Linux.UIDMappings,
|
||||
specs.LinuxIDMapping{
|
||||
HostID: uint32(uidmap.ID),
|
||||
ContainerID: uint32(uNextContainerID),
|
||||
Size: uint32(uidmap.Count),
|
||||
})
|
||||
uNextContainerID += uidmap.Count
|
||||
}
|
||||
gNextContainerID := int64(0)
|
||||
sort.Sort(idmapSorter(ctx.GIDMap))
|
||||
for _, gidmap := range ctx.GIDMap {
|
||||
spec.Linux.GIDMappings = append(spec.Linux.GIDMappings,
|
||||
specs.LinuxIDMapping{
|
||||
HostID: uint32(gidmap.ID),
|
||||
ContainerID: uint32(gNextContainerID),
|
||||
Size: uint32(gidmap.Count),
|
||||
})
|
||||
gNextContainerID += gidmap.Count
|
||||
}
|
||||
// opts.MapSubUIDGID is ignored in userns
|
||||
} else {
|
||||
spec.Linux.UIDMappings = []specs.LinuxIDMapping{{
|
||||
HostID: ctx.EUID,
|
||||
ContainerID: 0,
|
||||
Size: 1,
|
||||
}}
|
||||
spec.Linux.GIDMappings = []specs.LinuxIDMapping{{
|
||||
HostID: ctx.EGID,
|
||||
ContainerID: 0,
|
||||
Size: 1,
|
||||
}}
|
||||
if opts.MapSubUIDGID {
|
||||
uNextContainerID := int64(1)
|
||||
sort.Sort(subIDSorter(ctx.SubUIDs))
|
||||
for _, subuid := range ctx.SubUIDs {
|
||||
spec.Linux.UIDMappings = append(spec.Linux.UIDMappings,
|
||||
specs.LinuxIDMapping{
|
||||
HostID: uint32(subuid.SubID),
|
||||
ContainerID: uint32(uNextContainerID),
|
||||
Size: uint32(subuid.Count),
|
||||
})
|
||||
uNextContainerID += subuid.Count
|
||||
}
|
||||
gNextContainerID := int64(1)
|
||||
sort.Sort(subIDSorter(ctx.SubGIDs))
|
||||
for _, subgid := range ctx.SubGIDs {
|
||||
spec.Linux.GIDMappings = append(spec.Linux.GIDMappings,
|
||||
specs.LinuxIDMapping{
|
||||
HostID: uint32(subgid.SubID),
|
||||
ContainerID: uint32(gNextContainerID),
|
||||
Size: uint32(subgid.Count),
|
||||
})
|
||||
gNextContainerID += subgid.Count
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fix up mounts.
|
||||
var mounts []specs.Mount
|
||||
for _, mount := range spec.Mounts {
|
||||
// Ignore all mounts that are under /sys.
|
||||
if strings.HasPrefix(mount.Destination, "/sys") {
|
||||
continue
|
||||
}
|
||||
|
||||
// Remove all gid= and uid= mappings.
|
||||
var options []string
|
||||
for _, option := range mount.Options {
|
||||
if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") {
|
||||
options = append(options, option)
|
||||
}
|
||||
}
|
||||
|
||||
mount.Options = options
|
||||
mounts = append(mounts, mount)
|
||||
}
|
||||
// Add the sysfs mount as an rbind.
|
||||
mounts = append(mounts, specs.Mount{
|
||||
Source: "/sys",
|
||||
Destination: "/sys",
|
||||
Type: "none",
|
||||
Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
|
||||
})
|
||||
spec.Mounts = mounts
|
||||
|
||||
// Remove cgroup settings.
|
||||
spec.Linux.Resources = nil
|
||||
return nil
|
||||
}
|
||||
|
||||
// subIDSorter is required for Go <= 1.7
|
||||
type subIDSorter []user.SubID
|
||||
|
||||
func (x subIDSorter) Len() int { return len(x) }
|
||||
func (x subIDSorter) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
|
||||
func (x subIDSorter) Less(i, j int) bool { return x[i].SubID < x[j].SubID }
|
||||
|
||||
type idmapSorter []user.IDMap
|
||||
|
||||
func (x idmapSorter) Len() int { return len(x) }
|
||||
func (x idmapSorter) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
|
||||
func (x idmapSorter) Less(i, j int) bool { return x[i].ID < x[j].ID }
|
|
@ -1,190 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package specconv
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs/validate"
|
||||
"github.com/opencontainers/runc/libcontainer/specconv"
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
func TestRootlessSpecconvValidate(t *testing.T) {
|
||||
specGen := func() *specs.Spec {
|
||||
spec := specconv.Example()
|
||||
spec.Root.Path = "/"
|
||||
return spec
|
||||
}
|
||||
cases := []struct {
|
||||
ctx RootlessContext
|
||||
opts *RootlessOpts
|
||||
additionalValidator func(t *testing.T, s *specs.Spec)
|
||||
}{
|
||||
{
|
||||
ctx: RootlessContext{
|
||||
EUID: 0,
|
||||
EGID: 0,
|
||||
},
|
||||
},
|
||||
{
|
||||
ctx: RootlessContext{
|
||||
EUID: 4242,
|
||||
EGID: 4242,
|
||||
},
|
||||
},
|
||||
{
|
||||
ctx: RootlessContext{
|
||||
EUID: 4242,
|
||||
EGID: 4242,
|
||||
// empty subuid / subgid
|
||||
},
|
||||
opts: &RootlessOpts{
|
||||
MapSubUIDGID: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
ctx: RootlessContext{
|
||||
EUID: 4242,
|
||||
EGID: 4242,
|
||||
SubUIDs: []user.SubID{
|
||||
{
|
||||
Name: "dummy",
|
||||
SubID: 14242,
|
||||
Count: 65536,
|
||||
},
|
||||
{
|
||||
Name: "dummy",
|
||||
SubID: 114242,
|
||||
Count: 65536,
|
||||
},
|
||||
},
|
||||
SubGIDs: []user.SubID{
|
||||
{
|
||||
Name: "dummy",
|
||||
SubID: 14242,
|
||||
Count: 65536,
|
||||
},
|
||||
{
|
||||
Name: "dummy",
|
||||
SubID: 114242,
|
||||
Count: 65536,
|
||||
},
|
||||
},
|
||||
},
|
||||
opts: &RootlessOpts{
|
||||
MapSubUIDGID: true,
|
||||
},
|
||||
additionalValidator: func(t *testing.T, s *specs.Spec) {
|
||||
expectedUIDMappings := []specs.LinuxIDMapping{
|
||||
{
|
||||
HostID: 4242,
|
||||
ContainerID: 0,
|
||||
Size: 1,
|
||||
},
|
||||
{
|
||||
HostID: 14242,
|
||||
ContainerID: 1,
|
||||
Size: 65536,
|
||||
},
|
||||
{
|
||||
HostID: 114242,
|
||||
ContainerID: 65537,
|
||||
Size: 65536,
|
||||
},
|
||||
}
|
||||
if !reflect.DeepEqual(expectedUIDMappings, s.Linux.UIDMappings) {
|
||||
t.Errorf("expected %#v, got %#v", expectedUIDMappings, s.Linux.UIDMappings)
|
||||
}
|
||||
expectedGIDMappings := expectedUIDMappings
|
||||
if !reflect.DeepEqual(expectedGIDMappings, s.Linux.GIDMappings) {
|
||||
t.Errorf("expected %#v, got %#v", expectedGIDMappings, s.Linux.GIDMappings)
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
ctx: RootlessContext{
|
||||
EUID: 0,
|
||||
EGID: 0,
|
||||
UIDMap: []user.IDMap{
|
||||
{
|
||||
ID: 0,
|
||||
ParentID: 4242,
|
||||
Count: 1,
|
||||
},
|
||||
{
|
||||
ID: 1,
|
||||
ParentID: 231072,
|
||||
Count: 65536,
|
||||
},
|
||||
},
|
||||
GIDMap: []user.IDMap{
|
||||
{
|
||||
ID: 0,
|
||||
ParentID: 4242,
|
||||
Count: 1,
|
||||
},
|
||||
{
|
||||
ID: 1,
|
||||
ParentID: 231072,
|
||||
Count: 65536,
|
||||
},
|
||||
},
|
||||
InUserNS: true,
|
||||
},
|
||||
additionalValidator: func(t *testing.T, s *specs.Spec) {
|
||||
expectedUIDMappings := []specs.LinuxIDMapping{
|
||||
{
|
||||
HostID: 0,
|
||||
ContainerID: 0,
|
||||
Size: 1,
|
||||
},
|
||||
{
|
||||
HostID: 1,
|
||||
ContainerID: 1,
|
||||
Size: 65536,
|
||||
},
|
||||
}
|
||||
if !reflect.DeepEqual(expectedUIDMappings, s.Linux.UIDMappings) {
|
||||
t.Errorf("expected %#v, got %#v", expectedUIDMappings, s.Linux.UIDMappings)
|
||||
}
|
||||
expectedGIDMappings := expectedUIDMappings
|
||||
if !reflect.DeepEqual(expectedGIDMappings, s.Linux.GIDMappings) {
|
||||
t.Errorf("expected %#v, got %#v", expectedGIDMappings, s.Linux.GIDMappings)
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
spec := specGen()
|
||||
err := ToRootlessWithContext(c.ctx, spec, c.opts)
|
||||
if err != nil {
|
||||
t.Errorf("Couldn't convert a rootful spec to rootless: %v", err)
|
||||
}
|
||||
|
||||
// t.Logf("%#v", spec)
|
||||
if c.additionalValidator != nil {
|
||||
c.additionalValidator(t, spec)
|
||||
}
|
||||
|
||||
opts := &specconv.CreateOpts{
|
||||
CgroupName: "ContainerID",
|
||||
UseSystemdCgroup: false,
|
||||
Spec: spec,
|
||||
Rootless: true,
|
||||
}
|
||||
|
||||
config, err := specconv.CreateLibcontainerConfig(opts)
|
||||
if err != nil {
|
||||
t.Errorf("Couldn't create libcontainer config: %v", err)
|
||||
}
|
||||
|
||||
validator := validate.New()
|
||||
if err := validator.Validate(config); err != nil {
|
||||
t.Errorf("Expected specconv to produce valid rootless container config: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,113 @@
|
|||
package specconv
|
||||
|
||||
import (
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// ToRootless converts spec to be compatible with "rootless" runc.
|
||||
// * Adds userns (Note: since we are already in userns, ideally we should not need to do this. runc-side issue is tracked at https://github.com/opencontainers/runc/issues/1837)
|
||||
// * Fix up mount flags (same as above)
|
||||
// * Replace /sys with bind-mount (FIXME: we don't need to do this if netns is unshared)
|
||||
func ToRootless(spec *specs.Spec) error {
|
||||
if !system.RunningInUserNS() {
|
||||
return errors.New("needs to be in user namespace")
|
||||
}
|
||||
uidMap, err := user.CurrentProcessUIDMap()
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
gidMap, err := user.CurrentProcessUIDMap()
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
return toRootless(spec, uidMap, gidMap)
|
||||
}
|
||||
|
||||
// toRootless was forked from github.com/opencontainers/runc/libcontainer/specconv
|
||||
func toRootless(spec *specs.Spec, uidMap, gidMap []user.IDMap) error {
|
||||
if err := configureUserNS(spec, uidMap, gidMap); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := configureMounts(spec); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Remove cgroup settings.
|
||||
spec.Linux.Resources = nil
|
||||
spec.Linux.CgroupsPath = ""
|
||||
return nil
|
||||
}
|
||||
|
||||
// configureUserNS add suserns and the current ID map to the spec.
|
||||
// Since we are already in userns, ideally we should not need to add userns.
|
||||
// However, currently rootless runc always requires userns to be added.
|
||||
// https://github.com/opencontainers/runc/issues/1837
|
||||
func configureUserNS(spec *specs.Spec, uidMap, gidMap []user.IDMap) error {
|
||||
spec.Linux.Namespaces = append(spec.Linux.Namespaces, specs.LinuxNamespace{
|
||||
Type: specs.UserNamespace,
|
||||
})
|
||||
|
||||
sort.Slice(uidMap, func(i, j int) bool { return uidMap[i].ID < uidMap[j].ID })
|
||||
uNextContainerID := int64(0)
|
||||
for _, u := range uidMap {
|
||||
spec.Linux.UIDMappings = append(spec.Linux.UIDMappings,
|
||||
specs.LinuxIDMapping{
|
||||
HostID: uint32(u.ID),
|
||||
ContainerID: uint32(uNextContainerID),
|
||||
Size: uint32(u.Count),
|
||||
})
|
||||
uNextContainerID += u.Count
|
||||
}
|
||||
sort.Slice(gidMap, func(i, j int) bool { return gidMap[i].ID < gidMap[j].ID })
|
||||
gNextContainerID := int64(0)
|
||||
for _, g := range gidMap {
|
||||
spec.Linux.GIDMappings = append(spec.Linux.GIDMappings,
|
||||
specs.LinuxIDMapping{
|
||||
HostID: uint32(g.ID),
|
||||
ContainerID: uint32(gNextContainerID),
|
||||
Size: uint32(g.Count),
|
||||
})
|
||||
gNextContainerID += g.Count
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func configureMounts(spec *specs.Spec) error {
|
||||
var mounts []specs.Mount
|
||||
for _, mount := range spec.Mounts {
|
||||
// Ignore all mounts that are under /sys, because we add /sys later.
|
||||
if strings.HasPrefix(mount.Destination, "/sys") {
|
||||
continue
|
||||
}
|
||||
|
||||
// Remove all gid= and uid= mappings.
|
||||
// Since we are already in userns, ideally we should not need to do this.
|
||||
// https://github.com/opencontainers/runc/issues/1837
|
||||
var options []string
|
||||
for _, option := range mount.Options {
|
||||
if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") {
|
||||
options = append(options, option)
|
||||
}
|
||||
}
|
||||
mount.Options = options
|
||||
mounts = append(mounts, mount)
|
||||
}
|
||||
|
||||
// Add the sysfs mount as an rbind, because we can't mount /sys unless we have netns.
|
||||
// TODO: keep original /sys mount when we have netns.
|
||||
mounts = append(mounts, specs.Mount{
|
||||
Source: "/sys",
|
||||
Destination: "/sys",
|
||||
Type: "none",
|
||||
Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
|
||||
})
|
||||
spec.Mounts = mounts
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
package specconv
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/specconv"
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestToRootless(t *testing.T) {
|
||||
spec := specconv.Example()
|
||||
uidMap := []user.IDMap{
|
||||
{
|
||||
ID: 0,
|
||||
ParentID: 4242,
|
||||
Count: 1,
|
||||
},
|
||||
{
|
||||
ID: 1,
|
||||
ParentID: 231072,
|
||||
Count: 65536,
|
||||
},
|
||||
}
|
||||
gidMap := uidMap
|
||||
expectedUIDMappings := []specs.LinuxIDMapping{
|
||||
{
|
||||
HostID: 0,
|
||||
ContainerID: 0,
|
||||
Size: 1,
|
||||
},
|
||||
{
|
||||
HostID: 1,
|
||||
ContainerID: 1,
|
||||
Size: 65536,
|
||||
},
|
||||
}
|
||||
err := toRootless(spec, uidMap, gidMap)
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, expectedUIDMappings, spec.Linux.UIDMappings)
|
||||
}
|
|
@ -96,13 +96,13 @@ state = %q
|
|||
buildkitdSock, stop, err := runBuildkitd([]string{"buildkitd",
|
||||
"--oci-worker=false",
|
||||
"--containerd-worker=true",
|
||||
"--containerd-worker-addr", address}, logs)
|
||||
"--containerd-worker-addr", address}, logs, 0, 0)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
deferF.append(stop)
|
||||
|
||||
return &cdsandbox{address: address, sandbox: sandbox{address: buildkitdSock, logs: logs, cleanup: deferF}}, cl, nil
|
||||
return &cdsandbox{address: address, sandbox: sandbox{address: buildkitdSock, logs: logs, cleanup: deferF, rootless: false}}, cl, nil
|
||||
}
|
||||
|
||||
type cdsandbox struct {
|
||||
|
|
|
@ -3,6 +3,7 @@ package integration
|
|||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
|
@ -12,16 +13,31 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/google/shlex"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
func init() {
|
||||
register(&oci{})
|
||||
|
||||
// the rootless uid is defined in hack/dockerfiles/test.Dockerfile
|
||||
if s := os.Getenv("BUILDKIT_INTEGRATION_ROOTLESS_IDPAIR"); s != "" {
|
||||
var uid, gid int
|
||||
if _, err := fmt.Sscanf(s, "%d:%d", &uid, &gid); err != nil {
|
||||
panic(errors.Errorf("unexpected BUILDKIT_INTEGRATION_ROOTLESS_IDPAIR: %q", s))
|
||||
}
|
||||
register(&oci{uid: uid, gid: gid})
|
||||
}
|
||||
}
|
||||
|
||||
type oci struct {
|
||||
uid int
|
||||
gid int
|
||||
}
|
||||
|
||||
func (s *oci) Name() string {
|
||||
if s.uid != 0 {
|
||||
return "oci-rootless"
|
||||
}
|
||||
return "oci"
|
||||
}
|
||||
|
||||
|
@ -33,7 +49,15 @@ func (s *oci) New() (Sandbox, func() error, error) {
|
|||
return nil, nil, err
|
||||
}
|
||||
logs := map[string]*bytes.Buffer{}
|
||||
buildkitdSock, stop, err := runBuildkitd([]string{"buildkitd", "--oci-worker=true", "--containerd-worker=false"}, logs)
|
||||
buildkitdArgs := []string{"buildkitd", "--oci-worker=true", "--containerd-worker=false"}
|
||||
if s.uid != 0 {
|
||||
if s.gid == 0 {
|
||||
return nil, nil, errors.Errorf("unsupported id pair: uid=%d, gid=%d", s.uid, s.gid)
|
||||
}
|
||||
// TODO: make sure the user exists and subuid/subgid are configured.
|
||||
buildkitdArgs = append([]string{"sudo", "-u", fmt.Sprintf("#%d", s.uid), "-i", "--", "rootlesskit"}, buildkitdArgs...)
|
||||
}
|
||||
buildkitdSock, stop, err := runBuildkitd(buildkitdArgs, logs, s.uid, s.gid)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
@ -41,13 +65,14 @@ func (s *oci) New() (Sandbox, func() error, error) {
|
|||
deferF := &multiCloser{}
|
||||
deferF.append(stop)
|
||||
|
||||
return &sandbox{address: buildkitdSock, logs: logs, cleanup: deferF}, deferF.F(), nil
|
||||
return &sandbox{address: buildkitdSock, logs: logs, cleanup: deferF, rootless: s.uid != 0}, deferF.F(), nil
|
||||
}
|
||||
|
||||
type sandbox struct {
|
||||
address string
|
||||
logs map[string]*bytes.Buffer
|
||||
cleanup *multiCloser
|
||||
address string
|
||||
logs map[string]*bytes.Buffer
|
||||
cleanup *multiCloser
|
||||
rootless bool
|
||||
}
|
||||
|
||||
func (sb *sandbox) Address() string {
|
||||
|
@ -85,7 +110,11 @@ func (sb *sandbox) Cmd(args ...string) *exec.Cmd {
|
|||
return cmd
|
||||
}
|
||||
|
||||
func runBuildkitd(args []string, logs map[string]*bytes.Buffer) (address string, cl func() error, err error) {
|
||||
func (sb *sandbox) Rootless() bool {
|
||||
return sb.rootless
|
||||
}
|
||||
|
||||
func runBuildkitd(args []string, logs map[string]*bytes.Buffer, uid, gid int) (address string, cl func() error, err error) {
|
||||
deferF := &multiCloser{}
|
||||
cl = deferF.F()
|
||||
|
||||
|
@ -100,6 +129,9 @@ func runBuildkitd(args []string, logs map[string]*bytes.Buffer) (address string,
|
|||
if err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
if err := os.Chown(tmpdir, uid, gid); err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
deferF.append(func() error { return os.RemoveAll(tmpdir) })
|
||||
|
||||
address = "unix://" + filepath.Join(tmpdir, "buildkitd.sock")
|
||||
|
|
|
@ -17,6 +17,7 @@ type Sandbox interface {
|
|||
PrintLogs(*testing.T)
|
||||
Cmd(...string) *exec.Cmd
|
||||
NewRegistry() (string, error)
|
||||
Rootless() bool
|
||||
}
|
||||
|
||||
type Worker interface {
|
||||
|
|
|
@ -60,8 +60,10 @@ github.com/uber/jaeger-lib c48167d9cae5887393dd5e61efd06a4a48b7fbb3
|
|||
github.com/codahale/hdrhistogram f8ad88b59a584afeee9d334eff879b104439117b
|
||||
|
||||
github.com/opentracing-contrib/go-stdlib b1a47cfbdd7543e70e9ef3e73d0802ad306cc1cc
|
||||
github.com/opencontainers/selinux 74a747aeaf2d66097b6908f572794f49f07dda2c
|
||||
|
||||
# used by dockerfile tests
|
||||
gotest.tools v2.1.0
|
||||
github.com/google/go-cmp v0.2.0
|
||||
|
||||
# used by rootless spec conv test
|
||||
github.com/seccomp/libseccomp-golang 32f571b70023028bd57d9288c20efbcb237f3ce0
|
||||
|
|
116
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
generated
vendored
116
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
generated
vendored
|
@ -1,116 +0,0 @@
|
|||
package validate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"reflect"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var (
|
||||
geteuid = os.Geteuid
|
||||
getegid = os.Getegid
|
||||
)
|
||||
|
||||
func (v *ConfigValidator) rootless(config *configs.Config) error {
|
||||
if err := rootlessMappings(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := rootlessMount(config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// XXX: We currently can't verify the user config at all, because
|
||||
// configs.Config doesn't store the user-related configs. So this
|
||||
// has to be verified by setupUser() in init_linux.go.
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func hasIDMapping(id int, mappings []configs.IDMap) bool {
|
||||
for _, m := range mappings {
|
||||
if id >= m.ContainerID && id < m.ContainerID+m.Size {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func rootlessMappings(config *configs.Config) error {
|
||||
if euid := geteuid(); euid != 0 {
|
||||
if !config.Namespaces.Contains(configs.NEWUSER) {
|
||||
return fmt.Errorf("rootless containers require user namespaces")
|
||||
}
|
||||
if len(config.UidMappings) == 0 {
|
||||
return fmt.Errorf("rootless containers requires at least one UID mapping")
|
||||
}
|
||||
if len(config.GidMappings) == 0 {
|
||||
return fmt.Errorf("rootless containers requires at least one GID mapping")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cgroup verifies that the user isn't trying to set any cgroup limits or paths.
|
||||
func rootlessCgroup(config *configs.Config) error {
|
||||
// Nothing set at all.
|
||||
if config.Cgroups == nil || config.Cgroups.Resources == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Used for comparing to the zero value.
|
||||
left := reflect.ValueOf(*config.Cgroups.Resources)
|
||||
right := reflect.Zero(left.Type())
|
||||
|
||||
// This is all we need to do, since specconv won't add cgroup options in
|
||||
// rootless mode.
|
||||
if !reflect.DeepEqual(left.Interface(), right.Interface()) {
|
||||
return fmt.Errorf("cannot specify resource limits in rootless container")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// mount verifies that the user isn't trying to set up any mounts they don't have
|
||||
// the rights to do. In addition, it makes sure that no mount has a `uid=` or
|
||||
// `gid=` option that doesn't resolve to root.
|
||||
func rootlessMount(config *configs.Config) error {
|
||||
// XXX: We could whitelist allowed devices at this point, but I'm not
|
||||
// convinced that's a good idea. The kernel is the best arbiter of
|
||||
// access control.
|
||||
|
||||
for _, mount := range config.Mounts {
|
||||
// Check that the options list doesn't contain any uid= or gid= entries
|
||||
// that don't resolve to root.
|
||||
for _, opt := range strings.Split(mount.Data, ",") {
|
||||
if strings.HasPrefix(opt, "uid=") {
|
||||
var uid int
|
||||
n, err := fmt.Sscanf(opt, "uid=%d", &uid)
|
||||
if n != 1 || err != nil {
|
||||
// Ignore unknown mount options.
|
||||
continue
|
||||
}
|
||||
if !hasIDMapping(uid, config.UidMappings) {
|
||||
return fmt.Errorf("cannot specify uid= mount options for unmapped uid in rootless containers")
|
||||
}
|
||||
}
|
||||
|
||||
if strings.HasPrefix(opt, "gid=") {
|
||||
var gid int
|
||||
n, err := fmt.Sscanf(opt, "gid=%d", &gid)
|
||||
if n != 1 || err != nil {
|
||||
// Ignore unknown mount options.
|
||||
continue
|
||||
}
|
||||
if !hasIDMapping(gid, config.GidMappings) {
|
||||
return fmt.Errorf("cannot specify gid= mount options for unmapped gid in rootless containers")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
212
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
generated
vendored
212
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
generated
vendored
|
@ -1,212 +0,0 @@
|
|||
package validate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
selinux "github.com/opencontainers/selinux/go-selinux"
|
||||
)
|
||||
|
||||
type Validator interface {
|
||||
Validate(*configs.Config) error
|
||||
}
|
||||
|
||||
func New() Validator {
|
||||
return &ConfigValidator{}
|
||||
}
|
||||
|
||||
type ConfigValidator struct {
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) Validate(config *configs.Config) error {
|
||||
if err := v.rootfs(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.network(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.hostname(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.security(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.usernamespace(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.sysctl(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.intelrdt(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Rootless {
|
||||
if err := v.rootless(config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// rootfs validates if the rootfs is an absolute path and is not a symlink
|
||||
// to the container's root filesystem.
|
||||
func (v *ConfigValidator) rootfs(config *configs.Config) error {
|
||||
if _, err := os.Stat(config.Rootfs); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return fmt.Errorf("rootfs (%s) does not exist", config.Rootfs)
|
||||
}
|
||||
return err
|
||||
}
|
||||
cleaned, err := filepath.Abs(config.Rootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil {
|
||||
return err
|
||||
}
|
||||
if filepath.Clean(config.Rootfs) != cleaned {
|
||||
return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) network(config *configs.Config) error {
|
||||
if !config.Namespaces.Contains(configs.NEWNET) {
|
||||
if len(config.Networks) > 0 || len(config.Routes) > 0 {
|
||||
return fmt.Errorf("unable to apply network settings without a private NET namespace")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) hostname(config *configs.Config) error {
|
||||
if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) {
|
||||
return fmt.Errorf("unable to set hostname without a private UTS namespace")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) security(config *configs.Config) error {
|
||||
// restrict sys without mount namespace
|
||||
if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) &&
|
||||
!config.Namespaces.Contains(configs.NEWNS) {
|
||||
return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
|
||||
}
|
||||
if config.ProcessLabel != "" && !selinux.GetEnabled() {
|
||||
return fmt.Errorf("selinux label is specified in config, but selinux is disabled or not supported")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) usernamespace(config *configs.Config) error {
|
||||
if config.Namespaces.Contains(configs.NEWUSER) {
|
||||
if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
|
||||
return fmt.Errorf("USER namespaces aren't enabled in the kernel")
|
||||
}
|
||||
} else {
|
||||
if config.UidMappings != nil || config.GidMappings != nil {
|
||||
return fmt.Errorf("User namespace mappings specified, but USER namespace isn't enabled in the config")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// sysctl validates that the specified sysctl keys are valid or not.
|
||||
// /proc/sys isn't completely namespaced and depending on which namespaces
|
||||
// are specified, a subset of sysctls are permitted.
|
||||
func (v *ConfigValidator) sysctl(config *configs.Config) error {
|
||||
validSysctlMap := map[string]bool{
|
||||
"kernel.msgmax": true,
|
||||
"kernel.msgmnb": true,
|
||||
"kernel.msgmni": true,
|
||||
"kernel.sem": true,
|
||||
"kernel.shmall": true,
|
||||
"kernel.shmmax": true,
|
||||
"kernel.shmmni": true,
|
||||
"kernel.shm_rmid_forced": true,
|
||||
}
|
||||
|
||||
for s := range config.Sysctl {
|
||||
if validSysctlMap[s] || strings.HasPrefix(s, "fs.mqueue.") {
|
||||
if config.Namespaces.Contains(configs.NEWIPC) {
|
||||
continue
|
||||
} else {
|
||||
return fmt.Errorf("sysctl %q is not allowed in the hosts ipc namespace", s)
|
||||
}
|
||||
}
|
||||
if strings.HasPrefix(s, "net.") {
|
||||
if config.Namespaces.Contains(configs.NEWNET) {
|
||||
if path := config.Namespaces.PathOf(configs.NEWNET); path != "" {
|
||||
if err := checkHostNs(s, path); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
continue
|
||||
} else {
|
||||
return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", s)
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("sysctl %q is not in a separate kernel namespace", s)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) intelrdt(config *configs.Config) error {
|
||||
if config.IntelRdt != nil {
|
||||
if !intelrdt.IsEnabled() {
|
||||
return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled")
|
||||
}
|
||||
if config.IntelRdt.L3CacheSchema == "" {
|
||||
return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func isSymbolicLink(path string) (bool, error) {
|
||||
fi, err := os.Lstat(path)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return fi.Mode()&os.ModeSymlink == os.ModeSymlink, nil
|
||||
}
|
||||
|
||||
// checkHostNs checks whether network sysctl is used in host namespace.
|
||||
func checkHostNs(sysctlConfig string, path string) error {
|
||||
var currentProcessNetns = "/proc/self/ns/net"
|
||||
// readlink on the current processes network namespace
|
||||
destOfCurrentProcess, err := os.Readlink(currentProcessNetns)
|
||||
if err != nil {
|
||||
return fmt.Errorf("read soft link %q error", currentProcessNetns)
|
||||
}
|
||||
|
||||
// First check if the provided path is a symbolic link
|
||||
symLink, err := isSymbolicLink(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not check that %q is a symlink: %v", path, err)
|
||||
}
|
||||
|
||||
if symLink == false {
|
||||
// The provided namespace is not a symbolic link,
|
||||
// it is not the host namespace.
|
||||
return nil
|
||||
}
|
||||
|
||||
// readlink on the path provided in the struct
|
||||
destOfContainer, err := os.Readlink(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("read soft link %q error", path)
|
||||
}
|
||||
if destOfContainer == destOfCurrentProcess {
|
||||
return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", sysctlConfig)
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -1,553 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package intelrdt
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
/*
|
||||
* About Intel RDT/CAT feature:
|
||||
* Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
|
||||
* Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3
|
||||
* Cache is the only resource that is supported in RDT.
|
||||
*
|
||||
* This feature provides a way for the software to restrict cache allocation to a
|
||||
* defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
|
||||
* The different subsets are identified by class of service (CLOS) and each CLOS
|
||||
* has a capacity bitmask (CBM).
|
||||
*
|
||||
* For more information about Intel RDT/CAT can be found in the section 17.17
|
||||
* of Intel Software Developer Manual.
|
||||
*
|
||||
* About Intel RDT/CAT kernel interface:
|
||||
* In Linux 4.10 kernel or newer, the interface is defined and exposed via
|
||||
* "resource control" filesystem, which is a "cgroup-like" interface.
|
||||
*
|
||||
* Comparing with cgroups, it has similar process management lifecycle and
|
||||
* interfaces in a container. But unlike cgroups' hierarchy, it has single level
|
||||
* filesystem layout.
|
||||
*
|
||||
* Intel RDT "resource control" filesystem hierarchy:
|
||||
* mount -t resctrl resctrl /sys/fs/resctrl
|
||||
* tree /sys/fs/resctrl
|
||||
* /sys/fs/resctrl/
|
||||
* |-- info
|
||||
* | |-- L3
|
||||
* | |-- cbm_mask
|
||||
* | |-- min_cbm_bits
|
||||
* | |-- num_closids
|
||||
* |-- cpus
|
||||
* |-- schemata
|
||||
* |-- tasks
|
||||
* |-- <container_id>
|
||||
* |-- cpus
|
||||
* |-- schemata
|
||||
* |-- tasks
|
||||
*
|
||||
* For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
|
||||
* resource constraints.
|
||||
*
|
||||
* The file `tasks` has a list of tasks that belongs to this group (e.g.,
|
||||
* <container_id>" group). Tasks can be added to a group by writing the task ID
|
||||
* to the "tasks" file (which will automatically remove them from the previous
|
||||
* group to which they belonged). New tasks created by fork(2) and clone(2) are
|
||||
* added to the same group as their parent. If a pid is not in any sub group, it is
|
||||
* in root group.
|
||||
*
|
||||
* The file `schemata` has allocation bitmasks/values for L3 cache on each socket,
|
||||
* which contains L3 cache id and capacity bitmask (CBM).
|
||||
* Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
* For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
|
||||
* which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
|
||||
*
|
||||
* The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
|
||||
* be set is less than the max bit. The max bits in the CBM is varied among
|
||||
* supported Intel Xeon platforms. In Intel RDT "resource control" filesystem
|
||||
* layout, the CBM in a group should be a subset of the CBM in root. Kernel will
|
||||
* check if it is valid when writing. e.g., 0xfffff in root indicates the max bits
|
||||
* of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM
|
||||
* values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||
*
|
||||
* For more information about Intel RDT/CAT kernel interface:
|
||||
* https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
|
||||
*
|
||||
* An example for runc:
|
||||
* Consider a two-socket machine with two L3 caches where the default CBM is
|
||||
* 0xfffff and the max CBM length is 20 bits. With this configuration, tasks
|
||||
* inside the container only have access to the "upper" 80% of L3 cache id 0 and
|
||||
* the "lower" 50% L3 cache id 1:
|
||||
*
|
||||
* "linux": {
|
||||
* "intelRdt": {
|
||||
* "l3CacheSchema": "L3:0=ffff0;1=3ff"
|
||||
* }
|
||||
* }
|
||||
*/
|
||||
|
||||
type Manager interface {
|
||||
// Applies Intel RDT configuration to the process with the specified pid
|
||||
Apply(pid int) error
|
||||
|
||||
// Returns statistics for Intel RDT
|
||||
GetStats() (*Stats, error)
|
||||
|
||||
// Destroys the Intel RDT 'container_id' group
|
||||
Destroy() error
|
||||
|
||||
// Returns Intel RDT path to save in a state file and to be able to
|
||||
// restore the object later
|
||||
GetPath() string
|
||||
|
||||
// Set Intel RDT "resource control" filesystem as configured.
|
||||
Set(container *configs.Config) error
|
||||
}
|
||||
|
||||
// This implements interface Manager
|
||||
type IntelRdtManager struct {
|
||||
mu sync.Mutex
|
||||
Config *configs.Config
|
||||
Id string
|
||||
Path string
|
||||
}
|
||||
|
||||
const (
|
||||
IntelRdtTasks = "tasks"
|
||||
)
|
||||
|
||||
var (
|
||||
// The absolute root path of the Intel RDT "resource control" filesystem
|
||||
intelRdtRoot string
|
||||
intelRdtRootLock sync.Mutex
|
||||
|
||||
// The flag to indicate if Intel RDT is supported
|
||||
isEnabled bool
|
||||
)
|
||||
|
||||
type intelRdtData struct {
|
||||
root string
|
||||
config *configs.Config
|
||||
pid int
|
||||
}
|
||||
|
||||
// Check if Intel RDT is enabled in init()
|
||||
func init() {
|
||||
// 1. Check if hardware and kernel support Intel RDT/CAT feature
|
||||
// "cat_l3" flag is set if supported
|
||||
isFlagSet, err := parseCpuInfoFile("/proc/cpuinfo")
|
||||
if !isFlagSet || err != nil {
|
||||
isEnabled = false
|
||||
return
|
||||
}
|
||||
|
||||
// 2. Check if Intel RDT "resource control" filesystem is mounted
|
||||
// The user guarantees to mount the filesystem
|
||||
isEnabled = isIntelRdtMounted()
|
||||
}
|
||||
|
||||
// Return the mount point path of Intel RDT "resource control" filesysem
|
||||
func findIntelRdtMountpointDir() (string, error) {
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
text := s.Text()
|
||||
fields := strings.Split(text, " ")
|
||||
// Safe as mountinfo encodes mountpoints with spaces as \040.
|
||||
index := strings.Index(text, " - ")
|
||||
postSeparatorFields := strings.Fields(text[index+3:])
|
||||
numPostFields := len(postSeparatorFields)
|
||||
|
||||
// This is an error as we can't detect if the mount is for "Intel RDT"
|
||||
if numPostFields == 0 {
|
||||
return "", fmt.Errorf("Found no fields post '-' in %q", text)
|
||||
}
|
||||
|
||||
if postSeparatorFields[0] == "resctrl" {
|
||||
// Check that the mount is properly formated.
|
||||
if numPostFields < 3 {
|
||||
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
|
||||
}
|
||||
|
||||
return fields[4], nil
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return "", NewNotFoundError("Intel RDT")
|
||||
}
|
||||
|
||||
// Gets the root path of Intel RDT "resource control" filesystem
|
||||
func getIntelRdtRoot() (string, error) {
|
||||
intelRdtRootLock.Lock()
|
||||
defer intelRdtRootLock.Unlock()
|
||||
|
||||
if intelRdtRoot != "" {
|
||||
return intelRdtRoot, nil
|
||||
}
|
||||
|
||||
root, err := findIntelRdtMountpointDir()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if _, err := os.Stat(root); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
intelRdtRoot = root
|
||||
return intelRdtRoot, nil
|
||||
}
|
||||
|
||||
func isIntelRdtMounted() bool {
|
||||
_, err := getIntelRdtRoot()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func parseCpuInfoFile(path string) (bool, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
text := s.Text()
|
||||
flags := strings.Split(text, " ")
|
||||
|
||||
// "cat_l3" flag is set if Intel RDT/CAT is supported
|
||||
for _, flag := range flags {
|
||||
if flag == "cat_l3" {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func parseUint(s string, base, bitSize int) (uint64, error) {
|
||||
value, err := strconv.ParseUint(s, base, bitSize)
|
||||
if err != nil {
|
||||
intValue, intErr := strconv.ParseInt(s, base, bitSize)
|
||||
// 1. Handle negative values greater than MinInt64 (and)
|
||||
// 2. Handle negative values lesser than MinInt64
|
||||
if intErr == nil && intValue < 0 {
|
||||
return 0, nil
|
||||
} else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
return value, err
|
||||
}
|
||||
|
||||
return value, nil
|
||||
}
|
||||
|
||||
// Gets a single uint64 value from the specified file.
|
||||
func getIntelRdtParamUint(path, file string) (uint64, error) {
|
||||
fileName := filepath.Join(path, file)
|
||||
contents, err := ioutil.ReadFile(fileName)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
res, err := parseUint(strings.TrimSpace(string(contents)), 10, 64)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("unable to parse %q as a uint from file %q", string(contents), fileName)
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// Gets a string value from the specified file
|
||||
func getIntelRdtParamString(path, file string) (string, error) {
|
||||
contents, err := ioutil.ReadFile(filepath.Join(path, file))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return strings.TrimSpace(string(contents)), nil
|
||||
}
|
||||
|
||||
func readTasksFile(dir string) ([]int, error) {
|
||||
f, err := os.Open(filepath.Join(dir, IntelRdtTasks))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var (
|
||||
s = bufio.NewScanner(f)
|
||||
out = []int{}
|
||||
)
|
||||
|
||||
for s.Scan() {
|
||||
if t := s.Text(); t != "" {
|
||||
pid, err := strconv.Atoi(t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, pid)
|
||||
}
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func writeFile(dir, file, data string) error {
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s", file)
|
||||
}
|
||||
if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data+"\n"), 0700); err != nil {
|
||||
return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getIntelRdtData(c *configs.Config, pid int) (*intelRdtData, error) {
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &intelRdtData{
|
||||
root: rootPath,
|
||||
config: c,
|
||||
pid: pid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Get the read-only L3 cache information
|
||||
func getL3CacheInfo() (*L3CacheInfo, error) {
|
||||
l3CacheInfo := &L3CacheInfo{}
|
||||
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
if err != nil {
|
||||
return l3CacheInfo, err
|
||||
}
|
||||
|
||||
path := filepath.Join(rootPath, "info", "L3")
|
||||
cbmMask, err := getIntelRdtParamString(path, "cbm_mask")
|
||||
if err != nil {
|
||||
return l3CacheInfo, err
|
||||
}
|
||||
minCbmBits, err := getIntelRdtParamUint(path, "min_cbm_bits")
|
||||
if err != nil {
|
||||
return l3CacheInfo, err
|
||||
}
|
||||
numClosids, err := getIntelRdtParamUint(path, "num_closids")
|
||||
if err != nil {
|
||||
return l3CacheInfo, err
|
||||
}
|
||||
|
||||
l3CacheInfo.CbmMask = cbmMask
|
||||
l3CacheInfo.MinCbmBits = minCbmBits
|
||||
l3CacheInfo.NumClosids = numClosids
|
||||
|
||||
return l3CacheInfo, nil
|
||||
}
|
||||
|
||||
// WriteIntelRdtTasks writes the specified pid into the "tasks" file
|
||||
func WriteIntelRdtTasks(dir string, pid int) error {
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s", IntelRdtTasks)
|
||||
}
|
||||
|
||||
// Dont attach any pid if -1 is specified as a pid
|
||||
if pid != -1 {
|
||||
if err := ioutil.WriteFile(filepath.Join(dir, IntelRdtTasks), []byte(strconv.Itoa(pid)), 0700); err != nil {
|
||||
return fmt.Errorf("failed to write %v to %v: %v", pid, IntelRdtTasks, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if Intel RDT is enabled
|
||||
func IsEnabled() bool {
|
||||
return isEnabled
|
||||
}
|
||||
|
||||
// Get the 'container_id' path in Intel RDT "resource control" filesystem
|
||||
func GetIntelRdtPath(id string) (string, error) {
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
path := filepath.Join(rootPath, id)
|
||||
return path, nil
|
||||
}
|
||||
|
||||
// Applies Intel RDT configuration to the process with the specified pid
|
||||
func (m *IntelRdtManager) Apply(pid int) (err error) {
|
||||
// If intelRdt is not specified in config, we do nothing
|
||||
if m.Config.IntelRdt == nil {
|
||||
return nil
|
||||
}
|
||||
d, err := getIntelRdtData(m.Config, pid)
|
||||
if err != nil && !IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
path, err := d.join(m.Id)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
m.Path = path
|
||||
return nil
|
||||
}
|
||||
|
||||
// Destroys the Intel RDT 'container_id' group
|
||||
func (m *IntelRdtManager) Destroy() error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if err := os.RemoveAll(m.Path); err != nil {
|
||||
return err
|
||||
}
|
||||
m.Path = ""
|
||||
return nil
|
||||
}
|
||||
|
||||
// Returns Intel RDT path to save in a state file and to be able to
|
||||
// restore the object later
|
||||
func (m *IntelRdtManager) GetPath() string {
|
||||
if m.Path == "" {
|
||||
m.Path, _ = GetIntelRdtPath(m.Id)
|
||||
}
|
||||
return m.Path
|
||||
}
|
||||
|
||||
// Returns statistics for Intel RDT
|
||||
func (m *IntelRdtManager) GetStats() (*Stats, error) {
|
||||
// If intelRdt is not specified in config
|
||||
if m.Config.IntelRdt == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := NewStats()
|
||||
|
||||
// The read-only L3 cache information
|
||||
l3CacheInfo, err := getL3CacheInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats.L3CacheInfo = l3CacheInfo
|
||||
|
||||
// The read-only L3 cache schema in root
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// L3 cache schema is in the first line
|
||||
schemaRootStrings := strings.Split(tmpRootStrings, "\n")
|
||||
stats.L3CacheSchemaRoot = schemaRootStrings[0]
|
||||
|
||||
// The L3 cache schema in 'container_id' group
|
||||
tmpStrings, err := getIntelRdtParamString(m.GetPath(), "schemata")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// L3 cache schema is in the first line
|
||||
schemaStrings := strings.Split(tmpStrings, "\n")
|
||||
stats.L3CacheSchema = schemaStrings[0]
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// Set Intel RDT "resource control" filesystem as configured.
|
||||
func (m *IntelRdtManager) Set(container *configs.Config) error {
|
||||
path := m.GetPath()
|
||||
|
||||
// About L3 cache schema file:
|
||||
// The schema has allocation masks/values for L3 cache on each socket,
|
||||
// which contains L3 cache id and capacity bitmask (CBM).
|
||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
// For example, on a two-socket machine, L3's schema line could be:
|
||||
// L3:0=ff;1=c0
|
||||
// Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
|
||||
//
|
||||
// About L3 cache CBM validity:
|
||||
// The valid L3 cache CBM is a *contiguous bits set* and number of
|
||||
// bits that can be set is less than the max bit. The max bits in the
|
||||
// CBM is varied among supported Intel Xeon platforms. In Intel RDT
|
||||
// "resource control" filesystem layout, the CBM in a group should
|
||||
// be a subset of the CBM in root. Kernel will check if it is valid
|
||||
// when writing.
|
||||
// e.g., 0xfffff in root indicates the max bits of CBM is 20 bits,
|
||||
// which mapping to entire L3 cache capacity. Some valid CBM values
|
||||
// to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||
if container.IntelRdt != nil {
|
||||
l3CacheSchema := container.IntelRdt.L3CacheSchema
|
||||
if l3CacheSchema != "" {
|
||||
if err := writeFile(path, "schemata", l3CacheSchema); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (raw *intelRdtData) join(id string) (string, error) {
|
||||
path := filepath.Join(raw.root, id)
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if err := WriteIntelRdtTasks(path, raw.pid); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
|
||||
type NotFoundError struct {
|
||||
ResourceControl string
|
||||
}
|
||||
|
||||
func (e *NotFoundError) Error() string {
|
||||
return fmt.Sprintf("mountpoint for %s not found", e.ResourceControl)
|
||||
}
|
||||
|
||||
func NewNotFoundError(res string) error {
|
||||
return &NotFoundError{
|
||||
ResourceControl: res,
|
||||
}
|
||||
}
|
||||
|
||||
func IsNotFound(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
_, ok := err.(*NotFoundError)
|
||||
return ok
|
||||
}
|
|
@ -1,24 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package intelrdt
|
||||
|
||||
type L3CacheInfo struct {
|
||||
CbmMask string `json:"cbm_mask,omitempty"`
|
||||
MinCbmBits uint64 `json:"min_cbm_bits,omitempty"`
|
||||
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||
}
|
||||
|
||||
type Stats struct {
|
||||
// The read-only L3 cache information
|
||||
L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"`
|
||||
|
||||
// The read-only L3 cache schema in root
|
||||
L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"`
|
||||
|
||||
// The L3 cache schema in 'container_id' group
|
||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||
}
|
||||
|
||||
func NewStats() *Stats {
|
||||
return &Stats{}
|
||||
}
|
|
@ -1,201 +0,0 @@
|
|||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "{}"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright {yyyy} {name of copyright owner}
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -1,7 +0,0 @@
|
|||
# selinux
|
||||
|
||||
[![GoDoc](https://godoc.org/github.com/opencontainers/selinux?status.svg)](https://godoc.org/github.com/opencontainers/selinux) [![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/selinux)](https://goreportcard.com/report/github.com/opencontainers/selinux) [![Build Status](https://travis-ci.org/opencontainers/selinux.svg?branch=master)](https://travis-ci.org/opencontainers/selinux)
|
||||
|
||||
Common SELinux package used across the container ecosystem.
|
||||
|
||||
Please see the [godoc](https://godoc.org/github.com/opencontainers/selinux) for more information.
|
|
@ -1,688 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package selinux
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"crypto/rand"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
const (
|
||||
// Enforcing constant indicate SELinux is in enforcing mode
|
||||
Enforcing = 1
|
||||
// Permissive constant to indicate SELinux is in permissive mode
|
||||
Permissive = 0
|
||||
// Disabled constant to indicate SELinux is disabled
|
||||
Disabled = -1
|
||||
selinuxDir = "/etc/selinux/"
|
||||
selinuxConfig = selinuxDir + "config"
|
||||
selinuxfsMount = "/sys/fs/selinux"
|
||||
selinuxTypeTag = "SELINUXTYPE"
|
||||
selinuxTag = "SELINUX"
|
||||
xattrNameSelinux = "security.selinux"
|
||||
stRdOnly = 0x01
|
||||
selinuxfsMagic = 0xf97cff8c
|
||||
)
|
||||
|
||||
type selinuxState struct {
|
||||
enabledSet bool
|
||||
enabled bool
|
||||
selinuxfsSet bool
|
||||
selinuxfs string
|
||||
mcsList map[string]bool
|
||||
sync.Mutex
|
||||
}
|
||||
|
||||
var (
|
||||
assignRegex = regexp.MustCompile(`^([^=]+)=(.*)$`)
|
||||
state = selinuxState{
|
||||
mcsList: make(map[string]bool),
|
||||
}
|
||||
)
|
||||
|
||||
// Context is a representation of the SELinux label broken into 4 parts
|
||||
type Context map[string]string
|
||||
|
||||
func (s *selinuxState) setEnable(enabled bool) bool {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
s.enabledSet = true
|
||||
s.enabled = enabled
|
||||
return s.enabled
|
||||
}
|
||||
|
||||
func (s *selinuxState) getEnabled() bool {
|
||||
s.Lock()
|
||||
enabled := s.enabled
|
||||
enabledSet := s.enabledSet
|
||||
s.Unlock()
|
||||
if enabledSet {
|
||||
return enabled
|
||||
}
|
||||
|
||||
enabled = false
|
||||
if fs := getSelinuxMountPoint(); fs != "" {
|
||||
if con, _ := CurrentLabel(); con != "kernel" {
|
||||
enabled = true
|
||||
}
|
||||
}
|
||||
return s.setEnable(enabled)
|
||||
}
|
||||
|
||||
// SetDisabled disables selinux support for the package
|
||||
func SetDisabled() {
|
||||
state.setEnable(false)
|
||||
}
|
||||
|
||||
func (s *selinuxState) setSELinuxfs(selinuxfs string) string {
|
||||
s.Lock()
|
||||
defer s.Unlock()
|
||||
s.selinuxfsSet = true
|
||||
s.selinuxfs = selinuxfs
|
||||
return s.selinuxfs
|
||||
}
|
||||
|
||||
func verifySELinuxfsMount(mnt string) bool {
|
||||
var buf syscall.Statfs_t
|
||||
for {
|
||||
err := syscall.Statfs(mnt, &buf)
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
if err == syscall.EAGAIN {
|
||||
continue
|
||||
}
|
||||
return false
|
||||
}
|
||||
if uint32(buf.Type) != uint32(selinuxfsMagic) {
|
||||
return false
|
||||
}
|
||||
if (buf.Flags & stRdOnly) != 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func findSELinuxfs() string {
|
||||
// fast path: check the default mount first
|
||||
if verifySELinuxfsMount(selinuxfsMount) {
|
||||
return selinuxfsMount
|
||||
}
|
||||
|
||||
// check if selinuxfs is available before going the slow path
|
||||
fs, err := ioutil.ReadFile("/proc/filesystems")
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if !bytes.Contains(fs, []byte("\tselinuxfs\n")) {
|
||||
return ""
|
||||
}
|
||||
|
||||
// slow path: try to find among the mounts
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
scanner := bufio.NewScanner(f)
|
||||
for {
|
||||
mnt := findSELinuxfsMount(scanner)
|
||||
if mnt == "" { // error or not found
|
||||
return ""
|
||||
}
|
||||
if verifySELinuxfsMount(mnt) {
|
||||
return mnt
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// findSELinuxfsMount returns a next selinuxfs mount point found,
|
||||
// if there is one, or an empty string in case of EOF or error.
|
||||
func findSELinuxfsMount(s *bufio.Scanner) string {
|
||||
for s.Scan() {
|
||||
txt := s.Text()
|
||||
// The first field after - is fs type.
|
||||
// Safe as spaces in mountpoints are encoded as \040
|
||||
if !strings.Contains(txt, " - selinuxfs ") {
|
||||
continue
|
||||
}
|
||||
const mPos = 5 // mount point is 5th field
|
||||
fields := strings.SplitN(txt, " ", mPos+1)
|
||||
if len(fields) < mPos+1 {
|
||||
continue
|
||||
}
|
||||
return fields[mPos-1]
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (s *selinuxState) getSELinuxfs() string {
|
||||
s.Lock()
|
||||
selinuxfs := s.selinuxfs
|
||||
selinuxfsSet := s.selinuxfsSet
|
||||
s.Unlock()
|
||||
if selinuxfsSet {
|
||||
return selinuxfs
|
||||
}
|
||||
|
||||
return s.setSELinuxfs(findSELinuxfs())
|
||||
}
|
||||
|
||||
// getSelinuxMountPoint returns the path to the mountpoint of an selinuxfs
|
||||
// filesystem or an empty string if no mountpoint is found. Selinuxfs is
|
||||
// a proc-like pseudo-filesystem that exposes the selinux policy API to
|
||||
// processes. The existence of an selinuxfs mount is used to determine
|
||||
// whether selinux is currently enabled or not.
|
||||
func getSelinuxMountPoint() string {
|
||||
return state.getSELinuxfs()
|
||||
}
|
||||
|
||||
// GetEnabled returns whether selinux is currently enabled.
|
||||
func GetEnabled() bool {
|
||||
return state.getEnabled()
|
||||
}
|
||||
|
||||
func readConfig(target string) (value string) {
|
||||
var (
|
||||
val, key string
|
||||
bufin *bufio.Reader
|
||||
)
|
||||
|
||||
in, err := os.Open(selinuxConfig)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
defer in.Close()
|
||||
|
||||
bufin = bufio.NewReader(in)
|
||||
|
||||
for done := false; !done; {
|
||||
var line string
|
||||
if line, err = bufin.ReadString('\n'); err != nil {
|
||||
if err != io.EOF {
|
||||
return ""
|
||||
}
|
||||
done = true
|
||||
}
|
||||
line = strings.TrimSpace(line)
|
||||
if len(line) == 0 {
|
||||
// Skip blank lines
|
||||
continue
|
||||
}
|
||||
if line[0] == ';' || line[0] == '#' {
|
||||
// Skip comments
|
||||
continue
|
||||
}
|
||||
if groups := assignRegex.FindStringSubmatch(line); groups != nil {
|
||||
key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2])
|
||||
if key == target {
|
||||
return strings.Trim(val, "\"")
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func getSELinuxPolicyRoot() string {
|
||||
return selinuxDir + readConfig(selinuxTypeTag)
|
||||
}
|
||||
|
||||
func readCon(name string) (string, error) {
|
||||
var val string
|
||||
|
||||
in, err := os.Open(name)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer in.Close()
|
||||
|
||||
_, err = fmt.Fscanf(in, "%s", &val)
|
||||
return strings.Trim(val, "\x00"), err
|
||||
}
|
||||
|
||||
// SetFileLabel sets the SELinux label for this path or returns an error.
|
||||
func SetFileLabel(path string, label string) error {
|
||||
return lsetxattr(path, xattrNameSelinux, []byte(label), 0)
|
||||
}
|
||||
|
||||
// FileLabel returns the SELinux label for this path or returns an error.
|
||||
func FileLabel(path string) (string, error) {
|
||||
label, err := lgetxattr(path, xattrNameSelinux)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// Trim the NUL byte at the end of the byte buffer, if present.
|
||||
if len(label) > 0 && label[len(label)-1] == '\x00' {
|
||||
label = label[:len(label)-1]
|
||||
}
|
||||
return string(label), nil
|
||||
}
|
||||
|
||||
/*
|
||||
SetFSCreateLabel tells kernel the label to create all file system objects
|
||||
created by this task. Setting label="" to return to default.
|
||||
*/
|
||||
func SetFSCreateLabel(label string) error {
|
||||
return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", syscall.Gettid()), label)
|
||||
}
|
||||
|
||||
/*
|
||||
FSCreateLabel returns the default label the kernel which the kernel is using
|
||||
for file system objects created by this task. "" indicates default.
|
||||
*/
|
||||
func FSCreateLabel() (string, error) {
|
||||
return readCon(fmt.Sprintf("/proc/self/task/%d/attr/fscreate", syscall.Gettid()))
|
||||
}
|
||||
|
||||
// CurrentLabel returns the SELinux label of the current process thread, or an error.
|
||||
func CurrentLabel() (string, error) {
|
||||
return readCon(fmt.Sprintf("/proc/self/task/%d/attr/current", syscall.Gettid()))
|
||||
}
|
||||
|
||||
// PidLabel returns the SELinux label of the given pid, or an error.
|
||||
func PidLabel(pid int) (string, error) {
|
||||
return readCon(fmt.Sprintf("/proc/%d/attr/current", pid))
|
||||
}
|
||||
|
||||
/*
|
||||
ExecLabel returns the SELinux label that the kernel will use for any programs
|
||||
that are executed by the current process thread, or an error.
|
||||
*/
|
||||
func ExecLabel() (string, error) {
|
||||
return readCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid()))
|
||||
}
|
||||
|
||||
func writeCon(name string, val string) error {
|
||||
out, err := os.OpenFile(name, os.O_WRONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer out.Close()
|
||||
|
||||
if val != "" {
|
||||
_, err = out.Write([]byte(val))
|
||||
} else {
|
||||
_, err = out.Write(nil)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
/*
|
||||
CanonicalizeContext takes a context string and writes it to the kernel
|
||||
the function then returns the context that the kernel will use. This function
|
||||
can be used to see if two contexts are equivalent
|
||||
*/
|
||||
func CanonicalizeContext(val string) (string, error) {
|
||||
return readWriteCon(filepath.Join(getSelinuxMountPoint(), "context"), val)
|
||||
}
|
||||
|
||||
func readWriteCon(name string, val string) (string, error) {
|
||||
var retval string
|
||||
f, err := os.OpenFile(name, os.O_RDWR, 0)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
_, err = f.Write([]byte(val))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
_, err = fmt.Fscanf(f, "%s", &retval)
|
||||
return strings.Trim(retval, "\x00"), err
|
||||
}
|
||||
|
||||
/*
|
||||
SetExecLabel sets the SELinux label that the kernel will use for any programs
|
||||
that are executed by the current process thread, or an error.
|
||||
*/
|
||||
func SetExecLabel(label string) error {
|
||||
return writeCon(fmt.Sprintf("/proc/self/task/%d/attr/exec", syscall.Gettid()), label)
|
||||
}
|
||||
|
||||
// Get returns the Context as a string
|
||||
func (c Context) Get() string {
|
||||
if c["level"] != "" {
|
||||
return fmt.Sprintf("%s:%s:%s:%s", c["user"], c["role"], c["type"], c["level"])
|
||||
}
|
||||
return fmt.Sprintf("%s:%s:%s", c["user"], c["role"], c["type"])
|
||||
}
|
||||
|
||||
// NewContext creates a new Context struct from the specified label
|
||||
func NewContext(label string) Context {
|
||||
c := make(Context)
|
||||
|
||||
if len(label) != 0 {
|
||||
con := strings.SplitN(label, ":", 4)
|
||||
c["user"] = con[0]
|
||||
c["role"] = con[1]
|
||||
c["type"] = con[2]
|
||||
if len(con) > 3 {
|
||||
c["level"] = con[3]
|
||||
}
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// ReserveLabel reserves the MLS/MCS level component of the specified label
|
||||
func ReserveLabel(label string) {
|
||||
if len(label) != 0 {
|
||||
con := strings.SplitN(label, ":", 4)
|
||||
if len(con) > 3 {
|
||||
mcsAdd(con[3])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func selinuxEnforcePath() string {
|
||||
return fmt.Sprintf("%s/enforce", getSelinuxMountPoint())
|
||||
}
|
||||
|
||||
// EnforceMode returns the current SELinux mode Enforcing, Permissive, Disabled
|
||||
func EnforceMode() int {
|
||||
var enforce int
|
||||
|
||||
enforceS, err := readCon(selinuxEnforcePath())
|
||||
if err != nil {
|
||||
return -1
|
||||
}
|
||||
|
||||
enforce, err = strconv.Atoi(string(enforceS))
|
||||
if err != nil {
|
||||
return -1
|
||||
}
|
||||
return enforce
|
||||
}
|
||||
|
||||
/*
|
||||
SetEnforceMode sets the current SELinux mode Enforcing, Permissive.
|
||||
Disabled is not valid, since this needs to be set at boot time.
|
||||
*/
|
||||
func SetEnforceMode(mode int) error {
|
||||
return writeCon(selinuxEnforcePath(), fmt.Sprintf("%d", mode))
|
||||
}
|
||||
|
||||
/*
|
||||
DefaultEnforceMode returns the systems default SELinux mode Enforcing,
|
||||
Permissive or Disabled. Note this is is just the default at boot time.
|
||||
EnforceMode tells you the systems current mode.
|
||||
*/
|
||||
func DefaultEnforceMode() int {
|
||||
switch readConfig(selinuxTag) {
|
||||
case "enforcing":
|
||||
return Enforcing
|
||||
case "permissive":
|
||||
return Permissive
|
||||
}
|
||||
return Disabled
|
||||
}
|
||||
|
||||
func mcsAdd(mcs string) error {
|
||||
if mcs == "" {
|
||||
return nil
|
||||
}
|
||||
state.Lock()
|
||||
defer state.Unlock()
|
||||
if state.mcsList[mcs] {
|
||||
return fmt.Errorf("MCS Label already exists")
|
||||
}
|
||||
state.mcsList[mcs] = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func mcsDelete(mcs string) {
|
||||
if mcs == "" {
|
||||
return
|
||||
}
|
||||
state.Lock()
|
||||
defer state.Unlock()
|
||||
state.mcsList[mcs] = false
|
||||
}
|
||||
|
||||
func intToMcs(id int, catRange uint32) string {
|
||||
var (
|
||||
SETSIZE = int(catRange)
|
||||
TIER = SETSIZE
|
||||
ORD = id
|
||||
)
|
||||
|
||||
if id < 1 || id > 523776 {
|
||||
return ""
|
||||
}
|
||||
|
||||
for ORD > TIER {
|
||||
ORD = ORD - TIER
|
||||
TIER--
|
||||
}
|
||||
TIER = SETSIZE - TIER
|
||||
ORD = ORD + TIER
|
||||
return fmt.Sprintf("s0:c%d,c%d", TIER, ORD)
|
||||
}
|
||||
|
||||
func uniqMcs(catRange uint32) string {
|
||||
var (
|
||||
n uint32
|
||||
c1, c2 uint32
|
||||
mcs string
|
||||
)
|
||||
|
||||
for {
|
||||
binary.Read(rand.Reader, binary.LittleEndian, &n)
|
||||
c1 = n % catRange
|
||||
binary.Read(rand.Reader, binary.LittleEndian, &n)
|
||||
c2 = n % catRange
|
||||
if c1 == c2 {
|
||||
continue
|
||||
} else {
|
||||
if c1 > c2 {
|
||||
c1, c2 = c2, c1
|
||||
}
|
||||
}
|
||||
mcs = fmt.Sprintf("s0:c%d,c%d", c1, c2)
|
||||
if err := mcsAdd(mcs); err != nil {
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
return mcs
|
||||
}
|
||||
|
||||
/*
|
||||
ReleaseLabel will unreserve the MLS/MCS Level field of the specified label.
|
||||
Allowing it to be used by another process.
|
||||
*/
|
||||
func ReleaseLabel(label string) {
|
||||
if len(label) != 0 {
|
||||
con := strings.SplitN(label, ":", 4)
|
||||
if len(con) > 3 {
|
||||
mcsDelete(con[3])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var roFileLabel string
|
||||
|
||||
// ROFileLabel returns the specified SELinux readonly file label
|
||||
func ROFileLabel() (fileLabel string) {
|
||||
return roFileLabel
|
||||
}
|
||||
|
||||
/*
|
||||
ContainerLabels returns an allocated processLabel and fileLabel to be used for
|
||||
container labeling by the calling process.
|
||||
*/
|
||||
func ContainerLabels() (processLabel string, fileLabel string) {
|
||||
var (
|
||||
val, key string
|
||||
bufin *bufio.Reader
|
||||
)
|
||||
|
||||
if !GetEnabled() {
|
||||
return "", ""
|
||||
}
|
||||
lxcPath := fmt.Sprintf("%s/contexts/lxc_contexts", getSELinuxPolicyRoot())
|
||||
in, err := os.Open(lxcPath)
|
||||
if err != nil {
|
||||
return "", ""
|
||||
}
|
||||
defer in.Close()
|
||||
|
||||
bufin = bufio.NewReader(in)
|
||||
|
||||
for done := false; !done; {
|
||||
var line string
|
||||
if line, err = bufin.ReadString('\n'); err != nil {
|
||||
if err == io.EOF {
|
||||
done = true
|
||||
} else {
|
||||
goto exit
|
||||
}
|
||||
}
|
||||
line = strings.TrimSpace(line)
|
||||
if len(line) == 0 {
|
||||
// Skip blank lines
|
||||
continue
|
||||
}
|
||||
if line[0] == ';' || line[0] == '#' {
|
||||
// Skip comments
|
||||
continue
|
||||
}
|
||||
if groups := assignRegex.FindStringSubmatch(line); groups != nil {
|
||||
key, val = strings.TrimSpace(groups[1]), strings.TrimSpace(groups[2])
|
||||
if key == "process" {
|
||||
processLabel = strings.Trim(val, "\"")
|
||||
}
|
||||
if key == "file" {
|
||||
fileLabel = strings.Trim(val, "\"")
|
||||
}
|
||||
if key == "ro_file" {
|
||||
roFileLabel = strings.Trim(val, "\"")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if processLabel == "" || fileLabel == "" {
|
||||
return "", ""
|
||||
}
|
||||
|
||||
if roFileLabel == "" {
|
||||
roFileLabel = fileLabel
|
||||
}
|
||||
exit:
|
||||
scon := NewContext(processLabel)
|
||||
if scon["level"] != "" {
|
||||
mcs := uniqMcs(1024)
|
||||
scon["level"] = mcs
|
||||
processLabel = scon.Get()
|
||||
scon = NewContext(fileLabel)
|
||||
scon["level"] = mcs
|
||||
fileLabel = scon.Get()
|
||||
}
|
||||
return processLabel, fileLabel
|
||||
}
|
||||
|
||||
// SecurityCheckContext validates that the SELinux label is understood by the kernel
|
||||
func SecurityCheckContext(val string) error {
|
||||
return writeCon(fmt.Sprintf("%s/context", getSelinuxMountPoint()), val)
|
||||
}
|
||||
|
||||
/*
|
||||
CopyLevel returns a label with the MLS/MCS level from src label replaces on
|
||||
the dest label.
|
||||
*/
|
||||
func CopyLevel(src, dest string) (string, error) {
|
||||
if src == "" {
|
||||
return "", nil
|
||||
}
|
||||
if err := SecurityCheckContext(src); err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := SecurityCheckContext(dest); err != nil {
|
||||
return "", err
|
||||
}
|
||||
scon := NewContext(src)
|
||||
tcon := NewContext(dest)
|
||||
mcsDelete(tcon["level"])
|
||||
mcsAdd(scon["level"])
|
||||
tcon["level"] = scon["level"]
|
||||
return tcon.Get(), nil
|
||||
}
|
||||
|
||||
// Prevent users from relabing system files
|
||||
func badPrefix(fpath string) error {
|
||||
var badprefixes = []string{"/usr"}
|
||||
|
||||
for _, prefix := range badprefixes {
|
||||
if fpath == prefix || strings.HasPrefix(fpath, fmt.Sprintf("%s/", prefix)) {
|
||||
return fmt.Errorf("relabeling content in %s is not allowed", prefix)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Chcon changes the fpath file object to the SELinux label label.
|
||||
// If the fpath is a directory and recurse is true Chcon will walk the
|
||||
// directory tree setting the label
|
||||
func Chcon(fpath string, label string, recurse bool) error {
|
||||
if label == "" {
|
||||
return nil
|
||||
}
|
||||
if err := badPrefix(fpath); err != nil {
|
||||
return err
|
||||
}
|
||||
callback := func(p string, info os.FileInfo, err error) error {
|
||||
return SetFileLabel(p, label)
|
||||
}
|
||||
|
||||
if recurse {
|
||||
return filepath.Walk(fpath, callback)
|
||||
}
|
||||
|
||||
return SetFileLabel(fpath, label)
|
||||
}
|
||||
|
||||
// DupSecOpt takes an SELinux process label and returns security options that
|
||||
// can will set the SELinux Type and Level for future container processes
|
||||
func DupSecOpt(src string) []string {
|
||||
if src == "" {
|
||||
return nil
|
||||
}
|
||||
con := NewContext(src)
|
||||
if con["user"] == "" ||
|
||||
con["role"] == "" ||
|
||||
con["type"] == "" {
|
||||
return nil
|
||||
}
|
||||
dup := []string{"user:" + con["user"],
|
||||
"role:" + con["role"],
|
||||
"type:" + con["type"],
|
||||
}
|
||||
|
||||
if con["level"] != "" {
|
||||
dup = append(dup, "level:"+con["level"])
|
||||
}
|
||||
|
||||
return dup
|
||||
}
|
||||
|
||||
// DisableSecOpt returns a security opt that can be used to disabling SELinux
|
||||
// labeling support for future container processes
|
||||
func DisableSecOpt() []string {
|
||||
return []string{"disable"}
|
||||
}
|
|
@ -1,78 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package selinux
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
var _zero uintptr
|
||||
|
||||
// Returns a []byte slice if the xattr is set and nil otherwise
|
||||
// Requires path and its attribute as arguments
|
||||
func lgetxattr(path string, attr string) ([]byte, error) {
|
||||
var sz int
|
||||
pathBytes, err := syscall.BytePtrFromString(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
attrBytes, err := syscall.BytePtrFromString(attr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Start with a 128 length byte array
|
||||
sz = 128
|
||||
dest := make([]byte, sz)
|
||||
destBytes := unsafe.Pointer(&dest[0])
|
||||
_sz, _, errno := syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
|
||||
|
||||
switch {
|
||||
case errno == syscall.ENODATA:
|
||||
return nil, errno
|
||||
case errno == syscall.ENOTSUP:
|
||||
return nil, errno
|
||||
case errno == syscall.ERANGE:
|
||||
// 128 byte array might just not be good enough,
|
||||
// A dummy buffer is used ``uintptr(0)`` to get real size
|
||||
// of the xattrs on disk
|
||||
_sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(unsafe.Pointer(nil)), uintptr(0), 0, 0)
|
||||
sz = int(_sz)
|
||||
if sz < 0 {
|
||||
return nil, errno
|
||||
}
|
||||
dest = make([]byte, sz)
|
||||
destBytes := unsafe.Pointer(&dest[0])
|
||||
_sz, _, errno = syscall.Syscall6(syscall.SYS_LGETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(destBytes), uintptr(len(dest)), 0, 0)
|
||||
if errno != 0 {
|
||||
return nil, errno
|
||||
}
|
||||
case errno != 0:
|
||||
return nil, errno
|
||||
}
|
||||
sz = int(_sz)
|
||||
return dest[:sz], nil
|
||||
}
|
||||
|
||||
func lsetxattr(path string, attr string, data []byte, flags int) error {
|
||||
pathBytes, err := syscall.BytePtrFromString(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
attrBytes, err := syscall.BytePtrFromString(attr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var dataBytes unsafe.Pointer
|
||||
if len(data) > 0 {
|
||||
dataBytes = unsafe.Pointer(&data[0])
|
||||
} else {
|
||||
dataBytes = unsafe.Pointer(&_zero)
|
||||
}
|
||||
_, _, errno := syscall.Syscall6(syscall.SYS_LSETXATTR, uintptr(unsafe.Pointer(pathBytes)), uintptr(unsafe.Pointer(attrBytes)), uintptr(dataBytes), uintptr(len(data)), uintptr(flags), 0)
|
||||
if errno != 0 {
|
||||
return errno
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
Copyright (c) 2015 Matthew Heon <mheon@redhat.com>
|
||||
Copyright (c) 2015 Paul Moore <pmoore@redhat.com>
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -0,0 +1,26 @@
|
|||
libseccomp-golang: Go Language Bindings for the libseccomp Project
|
||||
===============================================================================
|
||||
https://github.com/seccomp/libseccomp-golang
|
||||
https://github.com/seccomp/libseccomp
|
||||
|
||||
The libseccomp library provides an easy to use, platform independent, interface
|
||||
to the Linux Kernel's syscall filtering mechanism. The libseccomp API is
|
||||
designed to abstract away the underlying BPF based syscall filter language and
|
||||
present a more conventional function-call based filtering interface that should
|
||||
be familiar to, and easily adopted by, application developers.
|
||||
|
||||
The libseccomp-golang library provides a Go based interface to the libseccomp
|
||||
library.
|
||||
|
||||
* Online Resources
|
||||
|
||||
The library source repository currently lives on GitHub at the following URLs:
|
||||
|
||||
-> https://github.com/seccomp/libseccomp-golang
|
||||
-> https://github.com/seccomp/libseccomp
|
||||
|
||||
The project mailing list is currently hosted on Google Groups at the URL below,
|
||||
please note that a Google account is not required to subscribe to the mailing
|
||||
list.
|
||||
|
||||
-> https://groups.google.com/d/forum/libseccomp
|
|
@ -0,0 +1,857 @@
|
|||
// +build linux
|
||||
|
||||
// Public API specification for libseccomp Go bindings
|
||||
// Contains public API for the bindings
|
||||
|
||||
// Package seccomp provides bindings for libseccomp, a library wrapping the Linux
|
||||
// seccomp syscall. Seccomp enables an application to restrict system call use
|
||||
// for itself and its children.
|
||||
package seccomp
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// C wrapping code
|
||||
|
||||
// #cgo pkg-config: libseccomp
|
||||
// #include <stdlib.h>
|
||||
// #include <seccomp.h>
|
||||
import "C"
|
||||
|
||||
// Exported types
|
||||
|
||||
// ScmpArch represents a CPU architecture. Seccomp can restrict syscalls on a
|
||||
// per-architecture basis.
|
||||
type ScmpArch uint
|
||||
|
||||
// ScmpAction represents an action to be taken on a filter rule match in
|
||||
// libseccomp
|
||||
type ScmpAction uint
|
||||
|
||||
// ScmpCompareOp represents a comparison operator which can be used in a filter
|
||||
// rule
|
||||
type ScmpCompareOp uint
|
||||
|
||||
// ScmpCondition represents a rule in a libseccomp filter context
|
||||
type ScmpCondition struct {
|
||||
Argument uint `json:"argument,omitempty"`
|
||||
Op ScmpCompareOp `json:"operator,omitempty"`
|
||||
Operand1 uint64 `json:"operand_one,omitempty"`
|
||||
Operand2 uint64 `json:"operand_two,omitempty"`
|
||||
}
|
||||
|
||||
// ScmpSyscall represents a Linux System Call
|
||||
type ScmpSyscall int32
|
||||
|
||||
// Exported Constants
|
||||
|
||||
const (
|
||||
// Valid architectures recognized by libseccomp
|
||||
// ARM64 and all MIPS architectures are unsupported by versions of the
|
||||
// library before v2.2 and will return errors if used
|
||||
|
||||
// ArchInvalid is a placeholder to ensure uninitialized ScmpArch
|
||||
// variables are invalid
|
||||
ArchInvalid ScmpArch = iota
|
||||
// ArchNative is the native architecture of the kernel
|
||||
ArchNative ScmpArch = iota
|
||||
// ArchX86 represents 32-bit x86 syscalls
|
||||
ArchX86 ScmpArch = iota
|
||||
// ArchAMD64 represents 64-bit x86-64 syscalls
|
||||
ArchAMD64 ScmpArch = iota
|
||||
// ArchX32 represents 64-bit x86-64 syscalls (32-bit pointers)
|
||||
ArchX32 ScmpArch = iota
|
||||
// ArchARM represents 32-bit ARM syscalls
|
||||
ArchARM ScmpArch = iota
|
||||
// ArchARM64 represents 64-bit ARM syscalls
|
||||
ArchARM64 ScmpArch = iota
|
||||
// ArchMIPS represents 32-bit MIPS syscalls
|
||||
ArchMIPS ScmpArch = iota
|
||||
// ArchMIPS64 represents 64-bit MIPS syscalls
|
||||
ArchMIPS64 ScmpArch = iota
|
||||
// ArchMIPS64N32 represents 64-bit MIPS syscalls (32-bit pointers)
|
||||
ArchMIPS64N32 ScmpArch = iota
|
||||
// ArchMIPSEL represents 32-bit MIPS syscalls (little endian)
|
||||
ArchMIPSEL ScmpArch = iota
|
||||
// ArchMIPSEL64 represents 64-bit MIPS syscalls (little endian)
|
||||
ArchMIPSEL64 ScmpArch = iota
|
||||
// ArchMIPSEL64N32 represents 64-bit MIPS syscalls (little endian,
|
||||
// 32-bit pointers)
|
||||
ArchMIPSEL64N32 ScmpArch = iota
|
||||
// ArchPPC represents 32-bit POWERPC syscalls
|
||||
ArchPPC ScmpArch = iota
|
||||
// ArchPPC64 represents 64-bit POWER syscalls (big endian)
|
||||
ArchPPC64 ScmpArch = iota
|
||||
// ArchPPC64LE represents 64-bit POWER syscalls (little endian)
|
||||
ArchPPC64LE ScmpArch = iota
|
||||
// ArchS390 represents 31-bit System z/390 syscalls
|
||||
ArchS390 ScmpArch = iota
|
||||
// ArchS390X represents 64-bit System z/390 syscalls
|
||||
ArchS390X ScmpArch = iota
|
||||
)
|
||||
|
||||
const (
|
||||
// Supported actions on filter match
|
||||
|
||||
// ActInvalid is a placeholder to ensure uninitialized ScmpAction
|
||||
// variables are invalid
|
||||
ActInvalid ScmpAction = iota
|
||||
// ActKill kills the process
|
||||
ActKill ScmpAction = iota
|
||||
// ActTrap throws SIGSYS
|
||||
ActTrap ScmpAction = iota
|
||||
// ActErrno causes the syscall to return a negative error code. This
|
||||
// code can be set with the SetReturnCode method
|
||||
ActErrno ScmpAction = iota
|
||||
// ActTrace causes the syscall to notify tracing processes with the
|
||||
// given error code. This code can be set with the SetReturnCode method
|
||||
ActTrace ScmpAction = iota
|
||||
// ActAllow permits the syscall to continue execution
|
||||
ActAllow ScmpAction = iota
|
||||
)
|
||||
|
||||
const (
|
||||
// These are comparison operators used in conditional seccomp rules
|
||||
// They are used to compare the value of a single argument of a syscall
|
||||
// against a user-defined constant
|
||||
|
||||
// CompareInvalid is a placeholder to ensure uninitialized ScmpCompareOp
|
||||
// variables are invalid
|
||||
CompareInvalid ScmpCompareOp = iota
|
||||
// CompareNotEqual returns true if the argument is not equal to the
|
||||
// given value
|
||||
CompareNotEqual ScmpCompareOp = iota
|
||||
// CompareLess returns true if the argument is less than the given value
|
||||
CompareLess ScmpCompareOp = iota
|
||||
// CompareLessOrEqual returns true if the argument is less than or equal
|
||||
// to the given value
|
||||
CompareLessOrEqual ScmpCompareOp = iota
|
||||
// CompareEqual returns true if the argument is equal to the given value
|
||||
CompareEqual ScmpCompareOp = iota
|
||||
// CompareGreaterEqual returns true if the argument is greater than or
|
||||
// equal to the given value
|
||||
CompareGreaterEqual ScmpCompareOp = iota
|
||||
// CompareGreater returns true if the argument is greater than the given
|
||||
// value
|
||||
CompareGreater ScmpCompareOp = iota
|
||||
// CompareMaskedEqual returns true if the argument is equal to the given
|
||||
// value, when masked (bitwise &) against the second given value
|
||||
CompareMaskedEqual ScmpCompareOp = iota
|
||||
)
|
||||
|
||||
// Helpers for types
|
||||
|
||||
// GetArchFromString returns an ScmpArch constant from a string representing an
|
||||
// architecture
|
||||
func GetArchFromString(arch string) (ScmpArch, error) {
|
||||
switch strings.ToLower(arch) {
|
||||
case "x86":
|
||||
return ArchX86, nil
|
||||
case "amd64", "x86-64", "x86_64", "x64":
|
||||
return ArchAMD64, nil
|
||||
case "x32":
|
||||
return ArchX32, nil
|
||||
case "arm":
|
||||
return ArchARM, nil
|
||||
case "arm64", "aarch64":
|
||||
return ArchARM64, nil
|
||||
case "mips":
|
||||
return ArchMIPS, nil
|
||||
case "mips64":
|
||||
return ArchMIPS64, nil
|
||||
case "mips64n32":
|
||||
return ArchMIPS64N32, nil
|
||||
case "mipsel":
|
||||
return ArchMIPSEL, nil
|
||||
case "mipsel64":
|
||||
return ArchMIPSEL64, nil
|
||||
case "mipsel64n32":
|
||||
return ArchMIPSEL64N32, nil
|
||||
case "ppc":
|
||||
return ArchPPC, nil
|
||||
case "ppc64":
|
||||
return ArchPPC64, nil
|
||||
case "ppc64le":
|
||||
return ArchPPC64LE, nil
|
||||
case "s390":
|
||||
return ArchS390, nil
|
||||
case "s390x":
|
||||
return ArchS390X, nil
|
||||
default:
|
||||
return ArchInvalid, fmt.Errorf("cannot convert unrecognized string %s", arch)
|
||||
}
|
||||
}
|
||||
|
||||
// String returns a string representation of an architecture constant
|
||||
func (a ScmpArch) String() string {
|
||||
switch a {
|
||||
case ArchX86:
|
||||
return "x86"
|
||||
case ArchAMD64:
|
||||
return "amd64"
|
||||
case ArchX32:
|
||||
return "x32"
|
||||
case ArchARM:
|
||||
return "arm"
|
||||
case ArchARM64:
|
||||
return "arm64"
|
||||
case ArchMIPS:
|
||||
return "mips"
|
||||
case ArchMIPS64:
|
||||
return "mips64"
|
||||
case ArchMIPS64N32:
|
||||
return "mips64n32"
|
||||
case ArchMIPSEL:
|
||||
return "mipsel"
|
||||
case ArchMIPSEL64:
|
||||
return "mipsel64"
|
||||
case ArchMIPSEL64N32:
|
||||
return "mipsel64n32"
|
||||
case ArchPPC:
|
||||
return "ppc"
|
||||
case ArchPPC64:
|
||||
return "ppc64"
|
||||
case ArchPPC64LE:
|
||||
return "ppc64le"
|
||||
case ArchS390:
|
||||
return "s390"
|
||||
case ArchS390X:
|
||||
return "s390x"
|
||||
case ArchNative:
|
||||
return "native"
|
||||
case ArchInvalid:
|
||||
return "Invalid architecture"
|
||||
default:
|
||||
return "Unknown architecture"
|
||||
}
|
||||
}
|
||||
|
||||
// String returns a string representation of a comparison operator constant
|
||||
func (a ScmpCompareOp) String() string {
|
||||
switch a {
|
||||
case CompareNotEqual:
|
||||
return "Not equal"
|
||||
case CompareLess:
|
||||
return "Less than"
|
||||
case CompareLessOrEqual:
|
||||
return "Less than or equal to"
|
||||
case CompareEqual:
|
||||
return "Equal"
|
||||
case CompareGreaterEqual:
|
||||
return "Greater than or equal to"
|
||||
case CompareGreater:
|
||||
return "Greater than"
|
||||
case CompareMaskedEqual:
|
||||
return "Masked equality"
|
||||
case CompareInvalid:
|
||||
return "Invalid comparison operator"
|
||||
default:
|
||||
return "Unrecognized comparison operator"
|
||||
}
|
||||
}
|
||||
|
||||
// String returns a string representation of a seccomp match action
|
||||
func (a ScmpAction) String() string {
|
||||
switch a & 0xFFFF {
|
||||
case ActKill:
|
||||
return "Action: Kill Process"
|
||||
case ActTrap:
|
||||
return "Action: Send SIGSYS"
|
||||
case ActErrno:
|
||||
return fmt.Sprintf("Action: Return error code %d", (a >> 16))
|
||||
case ActTrace:
|
||||
return fmt.Sprintf("Action: Notify tracing processes with code %d",
|
||||
(a >> 16))
|
||||
case ActAllow:
|
||||
return "Action: Allow system call"
|
||||
default:
|
||||
return "Unrecognized Action"
|
||||
}
|
||||
}
|
||||
|
||||
// SetReturnCode adds a return code to a supporting ScmpAction, clearing any
|
||||
// existing code Only valid on ActErrno and ActTrace. Takes no action otherwise.
|
||||
// Accepts 16-bit return code as argument.
|
||||
// Returns a valid ScmpAction of the original type with the new error code set.
|
||||
func (a ScmpAction) SetReturnCode(code int16) ScmpAction {
|
||||
aTmp := a & 0x0000FFFF
|
||||
if aTmp == ActErrno || aTmp == ActTrace {
|
||||
return (aTmp | (ScmpAction(code)&0xFFFF)<<16)
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
// GetReturnCode returns the return code of an ScmpAction
|
||||
func (a ScmpAction) GetReturnCode() int16 {
|
||||
return int16(a >> 16)
|
||||
}
|
||||
|
||||
// General utility functions
|
||||
|
||||
// GetLibraryVersion returns the version of the library the bindings are built
|
||||
// against.
|
||||
// The version is formatted as follows: Major.Minor.Micro
|
||||
func GetLibraryVersion() (major, minor, micro int) {
|
||||
return verMajor, verMinor, verMicro
|
||||
}
|
||||
|
||||
// Syscall functions
|
||||
|
||||
// GetName retrieves the name of a syscall from its number.
|
||||
// Acts on any syscall number.
|
||||
// Returns either a string containing the name of the syscall, or an error.
|
||||
func (s ScmpSyscall) GetName() (string, error) {
|
||||
return s.GetNameByArch(ArchNative)
|
||||
}
|
||||
|
||||
// GetNameByArch retrieves the name of a syscall from its number for a given
|
||||
// architecture.
|
||||
// Acts on any syscall number.
|
||||
// Accepts a valid architecture constant.
|
||||
// Returns either a string containing the name of the syscall, or an error.
|
||||
// if the syscall is unrecognized or an issue occurred.
|
||||
func (s ScmpSyscall) GetNameByArch(arch ScmpArch) (string, error) {
|
||||
if err := sanitizeArch(arch); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
cString := C.seccomp_syscall_resolve_num_arch(arch.toNative(), C.int(s))
|
||||
if cString == nil {
|
||||
return "", fmt.Errorf("could not resolve syscall name")
|
||||
}
|
||||
defer C.free(unsafe.Pointer(cString))
|
||||
|
||||
finalStr := C.GoString(cString)
|
||||
return finalStr, nil
|
||||
}
|
||||
|
||||
// GetSyscallFromName returns the number of a syscall by name on the kernel's
|
||||
// native architecture.
|
||||
// Accepts a string containing the name of a syscall.
|
||||
// Returns the number of the syscall, or an error if no syscall with that name
|
||||
// was found.
|
||||
func GetSyscallFromName(name string) (ScmpSyscall, error) {
|
||||
cString := C.CString(name)
|
||||
defer C.free(unsafe.Pointer(cString))
|
||||
|
||||
result := C.seccomp_syscall_resolve_name(cString)
|
||||
if result == scmpError {
|
||||
return 0, fmt.Errorf("could not resolve name to syscall")
|
||||
}
|
||||
|
||||
return ScmpSyscall(result), nil
|
||||
}
|
||||
|
||||
// GetSyscallFromNameByArch returns the number of a syscall by name for a given
|
||||
// architecture's ABI.
|
||||
// Accepts the name of a syscall and an architecture constant.
|
||||
// Returns the number of the syscall, or an error if an invalid architecture is
|
||||
// passed or a syscall with that name was not found.
|
||||
func GetSyscallFromNameByArch(name string, arch ScmpArch) (ScmpSyscall, error) {
|
||||
if err := sanitizeArch(arch); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
cString := C.CString(name)
|
||||
defer C.free(unsafe.Pointer(cString))
|
||||
|
||||
result := C.seccomp_syscall_resolve_name_arch(arch.toNative(), cString)
|
||||
if result == scmpError {
|
||||
return 0, fmt.Errorf("could not resolve name to syscall")
|
||||
}
|
||||
|
||||
return ScmpSyscall(result), nil
|
||||
}
|
||||
|
||||
// MakeCondition creates and returns a new condition to attach to a filter rule.
|
||||
// Associated rules will only match if this condition is true.
|
||||
// Accepts the number the argument we are checking, and a comparison operator
|
||||
// and value to compare to.
|
||||
// The rule will match if argument $arg (zero-indexed) of the syscall is
|
||||
// $COMPARE_OP the provided comparison value.
|
||||
// Some comparison operators accept two values. Masked equals, for example,
|
||||
// will mask $arg of the syscall with the second value provided (via bitwise
|
||||
// AND) and then compare against the first value provided.
|
||||
// For example, in the less than or equal case, if the syscall argument was
|
||||
// 0 and the value provided was 1, the condition would match, as 0 is less
|
||||
// than or equal to 1.
|
||||
// Return either an error on bad argument or a valid ScmpCondition struct.
|
||||
func MakeCondition(arg uint, comparison ScmpCompareOp, values ...uint64) (ScmpCondition, error) {
|
||||
var condStruct ScmpCondition
|
||||
|
||||
if comparison == CompareInvalid {
|
||||
return condStruct, fmt.Errorf("invalid comparison operator")
|
||||
} else if arg > 5 {
|
||||
return condStruct, fmt.Errorf("syscalls only have up to 6 arguments")
|
||||
} else if len(values) > 2 {
|
||||
return condStruct, fmt.Errorf("conditions can have at most 2 arguments")
|
||||
} else if len(values) == 0 {
|
||||
return condStruct, fmt.Errorf("must provide at least one value to compare against")
|
||||
}
|
||||
|
||||
condStruct.Argument = arg
|
||||
condStruct.Op = comparison
|
||||
condStruct.Operand1 = values[0]
|
||||
if len(values) == 2 {
|
||||
condStruct.Operand2 = values[1]
|
||||
} else {
|
||||
condStruct.Operand2 = 0 // Unused
|
||||
}
|
||||
|
||||
return condStruct, nil
|
||||
}
|
||||
|
||||
// Utility Functions
|
||||
|
||||
// GetNativeArch returns architecture token representing the native kernel
|
||||
// architecture
|
||||
func GetNativeArch() (ScmpArch, error) {
|
||||
arch := C.seccomp_arch_native()
|
||||
|
||||
return archFromNative(arch)
|
||||
}
|
||||
|
||||
// Public Filter API
|
||||
|
||||
// ScmpFilter represents a filter context in libseccomp.
|
||||
// A filter context is initially empty. Rules can be added to it, and it can
|
||||
// then be loaded into the kernel.
|
||||
type ScmpFilter struct {
|
||||
filterCtx C.scmp_filter_ctx
|
||||
valid bool
|
||||
lock sync.Mutex
|
||||
}
|
||||
|
||||
// NewFilter creates and returns a new filter context.
|
||||
// Accepts a default action to be taken for syscalls which match no rules in
|
||||
// the filter.
|
||||
// Returns a reference to a valid filter context, or nil and an error if the
|
||||
// filter context could not be created or an invalid default action was given.
|
||||
func NewFilter(defaultAction ScmpAction) (*ScmpFilter, error) {
|
||||
if err := sanitizeAction(defaultAction); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fPtr := C.seccomp_init(defaultAction.toNative())
|
||||
if fPtr == nil {
|
||||
return nil, fmt.Errorf("could not create filter")
|
||||
}
|
||||
|
||||
filter := new(ScmpFilter)
|
||||
filter.filterCtx = fPtr
|
||||
filter.valid = true
|
||||
runtime.SetFinalizer(filter, filterFinalizer)
|
||||
|
||||
return filter, nil
|
||||
}
|
||||
|
||||
// IsValid determines whether a filter context is valid to use.
|
||||
// Some operations (Release and Merge) render filter contexts invalid and
|
||||
// consequently prevent further use.
|
||||
func (f *ScmpFilter) IsValid() bool {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
return f.valid
|
||||
}
|
||||
|
||||
// Reset resets a filter context, removing all its existing state.
|
||||
// Accepts a new default action to be taken for syscalls which do not match.
|
||||
// Returns an error if the filter or action provided are invalid.
|
||||
func (f *ScmpFilter) Reset(defaultAction ScmpAction) error {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
if err := sanitizeAction(defaultAction); err != nil {
|
||||
return err
|
||||
} else if !f.valid {
|
||||
return errBadFilter
|
||||
}
|
||||
|
||||
retCode := C.seccomp_reset(f.filterCtx, defaultAction.toNative())
|
||||
if retCode != 0 {
|
||||
return syscall.Errno(-1 * retCode)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Release releases a filter context, freeing its memory. Should be called after
|
||||
// loading into the kernel, when the filter is no longer needed.
|
||||
// After calling this function, the given filter is no longer valid and cannot
|
||||
// be used.
|
||||
// Release() will be invoked automatically when a filter context is garbage
|
||||
// collected, but can also be called manually to free memory.
|
||||
func (f *ScmpFilter) Release() {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
if !f.valid {
|
||||
return
|
||||
}
|
||||
|
||||
f.valid = false
|
||||
C.seccomp_release(f.filterCtx)
|
||||
}
|
||||
|
||||
// Merge merges two filter contexts.
|
||||
// The source filter src will be released as part of the process, and will no
|
||||
// longer be usable or valid after this call.
|
||||
// To be merged, filters must NOT share any architectures, and all their
|
||||
// attributes (Default Action, Bad Arch Action, No New Privs and TSync bools)
|
||||
// must match.
|
||||
// The filter src will be merged into the filter this is called on.
|
||||
// The architectures of the src filter not present in the destination, and all
|
||||
// associated rules, will be added to the destination.
|
||||
// Returns an error if merging the filters failed.
|
||||
func (f *ScmpFilter) Merge(src *ScmpFilter) error {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
src.lock.Lock()
|
||||
defer src.lock.Unlock()
|
||||
|
||||
if !src.valid || !f.valid {
|
||||
return fmt.Errorf("one or more of the filter contexts is invalid or uninitialized")
|
||||
}
|
||||
|
||||
// Merge the filters
|
||||
retCode := C.seccomp_merge(f.filterCtx, src.filterCtx)
|
||||
if syscall.Errno(-1*retCode) == syscall.EINVAL {
|
||||
return fmt.Errorf("filters could not be merged due to a mismatch in attributes or invalid filter")
|
||||
} else if retCode != 0 {
|
||||
return syscall.Errno(-1 * retCode)
|
||||
}
|
||||
|
||||
src.valid = false
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsArchPresent checks if an architecture is present in a filter.
|
||||
// If a filter contains an architecture, it uses its default action for
|
||||
// syscalls which do not match rules in it, and its rules can match syscalls
|
||||
// for that ABI.
|
||||
// If a filter does not contain an architecture, all syscalls made to that
|
||||
// kernel ABI will fail with the filter's default Bad Architecture Action
|
||||
// (by default, killing the process).
|
||||
// Accepts an architecture constant.
|
||||
// Returns true if the architecture is present in the filter, false otherwise,
|
||||
// and an error on an invalid filter context, architecture constant, or an
|
||||
// issue with the call to libseccomp.
|
||||
func (f *ScmpFilter) IsArchPresent(arch ScmpArch) (bool, error) {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
if err := sanitizeArch(arch); err != nil {
|
||||
return false, err
|
||||
} else if !f.valid {
|
||||
return false, errBadFilter
|
||||
}
|
||||
|
||||
retCode := C.seccomp_arch_exist(f.filterCtx, arch.toNative())
|
||||
if syscall.Errno(-1*retCode) == syscall.EEXIST {
|
||||
// -EEXIST is "arch not present"
|
||||
return false, nil
|
||||
} else if retCode != 0 {
|
||||
return false, syscall.Errno(-1 * retCode)
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// AddArch adds an architecture to the filter.
|
||||
// Accepts an architecture constant.
|
||||
// Returns an error on invalid filter context or architecture token, or an
|
||||
// issue with the call to libseccomp.
|
||||
func (f *ScmpFilter) AddArch(arch ScmpArch) error {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
if err := sanitizeArch(arch); err != nil {
|
||||
return err
|
||||
} else if !f.valid {
|
||||
return errBadFilter
|
||||
}
|
||||
|
||||
// Libseccomp returns -EEXIST if the specified architecture is already
|
||||
// present. Succeed silently in this case, as it's not fatal, and the
|
||||
// architecture is present already.
|
||||
retCode := C.seccomp_arch_add(f.filterCtx, arch.toNative())
|
||||
if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST {
|
||||
return syscall.Errno(-1 * retCode)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RemoveArch removes an architecture from the filter.
|
||||
// Accepts an architecture constant.
|
||||
// Returns an error on invalid filter context or architecture token, or an
|
||||
// issue with the call to libseccomp.
|
||||
func (f *ScmpFilter) RemoveArch(arch ScmpArch) error {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
if err := sanitizeArch(arch); err != nil {
|
||||
return err
|
||||
} else if !f.valid {
|
||||
return errBadFilter
|
||||
}
|
||||
|
||||
// Similar to AddArch, -EEXIST is returned if the arch is not present
|
||||
// Succeed silently in that case, this is not fatal and the architecture
|
||||
// is not present in the filter after RemoveArch
|
||||
retCode := C.seccomp_arch_remove(f.filterCtx, arch.toNative())
|
||||
if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST {
|
||||
return syscall.Errno(-1 * retCode)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Load loads a filter context into the kernel.
|
||||
// Returns an error if the filter context is invalid or the syscall failed.
|
||||
func (f *ScmpFilter) Load() error {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
if !f.valid {
|
||||
return errBadFilter
|
||||
}
|
||||
|
||||
if retCode := C.seccomp_load(f.filterCtx); retCode != 0 {
|
||||
return syscall.Errno(-1 * retCode)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetDefaultAction returns the default action taken on a syscall which does not
|
||||
// match a rule in the filter, or an error if an issue was encountered
|
||||
// retrieving the value.
|
||||
func (f *ScmpFilter) GetDefaultAction() (ScmpAction, error) {
|
||||
action, err := f.getFilterAttr(filterAttrActDefault)
|
||||
if err != nil {
|
||||
return 0x0, err
|
||||
}
|
||||
|
||||
return actionFromNative(action)
|
||||
}
|
||||
|
||||
// GetBadArchAction returns the default action taken on a syscall for an
|
||||
// architecture not in the filter, or an error if an issue was encountered
|
||||
// retrieving the value.
|
||||
func (f *ScmpFilter) GetBadArchAction() (ScmpAction, error) {
|
||||
action, err := f.getFilterAttr(filterAttrActBadArch)
|
||||
if err != nil {
|
||||
return 0x0, err
|
||||
}
|
||||
|
||||
return actionFromNative(action)
|
||||
}
|
||||
|
||||
// GetNoNewPrivsBit returns the current state the No New Privileges bit will be set
|
||||
// to on the filter being loaded, or an error if an issue was encountered
|
||||
// retrieving the value.
|
||||
// The No New Privileges bit tells the kernel that new processes run with exec()
|
||||
// cannot gain more privileges than the process that ran exec().
|
||||
// For example, a process with No New Privileges set would be unable to exec
|
||||
// setuid/setgid executables.
|
||||
func (f *ScmpFilter) GetNoNewPrivsBit() (bool, error) {
|
||||
noNewPrivs, err := f.getFilterAttr(filterAttrNNP)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
if noNewPrivs == 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// GetTsyncBit returns whether Thread Synchronization will be enabled on the
|
||||
// filter being loaded, or an error if an issue was encountered retrieving the
|
||||
// value.
|
||||
// Thread Sync ensures that all members of the thread group of the calling
|
||||
// process will share the same Seccomp filter set.
|
||||
// Tsync is a fairly recent addition to the Linux kernel and older kernels
|
||||
// lack support. If the running kernel does not support Tsync and it is
|
||||
// requested in a filter, Libseccomp will not enable TSync support and will
|
||||
// proceed as normal.
|
||||
// This function is unavailable before v2.2 of libseccomp and will return an
|
||||
// error.
|
||||
func (f *ScmpFilter) GetTsyncBit() (bool, error) {
|
||||
tSync, err := f.getFilterAttr(filterAttrTsync)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
if tSync == 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// SetBadArchAction sets the default action taken on a syscall for an
|
||||
// architecture not in the filter, or an error if an issue was encountered
|
||||
// setting the value.
|
||||
func (f *ScmpFilter) SetBadArchAction(action ScmpAction) error {
|
||||
if err := sanitizeAction(action); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return f.setFilterAttr(filterAttrActBadArch, action.toNative())
|
||||
}
|
||||
|
||||
// SetNoNewPrivsBit sets the state of the No New Privileges bit, which will be
|
||||
// applied on filter load, or an error if an issue was encountered setting the
|
||||
// value.
|
||||
// Filters with No New Privileges set to 0 can only be loaded if the process
|
||||
// has the CAP_SYS_ADMIN capability.
|
||||
func (f *ScmpFilter) SetNoNewPrivsBit(state bool) error {
|
||||
var toSet C.uint32_t = 0x0
|
||||
|
||||
if state {
|
||||
toSet = 0x1
|
||||
}
|
||||
|
||||
return f.setFilterAttr(filterAttrNNP, toSet)
|
||||
}
|
||||
|
||||
// SetTsync sets whether Thread Synchronization will be enabled on the filter
|
||||
// being loaded. Returns an error if setting Tsync failed, or the filter is
|
||||
// invalid.
|
||||
// Thread Sync ensures that all members of the thread group of the calling
|
||||
// process will share the same Seccomp filter set.
|
||||
// Tsync is a fairly recent addition to the Linux kernel and older kernels
|
||||
// lack support. If the running kernel does not support Tsync and it is
|
||||
// requested in a filter, Libseccomp will not enable TSync support and will
|
||||
// proceed as normal.
|
||||
// This function is unavailable before v2.2 of libseccomp and will return an
|
||||
// error.
|
||||
func (f *ScmpFilter) SetTsync(enable bool) error {
|
||||
var toSet C.uint32_t = 0x0
|
||||
|
||||
if enable {
|
||||
toSet = 0x1
|
||||
}
|
||||
|
||||
return f.setFilterAttr(filterAttrTsync, toSet)
|
||||
}
|
||||
|
||||
// SetSyscallPriority sets a syscall's priority.
|
||||
// This provides a hint to the filter generator in libseccomp about the
|
||||
// importance of this syscall. High-priority syscalls are placed
|
||||
// first in the filter code, and incur less overhead (at the expense of
|
||||
// lower-priority syscalls).
|
||||
func (f *ScmpFilter) SetSyscallPriority(call ScmpSyscall, priority uint8) error {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
if !f.valid {
|
||||
return errBadFilter
|
||||
}
|
||||
|
||||
if retCode := C.seccomp_syscall_priority(f.filterCtx, C.int(call),
|
||||
C.uint8_t(priority)); retCode != 0 {
|
||||
return syscall.Errno(-1 * retCode)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// AddRule adds a single rule for an unconditional action on a syscall.
|
||||
// Accepts the number of the syscall and the action to be taken on the call
|
||||
// being made.
|
||||
// Returns an error if an issue was encountered adding the rule.
|
||||
func (f *ScmpFilter) AddRule(call ScmpSyscall, action ScmpAction) error {
|
||||
return f.addRuleGeneric(call, action, false, nil)
|
||||
}
|
||||
|
||||
// AddRuleExact adds a single rule for an unconditional action on a syscall.
|
||||
// Accepts the number of the syscall and the action to be taken on the call
|
||||
// being made.
|
||||
// No modifications will be made to the rule, and it will fail to add if it
|
||||
// cannot be applied to the current architecture without modification.
|
||||
// The rule will function exactly as described, but it may not function identically
|
||||
// (or be able to be applied to) all architectures.
|
||||
// Returns an error if an issue was encountered adding the rule.
|
||||
func (f *ScmpFilter) AddRuleExact(call ScmpSyscall, action ScmpAction) error {
|
||||
return f.addRuleGeneric(call, action, true, nil)
|
||||
}
|
||||
|
||||
// AddRuleConditional adds a single rule for a conditional action on a syscall.
|
||||
// Returns an error if an issue was encountered adding the rule.
|
||||
// All conditions must match for the rule to match.
|
||||
// There is a bug in library versions below v2.2.1 which can, in some cases,
|
||||
// cause conditions to be lost when more than one are used. Consequently,
|
||||
// AddRuleConditional is disabled on library versions lower than v2.2.1
|
||||
func (f *ScmpFilter) AddRuleConditional(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error {
|
||||
return f.addRuleGeneric(call, action, false, conds)
|
||||
}
|
||||
|
||||
// AddRuleConditionalExact adds a single rule for a conditional action on a
|
||||
// syscall.
|
||||
// No modifications will be made to the rule, and it will fail to add if it
|
||||
// cannot be applied to the current architecture without modification.
|
||||
// The rule will function exactly as described, but it may not function identically
|
||||
// (or be able to be applied to) all architectures.
|
||||
// Returns an error if an issue was encountered adding the rule.
|
||||
// There is a bug in library versions below v2.2.1 which can, in some cases,
|
||||
// cause conditions to be lost when more than one are used. Consequently,
|
||||
// AddRuleConditionalExact is disabled on library versions lower than v2.2.1
|
||||
func (f *ScmpFilter) AddRuleConditionalExact(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error {
|
||||
return f.addRuleGeneric(call, action, true, conds)
|
||||
}
|
||||
|
||||
// ExportPFC output PFC-formatted, human-readable dump of a filter context's
|
||||
// rules to a file.
|
||||
// Accepts file to write to (must be open for writing).
|
||||
// Returns an error if writing to the file fails.
|
||||
func (f *ScmpFilter) ExportPFC(file *os.File) error {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
fd := file.Fd()
|
||||
|
||||
if !f.valid {
|
||||
return errBadFilter
|
||||
}
|
||||
|
||||
if retCode := C.seccomp_export_pfc(f.filterCtx, C.int(fd)); retCode != 0 {
|
||||
return syscall.Errno(-1 * retCode)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ExportBPF outputs Berkeley Packet Filter-formatted, kernel-readable dump of a
|
||||
// filter context's rules to a file.
|
||||
// Accepts file to write to (must be open for writing).
|
||||
// Returns an error if writing to the file fails.
|
||||
func (f *ScmpFilter) ExportBPF(file *os.File) error {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
fd := file.Fd()
|
||||
|
||||
if !f.valid {
|
||||
return errBadFilter
|
||||
}
|
||||
|
||||
if retCode := C.seccomp_export_bpf(f.filterCtx, C.int(fd)); retCode != 0 {
|
||||
return syscall.Errno(-1 * retCode)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,506 @@
|
|||
// +build linux
|
||||
|
||||
// Internal functions for libseccomp Go bindings
|
||||
// No exported functions
|
||||
|
||||
package seccomp
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// Unexported C wrapping code - provides the C-Golang interface
|
||||
// Get the seccomp header in scope
|
||||
// Need stdlib.h for free() on cstrings
|
||||
|
||||
// #cgo pkg-config: libseccomp
|
||||
/*
|
||||
#include <stdlib.h>
|
||||
#include <seccomp.h>
|
||||
|
||||
#if SCMP_VER_MAJOR < 2
|
||||
#error Minimum supported version of Libseccomp is v2.1.0
|
||||
#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 1
|
||||
#error Minimum supported version of Libseccomp is v2.1.0
|
||||
#endif
|
||||
|
||||
#define ARCH_BAD ~0
|
||||
|
||||
const uint32_t C_ARCH_BAD = ARCH_BAD;
|
||||
|
||||
#ifndef SCMP_ARCH_AARCH64
|
||||
#define SCMP_ARCH_AARCH64 ARCH_BAD
|
||||
#endif
|
||||
|
||||
#ifndef SCMP_ARCH_MIPS
|
||||
#define SCMP_ARCH_MIPS ARCH_BAD
|
||||
#endif
|
||||
|
||||
#ifndef SCMP_ARCH_MIPS64
|
||||
#define SCMP_ARCH_MIPS64 ARCH_BAD
|
||||
#endif
|
||||
|
||||
#ifndef SCMP_ARCH_MIPS64N32
|
||||
#define SCMP_ARCH_MIPS64N32 ARCH_BAD
|
||||
#endif
|
||||
|
||||
#ifndef SCMP_ARCH_MIPSEL
|
||||
#define SCMP_ARCH_MIPSEL ARCH_BAD
|
||||
#endif
|
||||
|
||||
#ifndef SCMP_ARCH_MIPSEL64
|
||||
#define SCMP_ARCH_MIPSEL64 ARCH_BAD
|
||||
#endif
|
||||
|
||||
#ifndef SCMP_ARCH_MIPSEL64N32
|
||||
#define SCMP_ARCH_MIPSEL64N32 ARCH_BAD
|
||||
#endif
|
||||
|
||||
#ifndef SCMP_ARCH_PPC
|
||||
#define SCMP_ARCH_PPC ARCH_BAD
|
||||
#endif
|
||||
|
||||
#ifndef SCMP_ARCH_PPC64
|
||||
#define SCMP_ARCH_PPC64 ARCH_BAD
|
||||
#endif
|
||||
|
||||
#ifndef SCMP_ARCH_PPC64LE
|
||||
#define SCMP_ARCH_PPC64LE ARCH_BAD
|
||||
#endif
|
||||
|
||||
#ifndef SCMP_ARCH_S390
|
||||
#define SCMP_ARCH_S390 ARCH_BAD
|
||||
#endif
|
||||
|
||||
#ifndef SCMP_ARCH_S390X
|
||||
#define SCMP_ARCH_S390X ARCH_BAD
|
||||
#endif
|
||||
|
||||
const uint32_t C_ARCH_NATIVE = SCMP_ARCH_NATIVE;
|
||||
const uint32_t C_ARCH_X86 = SCMP_ARCH_X86;
|
||||
const uint32_t C_ARCH_X86_64 = SCMP_ARCH_X86_64;
|
||||
const uint32_t C_ARCH_X32 = SCMP_ARCH_X32;
|
||||
const uint32_t C_ARCH_ARM = SCMP_ARCH_ARM;
|
||||
const uint32_t C_ARCH_AARCH64 = SCMP_ARCH_AARCH64;
|
||||
const uint32_t C_ARCH_MIPS = SCMP_ARCH_MIPS;
|
||||
const uint32_t C_ARCH_MIPS64 = SCMP_ARCH_MIPS64;
|
||||
const uint32_t C_ARCH_MIPS64N32 = SCMP_ARCH_MIPS64N32;
|
||||
const uint32_t C_ARCH_MIPSEL = SCMP_ARCH_MIPSEL;
|
||||
const uint32_t C_ARCH_MIPSEL64 = SCMP_ARCH_MIPSEL64;
|
||||
const uint32_t C_ARCH_MIPSEL64N32 = SCMP_ARCH_MIPSEL64N32;
|
||||
const uint32_t C_ARCH_PPC = SCMP_ARCH_PPC;
|
||||
const uint32_t C_ARCH_PPC64 = SCMP_ARCH_PPC64;
|
||||
const uint32_t C_ARCH_PPC64LE = SCMP_ARCH_PPC64LE;
|
||||
const uint32_t C_ARCH_S390 = SCMP_ARCH_S390;
|
||||
const uint32_t C_ARCH_S390X = SCMP_ARCH_S390X;
|
||||
|
||||
const uint32_t C_ACT_KILL = SCMP_ACT_KILL;
|
||||
const uint32_t C_ACT_TRAP = SCMP_ACT_TRAP;
|
||||
const uint32_t C_ACT_ERRNO = SCMP_ACT_ERRNO(0);
|
||||
const uint32_t C_ACT_TRACE = SCMP_ACT_TRACE(0);
|
||||
const uint32_t C_ACT_ALLOW = SCMP_ACT_ALLOW;
|
||||
|
||||
// If TSync is not supported, make sure it doesn't map to a supported filter attribute
|
||||
// Don't worry about major version < 2, the minimum version checks should catch that case
|
||||
#if SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 2
|
||||
#define SCMP_FLTATR_CTL_TSYNC _SCMP_CMP_MIN
|
||||
#endif
|
||||
|
||||
const uint32_t C_ATTRIBUTE_DEFAULT = (uint32_t)SCMP_FLTATR_ACT_DEFAULT;
|
||||
const uint32_t C_ATTRIBUTE_BADARCH = (uint32_t)SCMP_FLTATR_ACT_BADARCH;
|
||||
const uint32_t C_ATTRIBUTE_NNP = (uint32_t)SCMP_FLTATR_CTL_NNP;
|
||||
const uint32_t C_ATTRIBUTE_TSYNC = (uint32_t)SCMP_FLTATR_CTL_TSYNC;
|
||||
|
||||
const int C_CMP_NE = (int)SCMP_CMP_NE;
|
||||
const int C_CMP_LT = (int)SCMP_CMP_LT;
|
||||
const int C_CMP_LE = (int)SCMP_CMP_LE;
|
||||
const int C_CMP_EQ = (int)SCMP_CMP_EQ;
|
||||
const int C_CMP_GE = (int)SCMP_CMP_GE;
|
||||
const int C_CMP_GT = (int)SCMP_CMP_GT;
|
||||
const int C_CMP_MASKED_EQ = (int)SCMP_CMP_MASKED_EQ;
|
||||
|
||||
const int C_VERSION_MAJOR = SCMP_VER_MAJOR;
|
||||
const int C_VERSION_MINOR = SCMP_VER_MINOR;
|
||||
const int C_VERSION_MICRO = SCMP_VER_MICRO;
|
||||
|
||||
typedef struct scmp_arg_cmp* scmp_cast_t;
|
||||
|
||||
// Wrapper to create an scmp_arg_cmp struct
|
||||
void*
|
||||
make_struct_arg_cmp(
|
||||
unsigned int arg,
|
||||
int compare,
|
||||
uint64_t a,
|
||||
uint64_t b
|
||||
)
|
||||
{
|
||||
struct scmp_arg_cmp *s = malloc(sizeof(struct scmp_arg_cmp));
|
||||
|
||||
s->arg = arg;
|
||||
s->op = compare;
|
||||
s->datum_a = a;
|
||||
s->datum_b = b;
|
||||
|
||||
return s;
|
||||
}
|
||||
*/
|
||||
import "C"
|
||||
|
||||
// Nonexported types
|
||||
type scmpFilterAttr uint32
|
||||
|
||||
// Nonexported constants
|
||||
|
||||
const (
|
||||
filterAttrActDefault scmpFilterAttr = iota
|
||||
filterAttrActBadArch scmpFilterAttr = iota
|
||||
filterAttrNNP scmpFilterAttr = iota
|
||||
filterAttrTsync scmpFilterAttr = iota
|
||||
)
|
||||
|
||||
const (
|
||||
// An error return from certain libseccomp functions
|
||||
scmpError C.int = -1
|
||||
// Comparison boundaries to check for architecture validity
|
||||
archStart ScmpArch = ArchNative
|
||||
archEnd ScmpArch = ArchS390X
|
||||
// Comparison boundaries to check for action validity
|
||||
actionStart ScmpAction = ActKill
|
||||
actionEnd ScmpAction = ActAllow
|
||||
// Comparison boundaries to check for comparison operator validity
|
||||
compareOpStart ScmpCompareOp = CompareNotEqual
|
||||
compareOpEnd ScmpCompareOp = CompareMaskedEqual
|
||||
)
|
||||
|
||||
var (
|
||||
// Error thrown on bad filter context
|
||||
errBadFilter = fmt.Errorf("filter is invalid or uninitialized")
|
||||
// Constants representing library major, minor, and micro versions
|
||||
verMajor = int(C.C_VERSION_MAJOR)
|
||||
verMinor = int(C.C_VERSION_MINOR)
|
||||
verMicro = int(C.C_VERSION_MICRO)
|
||||
)
|
||||
|
||||
// Nonexported functions
|
||||
|
||||
// Check if library version is greater than or equal to the given one
|
||||
func checkVersionAbove(major, minor, micro int) bool {
|
||||
return (verMajor > major) ||
|
||||
(verMajor == major && verMinor > minor) ||
|
||||
(verMajor == major && verMinor == minor && verMicro >= micro)
|
||||
}
|
||||
|
||||
// Init function: Verify library version is appropriate
|
||||
func init() {
|
||||
if !checkVersionAbove(2, 1, 0) {
|
||||
fmt.Fprintf(os.Stderr, "Libseccomp version too low: minimum supported is 2.1.0, detected %d.%d.%d", C.C_VERSION_MAJOR, C.C_VERSION_MINOR, C.C_VERSION_MICRO)
|
||||
os.Exit(-1)
|
||||
}
|
||||
}
|
||||
|
||||
// Filter helpers
|
||||
|
||||
// Filter finalizer - ensure that kernel context for filters is freed
|
||||
func filterFinalizer(f *ScmpFilter) {
|
||||
f.Release()
|
||||
}
|
||||
|
||||
// Get a raw filter attribute
|
||||
func (f *ScmpFilter) getFilterAttr(attr scmpFilterAttr) (C.uint32_t, error) {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
if !f.valid {
|
||||
return 0x0, errBadFilter
|
||||
}
|
||||
|
||||
if !checkVersionAbove(2, 2, 0) && attr == filterAttrTsync {
|
||||
return 0x0, fmt.Errorf("the thread synchronization attribute is not supported in this version of the library")
|
||||
}
|
||||
|
||||
var attribute C.uint32_t
|
||||
|
||||
retCode := C.seccomp_attr_get(f.filterCtx, attr.toNative(), &attribute)
|
||||
if retCode != 0 {
|
||||
return 0x0, syscall.Errno(-1 * retCode)
|
||||
}
|
||||
|
||||
return attribute, nil
|
||||
}
|
||||
|
||||
// Set a raw filter attribute
|
||||
func (f *ScmpFilter) setFilterAttr(attr scmpFilterAttr, value C.uint32_t) error {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
if !f.valid {
|
||||
return errBadFilter
|
||||
}
|
||||
|
||||
if !checkVersionAbove(2, 2, 0) && attr == filterAttrTsync {
|
||||
return fmt.Errorf("the thread synchronization attribute is not supported in this version of the library")
|
||||
}
|
||||
|
||||
retCode := C.seccomp_attr_set(f.filterCtx, attr.toNative(), value)
|
||||
if retCode != 0 {
|
||||
return syscall.Errno(-1 * retCode)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// DOES NOT LOCK OR CHECK VALIDITY
|
||||
// Assumes caller has already done this
|
||||
// Wrapper for seccomp_rule_add_... functions
|
||||
func (f *ScmpFilter) addRuleWrapper(call ScmpSyscall, action ScmpAction, exact bool, cond C.scmp_cast_t) error {
|
||||
var length C.uint
|
||||
if cond != nil {
|
||||
length = 1
|
||||
} else {
|
||||
length = 0
|
||||
}
|
||||
|
||||
var retCode C.int
|
||||
if exact {
|
||||
retCode = C.seccomp_rule_add_exact_array(f.filterCtx, action.toNative(), C.int(call), length, cond)
|
||||
} else {
|
||||
retCode = C.seccomp_rule_add_array(f.filterCtx, action.toNative(), C.int(call), length, cond)
|
||||
}
|
||||
|
||||
if syscall.Errno(-1*retCode) == syscall.EFAULT {
|
||||
return fmt.Errorf("unrecognized syscall")
|
||||
} else if syscall.Errno(-1*retCode) == syscall.EPERM {
|
||||
return fmt.Errorf("requested action matches default action of filter")
|
||||
} else if retCode != 0 {
|
||||
return syscall.Errno(-1 * retCode)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Generic add function for filter rules
|
||||
func (f *ScmpFilter) addRuleGeneric(call ScmpSyscall, action ScmpAction, exact bool, conds []ScmpCondition) error {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
if !f.valid {
|
||||
return errBadFilter
|
||||
}
|
||||
|
||||
if len(conds) == 0 {
|
||||
if err := f.addRuleWrapper(call, action, exact, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// We don't support conditional filtering in library version v2.1
|
||||
if !checkVersionAbove(2, 2, 1) {
|
||||
return fmt.Errorf("conditional filtering requires libseccomp version >= 2.2.1")
|
||||
}
|
||||
|
||||
for _, cond := range conds {
|
||||
cmpStruct := C.make_struct_arg_cmp(C.uint(cond.Argument), cond.Op.toNative(), C.uint64_t(cond.Operand1), C.uint64_t(cond.Operand2))
|
||||
defer C.free(cmpStruct)
|
||||
|
||||
if err := f.addRuleWrapper(call, action, exact, C.scmp_cast_t(cmpStruct)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Generic Helpers
|
||||
|
||||
// Helper - Sanitize Arch token input
|
||||
func sanitizeArch(in ScmpArch) error {
|
||||
if in < archStart || in > archEnd {
|
||||
return fmt.Errorf("unrecognized architecture")
|
||||
}
|
||||
|
||||
if in.toNative() == C.C_ARCH_BAD {
|
||||
return fmt.Errorf("architecture is not supported on this version of the library")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func sanitizeAction(in ScmpAction) error {
|
||||
inTmp := in & 0x0000FFFF
|
||||
if inTmp < actionStart || inTmp > actionEnd {
|
||||
return fmt.Errorf("unrecognized action")
|
||||
}
|
||||
|
||||
if inTmp != ActTrace && inTmp != ActErrno && (in&0xFFFF0000) != 0 {
|
||||
return fmt.Errorf("highest 16 bits must be zeroed except for Trace and Errno")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func sanitizeCompareOp(in ScmpCompareOp) error {
|
||||
if in < compareOpStart || in > compareOpEnd {
|
||||
return fmt.Errorf("unrecognized comparison operator")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func archFromNative(a C.uint32_t) (ScmpArch, error) {
|
||||
switch a {
|
||||
case C.C_ARCH_X86:
|
||||
return ArchX86, nil
|
||||
case C.C_ARCH_X86_64:
|
||||
return ArchAMD64, nil
|
||||
case C.C_ARCH_X32:
|
||||
return ArchX32, nil
|
||||
case C.C_ARCH_ARM:
|
||||
return ArchARM, nil
|
||||
case C.C_ARCH_NATIVE:
|
||||
return ArchNative, nil
|
||||
case C.C_ARCH_AARCH64:
|
||||
return ArchARM64, nil
|
||||
case C.C_ARCH_MIPS:
|
||||
return ArchMIPS, nil
|
||||
case C.C_ARCH_MIPS64:
|
||||
return ArchMIPS64, nil
|
||||
case C.C_ARCH_MIPS64N32:
|
||||
return ArchMIPS64N32, nil
|
||||
case C.C_ARCH_MIPSEL:
|
||||
return ArchMIPSEL, nil
|
||||
case C.C_ARCH_MIPSEL64:
|
||||
return ArchMIPSEL64, nil
|
||||
case C.C_ARCH_MIPSEL64N32:
|
||||
return ArchMIPSEL64N32, nil
|
||||
case C.C_ARCH_PPC:
|
||||
return ArchPPC, nil
|
||||
case C.C_ARCH_PPC64:
|
||||
return ArchPPC64, nil
|
||||
case C.C_ARCH_PPC64LE:
|
||||
return ArchPPC64LE, nil
|
||||
case C.C_ARCH_S390:
|
||||
return ArchS390, nil
|
||||
case C.C_ARCH_S390X:
|
||||
return ArchS390X, nil
|
||||
default:
|
||||
return 0x0, fmt.Errorf("unrecognized architecture")
|
||||
}
|
||||
}
|
||||
|
||||
// Only use with sanitized arches, no error handling
|
||||
func (a ScmpArch) toNative() C.uint32_t {
|
||||
switch a {
|
||||
case ArchX86:
|
||||
return C.C_ARCH_X86
|
||||
case ArchAMD64:
|
||||
return C.C_ARCH_X86_64
|
||||
case ArchX32:
|
||||
return C.C_ARCH_X32
|
||||
case ArchARM:
|
||||
return C.C_ARCH_ARM
|
||||
case ArchARM64:
|
||||
return C.C_ARCH_AARCH64
|
||||
case ArchMIPS:
|
||||
return C.C_ARCH_MIPS
|
||||
case ArchMIPS64:
|
||||
return C.C_ARCH_MIPS64
|
||||
case ArchMIPS64N32:
|
||||
return C.C_ARCH_MIPS64N32
|
||||
case ArchMIPSEL:
|
||||
return C.C_ARCH_MIPSEL
|
||||
case ArchMIPSEL64:
|
||||
return C.C_ARCH_MIPSEL64
|
||||
case ArchMIPSEL64N32:
|
||||
return C.C_ARCH_MIPSEL64N32
|
||||
case ArchPPC:
|
||||
return C.C_ARCH_PPC
|
||||
case ArchPPC64:
|
||||
return C.C_ARCH_PPC64
|
||||
case ArchPPC64LE:
|
||||
return C.C_ARCH_PPC64LE
|
||||
case ArchS390:
|
||||
return C.C_ARCH_S390
|
||||
case ArchS390X:
|
||||
return C.C_ARCH_S390X
|
||||
case ArchNative:
|
||||
return C.C_ARCH_NATIVE
|
||||
default:
|
||||
return 0x0
|
||||
}
|
||||
}
|
||||
|
||||
// Only use with sanitized ops, no error handling
|
||||
func (a ScmpCompareOp) toNative() C.int {
|
||||
switch a {
|
||||
case CompareNotEqual:
|
||||
return C.C_CMP_NE
|
||||
case CompareLess:
|
||||
return C.C_CMP_LT
|
||||
case CompareLessOrEqual:
|
||||
return C.C_CMP_LE
|
||||
case CompareEqual:
|
||||
return C.C_CMP_EQ
|
||||
case CompareGreaterEqual:
|
||||
return C.C_CMP_GE
|
||||
case CompareGreater:
|
||||
return C.C_CMP_GT
|
||||
case CompareMaskedEqual:
|
||||
return C.C_CMP_MASKED_EQ
|
||||
default:
|
||||
return 0x0
|
||||
}
|
||||
}
|
||||
|
||||
func actionFromNative(a C.uint32_t) (ScmpAction, error) {
|
||||
aTmp := a & 0xFFFF
|
||||
switch a & 0xFFFF0000 {
|
||||
case C.C_ACT_KILL:
|
||||
return ActKill, nil
|
||||
case C.C_ACT_TRAP:
|
||||
return ActTrap, nil
|
||||
case C.C_ACT_ERRNO:
|
||||
return ActErrno.SetReturnCode(int16(aTmp)), nil
|
||||
case C.C_ACT_TRACE:
|
||||
return ActTrace.SetReturnCode(int16(aTmp)), nil
|
||||
case C.C_ACT_ALLOW:
|
||||
return ActAllow, nil
|
||||
default:
|
||||
return 0x0, fmt.Errorf("unrecognized action")
|
||||
}
|
||||
}
|
||||
|
||||
// Only use with sanitized actions, no error handling
|
||||
func (a ScmpAction) toNative() C.uint32_t {
|
||||
switch a & 0xFFFF {
|
||||
case ActKill:
|
||||
return C.C_ACT_KILL
|
||||
case ActTrap:
|
||||
return C.C_ACT_TRAP
|
||||
case ActErrno:
|
||||
return C.C_ACT_ERRNO | (C.uint32_t(a) >> 16)
|
||||
case ActTrace:
|
||||
return C.C_ACT_TRACE | (C.uint32_t(a) >> 16)
|
||||
case ActAllow:
|
||||
return C.C_ACT_ALLOW
|
||||
default:
|
||||
return 0x0
|
||||
}
|
||||
}
|
||||
|
||||
// Internal only, assumes safe attribute
|
||||
func (a scmpFilterAttr) toNative() uint32 {
|
||||
switch a {
|
||||
case filterAttrActDefault:
|
||||
return uint32(C.C_ATTRIBUTE_DEFAULT)
|
||||
case filterAttrActBadArch:
|
||||
return uint32(C.C_ATTRIBUTE_BADARCH)
|
||||
case filterAttrNNP:
|
||||
return uint32(C.C_ATTRIBUTE_NNP)
|
||||
case filterAttrTsync:
|
||||
return uint32(C.C_ATTRIBUTE_TSYNC)
|
||||
default:
|
||||
return 0x0
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue