exporter: support compression for layer blob data

Allow the user to choose the compression type for layer data. Gzip is the
default compression for the layer exporter, but it consumes more CPU resources
and takes a long time to export. With the compression option, the user can
choose the uncompressed option to save export time. In the future, zstd
could be added as another option for end users.

Signed-off-by: Wei Fu <fuweid89@gmail.com>
v0.7
Wei Fu 2019-11-30 19:46:11 +08:00
parent b1f83754d5
commit 65388edc6c
8 changed files with 381 additions and 53 deletions

View File

@ -224,6 +224,7 @@ Keys supported by image output:
* `unpack=true`: unpack image after creation (for use with containerd)
* `dangling-name-prefix=[value]`: name image with `prefix@<digest>` , used for anonymous images
* `name-canonical=true`: add additional canonical name `name@<digest>`
* `compression=[uncompressed,gzip]`: choose the compression type for layers; `gzip` is the default
If credentials are required, `buildctl` will attempt to read Docker configuration file `$DOCKER_CONFIG/config.json`.

41
cache/blobs/blobs.go vendored
View File

@ -33,7 +33,7 @@ var ErrNoBlobs = errors.Errorf("no blobs for snapshot")
// GetDiffPairs returns the DiffID/Blobsum pairs for a given reference and saves it.
// Caller must hold a lease when calling this function.
func GetDiffPairs(ctx context.Context, contentStore content.Store, differ diff.Comparer, ref cache.ImmutableRef, createBlobs bool) ([]DiffPair, error) {
func GetDiffPairs(ctx context.Context, contentStore content.Store, differ diff.Comparer, ref cache.ImmutableRef, createBlobs bool, compression CompressionType) ([]DiffPair, error) {
if ref == nil {
return nil, nil
}
@ -50,10 +50,10 @@ func GetDiffPairs(ctx context.Context, contentStore content.Store, differ diff.C
ctx = winlayers.UseWindowsLayerMode(ctx)
}
return getDiffPairs(ctx, contentStore, differ, ref, createBlobs)
return getDiffPairs(ctx, contentStore, differ, ref, createBlobs, compression)
}
func getDiffPairs(ctx context.Context, contentStore content.Store, differ diff.Comparer, ref cache.ImmutableRef, createBlobs bool) ([]DiffPair, error) {
func getDiffPairs(ctx context.Context, contentStore content.Store, differ diff.Comparer, ref cache.ImmutableRef, createBlobs bool, compression CompressionType) ([]DiffPair, error) {
if ref == nil {
return nil, nil
}
@ -66,7 +66,7 @@ func getDiffPairs(ctx context.Context, contentStore content.Store, differ diff.C
if parent != nil {
defer parent.Release(context.TODO())
eg.Go(func() error {
dp, err := getDiffPairs(ctx, contentStore, differ, parent, createBlobs)
dp, err := getDiffPairs(ctx, contentStore, differ, parent, createBlobs, compression)
if err != nil {
return err
}
@ -82,10 +82,22 @@ func getDiffPairs(ctx context.Context, contentStore content.Store, differ diff.C
} else if !createBlobs {
return nil, errors.WithStack(ErrNoBlobs)
}
var mediaType string
var descr ocispec.Descriptor
var err error
switch compression {
case Uncompressed:
mediaType = ocispec.MediaTypeImageLayer
case Gzip:
mediaType = ocispec.MediaTypeImageLayerGzip
default:
return nil, errors.Errorf("unknown layer compression type")
}
if pc, ok := differ.(CompareWithParent); ok {
descr, err = pc.CompareWithParent(ctx, ref.ID(), diff.WithMediaType(ocispec.MediaTypeImageLayerGzip))
descr, err = pc.CompareWithParent(ctx, ref.ID(), diff.WithMediaType(mediaType))
if err != nil {
return nil, err
}
@ -121,24 +133,29 @@ func getDiffPairs(ctx context.Context, contentStore content.Store, differ diff.C
defer release()
}
descr, err = differ.Compare(ctx, lower, upper,
diff.WithMediaType(ocispec.MediaTypeImageLayerGzip),
diff.WithMediaType(mediaType),
diff.WithReference(ref.ID()),
)
if err != nil {
return nil, err
}
}
if descr.Annotations == nil {
descr.Annotations = map[string]string{}
}
info, err := contentStore.Info(ctx, descr.Digest)
if err != nil {
return nil, err
}
if diffID, ok := info.Labels[containerdUncompressed]; !ok {
return nil, errors.Errorf("invalid differ response with no diffID: %v", descr.Digest)
} else {
if descr.Annotations == nil {
descr.Annotations = map[string]string{}
}
if diffID, ok := info.Labels[containerdUncompressed]; ok {
descr.Annotations[containerdUncompressed] = diffID
} else if compression == Uncompressed {
descr.Annotations[containerdUncompressed] = descr.Digest.String()
} else {
return nil, errors.Errorf("unknown layer compression type")
}
return descr, nil

122
cache/blobs/compression.go vendored Normal file
View File

@ -0,0 +1,122 @@
package blobs
import (
"bytes"
"context"
"io"
"github.com/containerd/containerd/content"
"github.com/containerd/containerd/images"
"github.com/moby/buildkit/cache"
digest "github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
)
// CompressionType identifies how the data of a layer blob is compressed.
type CompressionType int

// Supported compression types. The zero value is Uncompressed.
const (
	// Uncompressed means the blob data is stored as-is.
	Uncompressed CompressionType = iota
	// Gzip means the blob data is gzip-compressed.
	Gzip
)

// UnknownCompression marks a compression type that is not supported yet.
const UnknownCompression CompressionType = -1

// DefaultCompression is the compression type used when none is requested.
var DefaultCompression = Gzip

// String returns the lowercase name of the compression type, or "unknown"
// for any value that is not one of the supported types.
func (ct CompressionType) String() string {
	if ct == Gzip {
		return "gzip"
	}
	if ct == Uncompressed {
		return "uncompressed"
	}
	return "unknown"
}
// DetectLayerMediaType returns the layer media type that matches the
// compression detected from the leading bytes of blob id in cs.
//
// When oci is true the OCI media types are returned, otherwise the Docker
// schema2 ones. An error is returned if the blob cannot be opened or read, or
// if the detected compression type has no media type mapping.
func DetectLayerMediaType(ctx context.Context, cs content.Store, id digest.Digest, oci bool) (string, error) {
	ra, err := cs.ReaderAt(ctx, ocispec.Descriptor{Digest: id})
	if err != nil {
		return "", err
	}
	defer ra.Close()

	ct, err := detectCompressionType(content.NewReader(ra))
	if err != nil {
		return "", err
	}

	switch ct {
	case Uncompressed:
		if oci {
			return ocispec.MediaTypeImageLayer, nil
		}
		return images.MediaTypeDockerSchema2Layer, nil
	case Gzip:
		if oci {
			return ocispec.MediaTypeImageLayerGzip, nil
		}
		return images.MediaTypeDockerSchema2LayerGzip, nil
	default:
		return "", errors.Errorf("failed to detect layer %v compression type", id)
	}
}
// detectCompressionType detects the compression type from the leading bytes
// of the blob data read from cr.
//
// A stream that is too short to hold a known magic number, including an empty
// one, is reported as Uncompressed. UnknownCompression is returned only
// together with a non-nil read error.
func detectCompressionType(cr io.Reader) (CompressionType, error) {
	var buf [10]byte

	// A single Read may return fewer bytes than are available, so keep
	// reading until buf is full or the stream ends; otherwise a short first
	// read could hide the magic number.
	n, err := io.ReadFull(cr, buf[:])
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return UnknownCompression, err
	}
	// Note: io.EOF and io.ErrUnexpectedEOF are ignored because there are
	// some odd cases where the layer.tar file will be empty (zero bytes) or
	// very short, and we'll just treat it as a non-compressed stream and
	// that means just create an empty layer.
	//
	// See issue docker/docker#18170

	// A slice keeps the matching order deterministic once more formats are
	// added.
	magics := []struct {
		ct     CompressionType
		header []byte
	}{
		{ct: Gzip, header: []byte{0x1F, 0x8B, 0x08}},
	}
	for _, m := range magics {
		if n < len(m.header) {
			continue
		}
		if bytes.Equal(m.header, buf[:len(m.header)]) {
			return m.ct, nil
		}
	}
	return Uncompressed, nil
}
// GetMediaTypeForLayers retrieves the media type recorded on ref and its
// ancestors for every entry of diffPairs. The result is indexed like
// diffPairs.
//
// NOTE(review): diffPairs is assumed to be ordered from the base layer to the
// topmost one (callers append manifest layers in diffPairs order), while ref
// is the topmost layer; confirm against getDiffPairs. The ref chain is
// therefore matched against diffPairs from its last entry backwards.
//
// nil is returned when the ref chain is shorter than diffPairs or when a ref
// does not match its pair, so callers can fall back to detecting the media
// type from blob data.
func GetMediaTypeForLayers(diffPairs []DiffPair, ref cache.ImmutableRef) []string {
	// Refs obtained from Parent() are released before returning, mirroring
	// how getDiffPairs releases its parent ref. ref itself stays owned by the
	// caller and is never released here.
	var parents []cache.ImmutableRef
	defer func() {
		for _, p := range parents {
			// Best-effort cleanup; a release error cannot be reported
			// through this function's signature.
			p.Release(context.TODO())
		}
	}()

	layerTypes := make([]string, len(diffPairs))
	tref := ref
	for i := len(diffPairs) - 1; i >= 0; i-- {
		if tref == nil {
			return nil
		}

		dp := diffPairs[i]
		info := tref.Info()
		if info.DiffID != dp.DiffID || info.Blob != dp.Blobsum {
			return nil
		}
		layerTypes[i] = info.MediaType

		tref = tref.Parent()
		if tref != nil {
			parents = append(parents, tref)
		}
	}
	return layerTypes
}

View File

@ -69,6 +69,7 @@ func TestIntegration(t *testing.T) {
testBuildMultiMount,
testBuildHTTPSource,
testBuildPushAndValidate,
testBuildExportWithUncompressed,
testResolveAndHosts,
testUser,
testOCIExporter,
@ -1514,6 +1515,121 @@ func testTarExporterWithSocket(t *testing.T, sb integration.Sandbox) {
require.NoError(t, err)
}
// testBuildExportWithUncompressed pushes an image whose single layer is
// exported with compression=uncompressed, then builds a second image on top of
// it with the default compression, and finally checks (on containerd sandboxes
// only) that the resulting manifest has an uncompressed base layer followed by
// a gzip layer, each holding the expected "data" file.
func testBuildExportWithUncompressed(t *testing.T, sb integration.Sandbox) {
	requiresLinux(t)
	c, err := New(context.TODO(), sb.Address())
	require.NoError(t, err)
	defer c.Close()

	busybox := llb.Image("busybox:latest")
	cmd := `sh -e -c "echo uncompressed > data"`

	st := llb.Scratch()
	st = busybox.Run(llb.Shlex(cmd), llb.Dir("/wd")).AddMount("/wd", st)

	def, err := st.Marshal()
	require.NoError(t, err)

	registry, err := sb.NewRegistry()
	if errors.Cause(err) == integration.ErrorRequirements {
		t.Skip(err.Error())
	}
	require.NoError(t, err)

	target := registry + "/buildkit/build/exporter:withnocompressed"

	_, err = c.Solve(context.TODO(), def, SolveOpt{
		Exports: []ExportEntry{
			{
				Type: ExporterImage,
				Attrs: map[string]string{
					"name":        target,
					"push":        "true",
					"compression": "uncompressed",
				},
			},
		},
	}, nil)
	require.NoError(t, err)

	// new layer with gzip compression
	//
	// The pushed image only contains the exported "/wd" mount (a single data
	// file), so it has no shell of its own: run the command in busybox with
	// the pushed image mounted at /wd and export that mount.
	targetImg := llb.Image(target)
	cmd = `sh -e -c "echo gzip > data"`
	st = busybox.Run(llb.Shlex(cmd), llb.Dir("/wd")).AddMount("/wd", targetImg)

	// The definition must be re-marshaled, otherwise the second solve would
	// simply rebuild the first image.
	def, err = st.Marshal()
	require.NoError(t, err)

	target = registry + "/buildkit/build/exporter:withcompressed"
	_, err = c.Solve(context.TODO(), def, SolveOpt{
		Exports: []ExportEntry{
			{
				Type: ExporterImage,
				Attrs: map[string]string{
					"name": target,
					"push": "true",
				},
			},
		},
	}, nil)
	require.NoError(t, err)

	// The manifest can only be inspected through containerd; other sandboxes
	// stop after the two builds succeeded.
	cd, ok := sb.(interface {
		ContainerdAddress() string
	})
	if !ok {
		return
	}
	cdAddress := cd.ContainerdAddress()

	client, err := newContainerd(cdAddress)
	require.NoError(t, err)
	defer client.Close()

	ctx := namespaces.WithNamespace(context.Background(), "buildkit")

	err = client.ImageService().Delete(ctx, target, images.SynchronousDelete())
	require.NoError(t, err)

	checkAllReleasable(t, c, sb, true)

	img, err := client.Pull(ctx, target)
	require.NoError(t, err)

	dt, err := content.ReadBlob(ctx, img.ContentStore(), img.Target())
	require.NoError(t, err)

	var mfst = struct {
		MediaType string `json:"mediaType,omitempty"`
		ocispec.Manifest
	}{}

	err = json.Unmarshal(dt, &mfst)
	require.NoError(t, err)
	require.Equal(t, 2, len(mfst.Layers))
	require.Equal(t, images.MediaTypeDockerSchema2Layer, mfst.Layers[0].MediaType)
	require.Equal(t, images.MediaTypeDockerSchema2LayerGzip, mfst.Layers[1].MediaType)

	// Base layer: stored uncompressed, so read the tar without gunzip.
	dt, err = content.ReadBlob(ctx, img.ContentStore(), ocispec.Descriptor{Digest: mfst.Layers[0].Digest})
	require.NoError(t, err)

	m, err := testutil.ReadTarToMap(dt, false)
	require.NoError(t, err)

	item, ok := m["data"]
	require.True(t, ok)
	require.Equal(t, int32(tar.TypeReg), int32(item.Header.Typeflag))
	// echo appends a trailing newline to the file content.
	require.Equal(t, []byte("uncompressed\n"), item.Data)

	// Top layer: gzip-compressed.
	dt, err = content.ReadBlob(ctx, img.ContentStore(), ocispec.Descriptor{Digest: mfst.Layers[1].Digest})
	require.NoError(t, err)

	m, err = testutil.ReadTarToMap(dt, true)
	require.NoError(t, err)

	item, ok = m["data"]
	require.True(t, ok)
	require.Equal(t, int32(tar.TypeReg), int32(item.Header.Typeflag))
	require.Equal(t, []byte("gzip\n"), item.Data)
}
func testBuildPushAndValidate(t *testing.T, sb integration.Sandbox) {
requiresLinux(t)
c, err := New(context.TODO(), sb.Address())
@ -1667,6 +1783,8 @@ func testBuildPushAndValidate(t *testing.T, sb integration.Sandbox) {
require.Equal(t, images.MediaTypeDockerSchema2Manifest, mfst.MediaType)
require.Equal(t, 2, len(mfst.Layers))
require.Equal(t, images.MediaTypeDockerSchema2LayerGzip, mfst.Layers[0].MediaType)
require.Equal(t, images.MediaTypeDockerSchema2LayerGzip, mfst.Layers[1].MediaType)
dt, err = content.ReadBlob(ctx, img.ContentStore(), ocispec.Descriptor{Digest: mfst.Layers[0].Digest})
require.NoError(t, err)

View File

@ -13,6 +13,7 @@ import (
"github.com/containerd/containerd/leases"
"github.com/containerd/containerd/platforms"
"github.com/containerd/containerd/rootfs"
"github.com/moby/buildkit/cache/blobs"
"github.com/moby/buildkit/exporter"
"github.com/moby/buildkit/session"
"github.com/moby/buildkit/snapshot"
@ -33,6 +34,7 @@ const (
keyUnpack = "unpack"
keyDanglingPrefix = "dangling-name-prefix"
keyNameCanonical = "name-canonical"
keyLayerCompression = "compression"
ociTypes = "oci-mediatypes"
)
@ -58,7 +60,11 @@ func New(opt Opt) (exporter.Exporter, error) {
}
func (e *imageExporter) Resolve(ctx context.Context, opt map[string]string) (exporter.ExporterInstance, error) {
i := &imageExporterInstance{imageExporter: e}
i := &imageExporterInstance{
imageExporter: e,
layerCompression: blobs.DefaultCompression,
}
for k, v := range opt {
switch k {
case keyImageName:
@ -125,6 +131,15 @@ func (e *imageExporter) Resolve(ctx context.Context, opt map[string]string) (exp
return nil, errors.Wrapf(err, "non-bool value specified for %s", k)
}
i.nameCanonical = b
case keyLayerCompression:
switch v {
case "gzip":
i.layerCompression = blobs.Gzip
case "uncompressed":
i.layerCompression = blobs.Uncompressed
default:
return nil, errors.Errorf("unsupported layer compression type: %v", v)
}
default:
if i.meta == nil {
i.meta = make(map[string][]byte)
@ -145,6 +160,7 @@ type imageExporterInstance struct {
ociTypes bool
nameCanonical bool
danglingPrefix string
layerCompression blobs.CompressionType
meta map[string][]byte
}
@ -166,7 +182,7 @@ func (e *imageExporterInstance) Export(ctx context.Context, src exporter.Source)
}
defer done(context.TODO())
desc, err := e.opt.ImageWriter.Commit(ctx, src, e.ociTypes)
desc, err := e.opt.ImageWriter.Commit(ctx, src, e.ociTypes, e.layerCompression)
if err != nil {
return nil, err
}

View File

@ -45,7 +45,7 @@ type ImageWriter struct {
opt WriterOpt
}
func (ic *ImageWriter) Commit(ctx context.Context, inp exporter.Source, oci bool) (*ocispec.Descriptor, error) {
func (ic *ImageWriter) Commit(ctx context.Context, inp exporter.Source, oci bool, compression blobs.CompressionType) (*ocispec.Descriptor, error) {
platformsBytes, ok := inp.Metadata[exptypes.ExporterPlatformsKey]
if len(inp.Refs) > 0 && !ok {
@ -53,7 +53,7 @@ func (ic *ImageWriter) Commit(ctx context.Context, inp exporter.Source, oci bool
}
if len(inp.Refs) == 0 {
layers, err := ic.exportLayers(ctx, inp.Ref)
layers, err := ic.exportLayers(ctx, compression, inp.Ref)
if err != nil {
return nil, err
}
@ -76,7 +76,7 @@ func (ic *ImageWriter) Commit(ctx context.Context, inp exporter.Source, oci bool
refs = append(refs, r)
}
layers, err := ic.exportLayers(ctx, refs...)
layers, err := ic.exportLayers(ctx, compression, refs...)
if err != nil {
return nil, err
}
@ -141,7 +141,7 @@ func (ic *ImageWriter) Commit(ctx context.Context, inp exporter.Source, oci bool
return &idxDesc, nil
}
func (ic *ImageWriter) exportLayers(ctx context.Context, refs ...cache.ImmutableRef) ([][]blobs.DiffPair, error) {
func (ic *ImageWriter) exportLayers(ctx context.Context, compression blobs.CompressionType, refs ...cache.ImmutableRef) ([][]blobs.DiffPair, error) {
eg, ctx := errgroup.WithContext(ctx)
layersDone := oneOffProgress(ctx, "exporting layers")
@ -150,7 +150,7 @@ func (ic *ImageWriter) exportLayers(ctx context.Context, refs ...cache.Immutable
for i, ref := range refs {
func(i int, ref cache.ImmutableRef) {
eg.Go(func() error {
diffPairs, err := blobs.GetDiffPairs(ctx, ic.opt.ContentStore, ic.opt.Differ, ref, true)
diffPairs, err := blobs.GetDiffPairs(ctx, ic.opt.ContentStore, ic.opt.Differ, ref, true, compression)
if err != nil {
return errors.Wrap(err, "failed calculating diff pairs for exported snapshot")
}
@ -192,14 +192,12 @@ func (ic *ImageWriter) commitDistributionManifest(ctx context.Context, ref cache
configDigest = digest.FromBytes(config)
manifestType = ocispec.MediaTypeImageManifest
configType = ocispec.MediaTypeImageConfig
layerType = ocispec.MediaTypeImageLayerGzip
)
// Use docker media types for older Docker versions and registries
if !oci {
manifestType = images.MediaTypeDockerSchema2Manifest
configType = images.MediaTypeDockerSchema2Config
layerType = images.MediaTypeDockerSchema2LayerGzip
}
mfst := struct {
@ -226,11 +224,31 @@ func (ic *ImageWriter) commitDistributionManifest(ctx context.Context, ref cache
"containerd.io/gc.ref.content.0": configDigest.String(),
}
layerMediaTypes := blobs.GetMediaTypeForLayers(diffPairs, ref)
cs := ic.opt.ContentStore
for i, dp := range diffPairs {
info, err := ic.opt.ContentStore.Info(ctx, dp.Blobsum)
info, err := cs.Info(ctx, dp.Blobsum)
if err != nil {
return nil, errors.Wrapf(err, "could not find blob %s from contentstore", dp.Blobsum)
}
var layerType string
if len(layerMediaTypes) > i {
layerType = layerMediaTypes[i]
}
// NOTE: The media type might be missing for refs that were migrated
// from before lease-based storage. If so, we should detect
// the media type from the blob data.
//
// Discussion: https://github.com/moby/buildkit/pull/1277#discussion_r352795429
if layerType == "" {
layerType, err = blobs.DetectLayerMediaType(ctx, cs, dp.Blobsum, oci)
if err != nil {
return nil, err
}
}
mfst.Layers = append(mfst.Layers, ocispec.Descriptor{
Digest: dp.Blobsum,
Size: info.Size,

View File

@ -9,6 +9,7 @@ import (
archiveexporter "github.com/containerd/containerd/images/archive"
"github.com/containerd/containerd/leases"
"github.com/docker/distribution/reference"
"github.com/moby/buildkit/cache/blobs"
"github.com/moby/buildkit/exporter"
"github.com/moby/buildkit/exporter/containerimage"
"github.com/moby/buildkit/session"
@ -25,6 +26,7 @@ type ExporterVariant string
const (
keyImageName = "name"
keyLayerCompression = "compression"
VariantOCI = "oci"
VariantDocker = "docker"
ociTypes = "oci-mediatypes"
@ -61,11 +63,24 @@ func (e *imageExporter) Resolve(ctx context.Context, opt map[string]string) (exp
}
var ot *bool
i := &imageExporterInstance{imageExporter: e, caller: caller}
i := &imageExporterInstance{
imageExporter: e,
caller: caller,
layerCompression: blobs.DefaultCompression,
}
for k, v := range opt {
switch k {
case keyImageName:
i.name = v
case keyLayerCompression:
switch v {
case "gzip":
i.layerCompression = blobs.Gzip
case "uncompressed":
i.layerCompression = blobs.Uncompressed
default:
return nil, errors.Errorf("unsupported layer compression type: %v", v)
}
case ociTypes:
ot = new(bool)
if v == "" {
@ -98,6 +113,7 @@ type imageExporterInstance struct {
caller session.Caller
name string
ociTypes bool
layerCompression blobs.CompressionType
}
func (e *imageExporterInstance) Name() string {
@ -122,7 +138,7 @@ func (e *imageExporterInstance) Export(ctx context.Context, src exporter.Source)
}
defer done(context.TODO())
desc, err := e.opt.ImageWriter.Commit(ctx, src, e.ociTypes)
desc, err := e.opt.ImageWriter.Commit(ctx, src, e.ociTypes, e.layerCompression)
if err != nil {
return nil, err
}

View File

@ -348,7 +348,8 @@ func (w *Worker) GetRemote(ctx context.Context, ref cache.ImmutableRef, createIf
}
defer done(ctx)
diffPairs, err := blobs.GetDiffPairs(ctx, w.ContentStore(), w.Differ, ref, createIfNeeded)
// TODO(fuweid): add compression option or config for cache exporter.
diffPairs, err := blobs.GetDiffPairs(ctx, w.ContentStore(), w.Differ, ref, createIfNeeded, blobs.DefaultCompression)
if err != nil {
return nil, errors.Wrap(err, "failed calculating diff pairs for exported snapshot")
}
@ -363,8 +364,10 @@ func (w *Worker) GetRemote(ctx context.Context, ref cache.ImmutableRef, createIf
descs := make([]ocispec.Descriptor, len(diffPairs))
cs := w.ContentStore()
layerMediaTypes := blobs.GetMediaTypeForLayers(diffPairs, ref)
for i, dp := range diffPairs {
info, err := w.ContentStore().Info(ctx, dp.Blobsum)
info, err := cs.Info(ctx, dp.Blobsum)
if err != nil {
return nil, err
}
@ -374,10 +377,27 @@ func (w *Worker) GetRemote(ctx context.Context, ref cache.ImmutableRef, createIf
return nil, err
}
var mediaType string
if len(layerMediaTypes) > i {
mediaType = layerMediaTypes[i]
}
// NOTE: The media type might be missing for refs that were migrated
// from before lease-based storage. If so, we should detect
// the media type from the blob data.
//
// Discussion: https://github.com/moby/buildkit/pull/1277#discussion_r352795429
if mediaType == "" {
mediaType, err = blobs.DetectLayerMediaType(ctx, cs, dp.Blobsum, false)
if err != nil {
return nil, err
}
}
descs[i] = ocispec.Descriptor{
Digest: dp.Blobsum,
Size: info.Size,
MediaType: images.MediaTypeDockerSchema2LayerGzip,
MediaType: mediaType,
Annotations: map[string]string{
"containerd.io/uncompressed": dp.DiffID.String(),
labelCreatedAt: string(tm),
@ -387,7 +407,7 @@ func (w *Worker) GetRemote(ctx context.Context, ref cache.ImmutableRef, createIf
return &solver.Remote{
Descriptors: descs,
Provider: w.ContentStore(),
Provider: cs,
}, nil
}