buildkit/cache/blobs.go

467 lines
14 KiB
Go
Raw Normal View History

package cache
import (
"context"
"fmt"
"io"
"os"
"strconv"
"github.com/containerd/containerd/content"
"github.com/containerd/containerd/diff"
"github.com/containerd/containerd/leases"
"github.com/containerd/containerd/mount"
"github.com/klauspost/compress/zstd"
"github.com/moby/buildkit/session"
"github.com/moby/buildkit/util/compression"
"github.com/moby/buildkit/util/flightcontrol"
"github.com/moby/buildkit/util/winlayers"
digest "github.com/opencontainers/go-digest"
imagespecidentity "github.com/opencontainers/image-spec/identity"
ocispecs "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sync/errgroup"
)
// containerdUncompressed is the "containerd.io/uncompressed" key.
// NOTE(review): presumably the content-store label that records a blob's
// uncompressed digest; its use is not visible in this part of the file.
const containerdUncompressed = "containerd.io/uncompressed"

var (
	// g is the flightcontrol group through which computeBlobChain runs the
	// per-ref blob computation (g.Do), keyed by the ref ID and the
	// createIfNeeded flag, presumably so concurrent callers share one run.
	g flightcontrol.Group

	// ErrNoBlobs is returned when a ref has no blob and creating one was not
	// requested (createIfNeeded is false).
	ErrNoBlobs = errors.Errorf("no blobs for snapshot")
)
// computeBlobChain ensures every ref in a parent chain has an associated blob in the content store. If
// a blob is missing and createIfNeeded is true, then the blob will be created, otherwise ErrNoBlobs will
// be returned. Caller must hold a lease when calling this function.
// If forceCompression is specified but the blob of compressionType doesn't exist, this function creates it.
func (sr *immutableRef) computeBlobChain(ctx context.Context, createIfNeeded bool, compressionType compression.Type, forceCompression bool, s session.Group) error {
if _, ok := leases.FromContext(ctx); !ok {
return errors.Errorf("missing lease requirement for computeBlobChain")
}
if err := sr.Finalize(ctx); err != nil {
return err
}
if isTypeWindows(sr) {
ctx = winlayers.UseWindowsLayerMode(ctx)
}
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
// filter keeps track of which layers should actually be included in the blob chain.
// This is required for diff refs, which can include only a single layer from their parent
// refs rather than every single layer present among their ancestors.
filter := sr.layerSet()
return computeBlobChain(ctx, sr, createIfNeeded, compressionType, forceCompression, s, filter)
}
// compressor is the signature of a custom layer-compression constructor: it
// receives the destination writer and the required media type and returns an
// io.WriteCloser or an error. computeBlobChain picks one for the EStargz and
// Zstd compression types.
// NOTE(review): presumably data written to the returned WriteCloser ends up
// compressed in dest — confirm against the implementations.
type compressor func(dest io.Writer, requiredMediaType string) (io.WriteCloser, error)
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
func computeBlobChain(ctx context.Context, sr *immutableRef, createIfNeeded bool, compressionType compression.Type, forceCompression bool, s session.Group, filter map[string]struct{}) error {
eg, ctx := errgroup.WithContext(ctx)
switch sr.kind() {
case Merge:
for _, parent := range sr.mergeParents {
parent := parent
eg.Go(func() error {
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
return computeBlobChain(ctx, parent, createIfNeeded, compressionType, forceCompression, s, filter)
})
}
case Diff:
if _, ok := filter[sr.ID()]; !ok && sr.diffParents.upper != nil {
// This diff is just re-using the upper blob, compute that
eg.Go(func() error {
return computeBlobChain(ctx, sr.diffParents.upper, createIfNeeded, compressionType, forceCompression, s, filter)
})
}
case Layer:
eg.Go(func() error {
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
return computeBlobChain(ctx, sr.layerParent, createIfNeeded, compressionType, forceCompression, s, filter)
})
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
}
if _, ok := filter[sr.ID()]; ok {
eg.Go(func() error {
_, err := g.Do(ctx, fmt.Sprintf("%s-%t", sr.ID(), createIfNeeded), func(ctx context.Context) (interface{}, error) {
if sr.getBlob() != "" {
return nil, nil
}
if !createIfNeeded {
return nil, errors.WithStack(ErrNoBlobs)
}
var mediaType string
var compressorFunc compressor
var finalize func(context.Context, content.Store) (map[string]string, error)
switch compressionType {
case compression.Uncompressed:
mediaType = ocispecs.MediaTypeImageLayer
case compression.Gzip:
mediaType = ocispecs.MediaTypeImageLayerGzip
case compression.EStargz:
compressorFunc, finalize = compressEStargz()
mediaType = ocispecs.MediaTypeImageLayerGzip
case compression.Zstd:
compressorFunc = zstdWriter
mediaType = ocispecs.MediaTypeImageLayer + "+zstd"
default:
return nil, errors.Errorf("unknown layer compression type: %q", compressionType)
}
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
var lowerRef *immutableRef
switch sr.kind() {
case Diff:
lowerRef = sr.diffParents.lower
case Layer:
lowerRef = sr.layerParent
}
var lower []mount.Mount
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
if lowerRef != nil {
m, err := lowerRef.Mount(ctx, true, s)
if err != nil {
return nil, err
}
var release func() error
lower, release, err = m.Mount()
if err != nil {
return nil, err
}
if release != nil {
defer release()
}
}
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
var upperRef *immutableRef
switch sr.kind() {
case Diff:
upperRef = sr.diffParents.upper
default:
upperRef = sr
}
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
var upper []mount.Mount
if upperRef != nil {
m, err := upperRef.Mount(ctx, true, s)
if err != nil {
return nil, err
}
var release func() error
upper, release, err = m.Mount()
if err != nil {
return nil, err
}
if release != nil {
defer release()
}
}
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
var desc ocispecs.Descriptor
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
var err error
// Determine differ and error/log handling according to the platform, envvar and the snapshotter.
var enableOverlay, fallback, logWarnOnErr bool
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
if forceOvlStr := os.Getenv("BUILDKIT_DEBUG_FORCE_OVERLAY_DIFF"); forceOvlStr != "" && sr.kind() != Diff {
enableOverlay, err = strconv.ParseBool(forceOvlStr)
if err != nil {
return nil, errors.Wrapf(err, "invalid boolean in BUILDKIT_DEBUG_FORCE_OVERLAY_DIFF")
}
fallback = false // prohibit fallback on debug
} else if !isTypeWindows(sr) {
enableOverlay, fallback = true, true
switch sr.cm.Snapshotter.Name() {
case "overlayfs", "stargz":
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
// overlayfs-based snapshotters should support overlay diff except when running an arbitrary diff
// (in which case lower and upper may differ by more than one layer), so print warn log on unexpected
// failure.
logWarnOnErr = sr.kind() != Diff
case "fuse-overlayfs", "native":
// not supported with fuse-overlayfs snapshotter which doesn't provide overlayfs mounts.
// TODO: add support for fuse-overlayfs
enableOverlay = false
}
}
if enableOverlay {
computed, ok, err := sr.tryComputeOverlayBlob(ctx, lower, upper, mediaType, sr.ID(), compressorFunc)
if !ok || err != nil {
if !fallback {
if !ok {
return nil, errors.Errorf("overlay mounts not detected (lower=%+v,upper=%+v)", lower, upper)
}
if err != nil {
return nil, errors.Wrapf(err, "failed to compute overlay diff")
}
}
if logWarnOnErr {
logrus.Warnf("failed to compute blob by overlay differ (ok=%v): %v", ok, err)
}
}
if ok {
desc = computed
}
}
if desc.Digest == "" {
desc, err = sr.cm.Differ.Compare(ctx, lower, upper,
diff.WithMediaType(mediaType),
diff.WithReference(sr.ID()),
diff.WithCompressor(compressorFunc),
)
if err != nil {
return nil, err
}
}
if desc.Annotations == nil {
desc.Annotations = map[string]string{}
}
if finalize != nil {
a, err := finalize(ctx, sr.cm.ContentStore)
if err != nil {
return nil, errors.Wrapf(err, "failed to finalize compression")
}
for k, v := range a {
desc.Annotations[k] = v
}
}
info, err := sr.cm.ContentStore.Info(ctx, desc.Digest)
if err != nil {
return nil, err
}
if diffID, ok := info.Labels[containerdUncompressed]; ok {
desc.Annotations[containerdUncompressed] = diffID
} else if mediaType == ocispecs.MediaTypeImageLayer {
desc.Annotations[containerdUncompressed] = desc.Digest.String()
} else {
return nil, errors.Errorf("unknown layer compression type")
}
if err := sr.setBlob(ctx, compressionType, desc); err != nil {
return nil, err
}
return nil, nil
})
if err != nil {
return err
}
if forceCompression {
if err := ensureCompression(ctx, sr, compressionType, s); err != nil {
return errors.Wrapf(err, "failed to ensure compression type of %q", compressionType)
}
}
return nil
})
}
if err := eg.Wait(); err != nil {
return err
}
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
return sr.computeChainMetadata(ctx, filter)
}
// setBlob records desc as the blob backing this cache record and registers it
// as the variant for compressionType.
//
// The context must carry a lease (a lease must be held for the blob). The
// descriptor must yield a diffID, the blob must already be present in the
// content store, and compressionType must not be compression.UnknownCompression.
// If the record already has a blob, the call is a no-op and returns nil.
func (sr *immutableRef) setBlob(ctx context.Context, compressionType compression.Type, desc ocispecs.Descriptor) error {
	if _, hasLease := leases.FromContext(ctx); !hasLease {
		return errors.Errorf("missing lease requirement for setBlob")
	}

	// Validate the descriptor before taking the ref lock.
	diffID, err := diffIDFromDescriptor(desc)
	if err != nil {
		return err
	}
	if _, err = sr.cm.ContentStore.Info(ctx, desc.Digest); err != nil {
		return err
	}
	if compressionType == compression.UnknownCompression {
		return errors.Errorf("unhandled layer media type: %q", desc.MediaType)
	}

	sr.mu.Lock()
	defer sr.mu.Unlock()

	// A blob has already been associated with this record; nothing to do.
	if sr.getBlob() != "" {
		return nil
	}

	if err = sr.finalize(ctx); err != nil {
		return err
	}

	// Attach the blob content to the lease identified by this ref's ID.
	refLease := leases.Lease{ID: sr.ID()}
	blobResource := leases.Resource{
		ID:   desc.Digest.String(),
		Type: "content",
	}
	if err = sr.cm.LeaseManager.AddResource(ctx, refLease, blobResource); err != nil {
		return err
	}

	// Queue the blob metadata and commit it in one go.
	sr.queueDiffID(diffID)
	sr.queueBlob(desc.Digest)
	sr.queueMediaType(desc.MediaType)
	sr.queueBlobSize(desc.Size)
	if err = sr.commitMetadata(); err != nil {
		return err
	}

	return sr.addCompressionBlob(ctx, desc, compressionType)
}
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allow users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two input refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
func (sr *immutableRef) computeChainMetadata(ctx context.Context, filter map[string]struct{}) error {
if _, ok := leases.FromContext(ctx); !ok {
return errors.Errorf("missing lease requirement for computeChainMetadata")
}
sr.mu.Lock()
defer sr.mu.Unlock()
if sr.getChainID() != "" {
return nil
}
var chainID digest.Digest
var blobChainID digest.Digest
switch sr.kind() {
case BaseLayer:
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
if _, ok := filter[sr.ID()]; !ok {
return nil
}
diffID := sr.getDiffID()
chainID = diffID
blobChainID = imagespecidentity.ChainID([]digest.Digest{digest.Digest(sr.getBlob()), diffID})
case Layer:
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
if _, ok := filter[sr.ID()]; !ok {
return nil
}
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
if _, ok := filter[sr.layerParent.ID()]; ok {
if parentChainID := sr.layerParent.getChainID(); parentChainID != "" {
chainID = parentChainID
} else {
return errors.Errorf("failed to set chain for reference with non-addressable parent %q", sr.layerParent.GetDescription())
}
if parentBlobChainID := sr.layerParent.getBlobChainID(); parentBlobChainID != "" {
blobChainID = parentBlobChainID
} else {
return errors.Errorf("failed to set blobchain for reference with non-addressable parent %q", sr.layerParent.GetDescription())
}
}
diffID := digest.Digest(sr.getDiffID())
chainID = imagespecidentity.ChainID([]digest.Digest{chainID, diffID})
blobID := imagespecidentity.ChainID([]digest.Digest{digest.Digest(sr.getBlob()), diffID})
blobChainID = imagespecidentity.ChainID([]digest.Digest{blobChainID, blobID})
case Merge:
baseInput := sr.mergeParents[0]
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
if _, ok := filter[baseInput.ID()]; !ok {
// not enough information to compute chain at this time
return nil
}
chainID = baseInput.getChainID()
blobChainID = baseInput.getBlobChainID()
for _, mergeParent := range sr.mergeParents[1:] {
for _, layer := range mergeParent.layerChain() {
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
if _, ok := filter[layer.ID()]; !ok {
// not enough information to compute chain at this time
return nil
}
diffID := digest.Digest(layer.getDiffID())
chainID = imagespecidentity.ChainID([]digest.Digest{chainID, diffID})
blobID := imagespecidentity.ChainID([]digest.Digest{digest.Digest(layer.getBlob()), diffID})
blobChainID = imagespecidentity.ChainID([]digest.Digest{blobChainID, blobID})
}
}
cache: add support for Diff refs. This allows you to create refs that are single layers representing the diff between any two arbitrary refs. The primary use case for this is to allows users to extract the changes created by ops like Exec and rebase them elsewhere through MergeOp. However, there is no restriction on the inputs to DiffOp and the resulting ref's layer is simply the layer created by running the differ on the two inputs refs (specifically, the same differ used during exports). A Diff ref can be mounted by itself, in which case it is defined as the result of applying the diff to Scratch. Most use cases though will use Diff refs as the input to a MergeOp, in which case the diff is just applied on top of the lower merge inputs, as was the case before. In cases like Diff(A, A->B->C) (i.e. cases where the diff is between two refs where the lower is an ancestor of upper), the diff will be defined as the layers separating the two refs. In other cases, the diff is just a single layer, not re-used from the inputs, representing the diff between the two refs (which can be defined as the layer "Diff(A,B)" that satisfies "Merge(A, Diff(A,B)) == B"). Note that there is technically a meaningful difference between the "unmerge" behavior of extracting the layers separating diffs and the "simple diff" of just running the differ on the two refs. Namely, in the case where there are "intermediate deletes" (i.e. deletes that only exist in layers between A and B but not between A and B by themselves), then the simple diff and unmerge can create different results when plugged into a MergeOp. This is due to the fact that intermediate deletes will apply to the merge when using the unmerge behavior, but not when using the simple diff. This is on top of the fact that the simple diff inherently has a "flattening" behavior where multiple layers are squashed into a single one. 
So, in the case where lower is an ancestor of upper, we choose to follow the unmerge behavior, but it's possible users may prefer the simple diff behavior. As of right now, they won't be able to do so, but if needed we can add the ability to choose which behavior is followed in the future. This could be done through a flag provided to DiffOp or possibly by adapting llb.Copy to support this type of behavior with the same efficiency as DiffOp. Signed-off-by: Erik Sipsma <erik@sipsma.dev>
2021-11-26 21:43:24 +00:00
case Diff:
if _, ok := filter[sr.ID()]; ok {
// this diff is its own blob
diffID := sr.getDiffID()
chainID = diffID
blobChainID = imagespecidentity.ChainID([]digest.Digest{digest.Digest(sr.getBlob()), diffID})
} else {
// re-using upper blob
chainID = sr.diffParents.upper.getChainID()
blobChainID = sr.diffParents.upper.getBlobChainID()
}
}
sr.queueChainID(chainID)
sr.queueBlobChainID(blobChainID)
if err := sr.commitMetadata(); err != nil {
return err
}
return nil
}
// isTypeWindows reports whether sr, or a ref it is built from, has the
// "windows" layer type. A Layer ref defers to its layer parent and a Merge ref
// is windows if any of its merge parents is; every other kind is decided by
// the ref's own layer type alone.
func isTypeWindows(sr *immutableRef) bool {
	if sr.GetLayerType() == "windows" {
		return true
	}

	k := sr.kind()
	if k == Layer {
		return isTypeWindows(sr.layerParent)
	}
	if k != Merge {
		return false
	}

	for _, parent := range sr.mergeParents {
		if isTypeWindows(parent) {
			return true
		}
	}
	return false
}
// ensureCompression ensures the specified ref has the blob of the specified compression Type.
//
// The work runs through the package-level flightcontrol group g under a key
// built from the ref ID and the compression type (presumably so concurrent
// requests for the same pair share one execution; confirm against
// flightcontrol). s is the session group handed to the lazy ref provider when
// the blob has to be unlazied before conversion.
func ensureCompression(ctx context.Context, ref *immutableRef, compressionType compression.Type, s session.Group) error {
	flightKey := fmt.Sprintf("%s-%d", ref.ID(), compressionType)

	_, err := g.Do(ctx, flightKey, func(ctx context.Context) (interface{}, error) {
		desc, err := ref.ociDesc(ctx, ref.descHandlers)
		if err != nil {
			return nil, err
		}

		// Resolve converters
		convert, err := getConverter(ctx, ref.cm.ContentStore, desc, compressionType)
		if err != nil {
			return nil, err
		}
		if convert == nil {
			// No converter was returned for this descriptor.
			lazy, err := ref.isLazy(ctx)
			if err != nil {
				return nil, err
			}
			if lazy {
				// This ref can be used as the specified compressionType. Keep it lazy.
				return nil, nil
			}
			return nil, ref.addCompressionBlob(ctx, desc, compressionType)
		}

		// First, lookup local content store
		if _, lookupErr := ref.getCompressionBlob(ctx, compressionType); lookupErr == nil {
			return nil, nil // found the compression variant. no need to convert.
		}

		// Convert layer compression type
		provider := lazyRefProvider{
			ref:     ref,
			desc:    desc,
			dh:      ref.descHandlers[desc.Digest],
			session: s,
		}
		if err := provider.Unlazy(ctx); err != nil {
			return nil, err
		}
		converted, err := convert(ctx, ref.cm.ContentStore, desc)
		if err != nil {
			return nil, errors.Wrapf(err, "failed to convert")
		}

		// Start to track converted layer
		if err := ref.addCompressionBlob(ctx, *converted, compressionType); err != nil {
			return nil, errors.Wrapf(err, "failed to add compression blob")
		}
		return nil, nil
	})
	return err
}
// zstdWriter returns a zstd-compressing writer that wraps dest, created with
// zstd.NewWriter and no extra options.
//
// requiredMediaType is not used by this function; it is presumably kept so the
// signature matches the compressor callback type expected by callers (confirm
// against the call sites).
//
// On failure the returned writer is a literal nil. Returning the result of
// zstd.NewWriter directly would convert a nil encoder pointer into a non-nil
// io.WriteCloser interface value, which defeats `w == nil` checks.
func zstdWriter(dest io.Writer, requiredMediaType string) (io.WriteCloser, error) {
	enc, err := zstd.NewWriter(dest)
	if err != nil {
		return nil, err
	}
	return enc, nil
}