cache: handle crash after snapshot commit
Before this, there could be crash during a call to finalize a ref that occured after the snapshot was committed but before committing the metadata that indicates the immutable ref no longer had an equalMutable. This resulted in a situation where any future calls to finalize that ref would fail. Now, if that situation happens, the cache will notice when it's initially loaded that the ref has an equalMutable that's missing its snapshot and that its own snapshot exists. It will then just use the correctly committed snapshot and clear the equalMutable field. Signed-off-by: Erik Sipsma <erik@sipsma.dev>master
parent
17c237d69a
commit
eb935e525f
|
@ -410,25 +410,32 @@ func (cm *cacheManager) getRecord(ctx context.Context, id string, opts ...RefOpt
|
|||
|
||||
if mutableID := md.getEqualMutable(); mutableID != "" {
|
||||
mutable, err := cm.getRecord(ctx, mutableID)
|
||||
if err != nil {
|
||||
// check loading mutable deleted record from disk
|
||||
if IsNotFound(err) {
|
||||
cm.MetadataStore.Clear(id)
|
||||
if err == nil {
|
||||
rec := &cacheRecord{
|
||||
mu: &sync.Mutex{},
|
||||
cm: cm,
|
||||
refs: make(map[ref]struct{}),
|
||||
parentRefs: parents,
|
||||
cacheMetadata: md,
|
||||
equalMutable: &mutableRef{cacheRecord: mutable},
|
||||
}
|
||||
mutable.equalImmutable = &immutableRef{cacheRecord: rec}
|
||||
cm.records[id] = rec
|
||||
return rec, nil
|
||||
} else if IsNotFound(err) {
|
||||
// The equal mutable for this ref is not found, check to see if our snapshot exists
|
||||
if _, statErr := cm.Snapshotter.Stat(ctx, md.getSnapshotID()); statErr != nil {
|
||||
// this ref's snapshot also doesn't exist, just remove this record
|
||||
cm.MetadataStore.Clear(id)
|
||||
return nil, errors.Wrap(errNotFound, id)
|
||||
}
|
||||
// Our snapshot exists, so there may have been a crash while finalizing this ref.
|
||||
// Clear the equal mutable field and continue using this ref.
|
||||
md.clearEqualMutable()
|
||||
md.commitMetadata()
|
||||
} else {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rec := &cacheRecord{
|
||||
mu: &sync.Mutex{},
|
||||
cm: cm,
|
||||
refs: make(map[ref]struct{}),
|
||||
parentRefs: parents,
|
||||
cacheMetadata: md,
|
||||
equalMutable: &mutableRef{cacheRecord: mutable},
|
||||
}
|
||||
mutable.equalImmutable = &immutableRef{cacheRecord: rec}
|
||||
cm.records[id] = rec
|
||||
return rec, nil
|
||||
}
|
||||
|
||||
rec := &cacheRecord{
|
||||
|
@ -448,6 +455,20 @@ func (cm *cacheManager) getRecord(ctx context.Context, id string, opts ...RefOpt
|
|||
return nil, errors.Wrapf(errNotFound, "failed to get deleted record %s", id)
|
||||
}
|
||||
|
||||
if rec.mutable {
|
||||
// If the record is mutable, then the snapshot must exist
|
||||
if _, err := cm.Snapshotter.Stat(ctx, rec.ID()); err != nil {
|
||||
if !errdefs.IsNotFound(err) {
|
||||
return nil, errors.Wrap(err, "failed to check mutable ref snapshot")
|
||||
}
|
||||
// the snapshot doesn't exist, clear this record
|
||||
if err := rec.remove(ctx, true); err != nil {
|
||||
return nil, errors.Wrap(err, "failed to remove mutable rec with missing snapshot")
|
||||
}
|
||||
return nil, errors.Wrap(errNotFound, rec.ID())
|
||||
}
|
||||
}
|
||||
|
||||
if err := initializeMetadata(rec.cacheMetadata, rec.parentRefs, opts...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -1684,6 +1684,87 @@ func TestMergeOp(t *testing.T) {
|
|||
checkDiskUsage(ctx, t, cm, 0, 0)
|
||||
}
|
||||
|
||||
func TestLoadHalfFinalizedRef(t *testing.T) {
|
||||
// This test simulates the situation where a ref w/ an equalMutable has its
|
||||
// snapshot committed but there is a crash before the metadata is updated to
|
||||
// clear the equalMutable field. It's expected that the mutable will be
|
||||
// removed and the immutable ref will continue to be usable.
|
||||
t.Parallel()
|
||||
|
||||
ctx := namespaces.WithNamespace(context.Background(), "buildkit-test")
|
||||
|
||||
tmpdir, err := ioutil.TempDir("", "cachemanager")
|
||||
require.NoError(t, err)
|
||||
defer os.RemoveAll(tmpdir)
|
||||
|
||||
snapshotter, err := native.NewSnapshotter(filepath.Join(tmpdir, "snapshots"))
|
||||
require.NoError(t, err)
|
||||
|
||||
co, cleanup, err := newCacheManager(ctx, cmOpt{
|
||||
tmpdir: tmpdir,
|
||||
snapshotter: snapshotter,
|
||||
snapshotterName: "native",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
defer cleanup()
|
||||
cm := co.manager.(*cacheManager)
|
||||
|
||||
mref, err := cm.New(ctx, nil, nil, CachePolicyRetain)
|
||||
require.NoError(t, err)
|
||||
mutRef := mref.(*mutableRef)
|
||||
|
||||
iref, err := mutRef.Commit(ctx)
|
||||
require.NoError(t, err)
|
||||
immutRef := iref.(*immutableRef)
|
||||
|
||||
require.NoError(t, mref.Release(ctx))
|
||||
|
||||
_, err = co.lm.Create(ctx, func(l *leases.Lease) error {
|
||||
l.ID = immutRef.ID()
|
||||
l.Labels = map[string]string{
|
||||
"containerd.io/gc.flat": time.Now().UTC().Format(time.RFC3339Nano),
|
||||
}
|
||||
return nil
|
||||
})
|
||||
require.NoError(t, err)
|
||||
err = co.lm.AddResource(ctx, leases.Lease{ID: immutRef.ID()}, leases.Resource{
|
||||
ID: immutRef.getSnapshotID(),
|
||||
Type: "snapshots/" + cm.Snapshotter.Name(),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
err = cm.Snapshotter.Commit(ctx, immutRef.getSnapshotID(), mutRef.getSnapshotID())
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = cm.Snapshotter.Stat(ctx, mutRef.getSnapshotID())
|
||||
require.Error(t, err)
|
||||
|
||||
require.NoError(t, iref.Release(ctx))
|
||||
|
||||
require.NoError(t, cm.Close())
|
||||
require.NoError(t, cleanup())
|
||||
|
||||
co, cleanup, err = newCacheManager(ctx, cmOpt{
|
||||
tmpdir: tmpdir,
|
||||
snapshotter: snapshotter,
|
||||
snapshotterName: "native",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
defer cleanup()
|
||||
cm = co.manager.(*cacheManager)
|
||||
|
||||
_, err = cm.GetMutable(ctx, mutRef.ID())
|
||||
require.ErrorIs(t, err, errNotFound)
|
||||
|
||||
iref, err = cm.Get(ctx, immutRef.ID())
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, iref.Finalize(ctx))
|
||||
immutRef = iref.(*immutableRef)
|
||||
|
||||
_, err = cm.Snapshotter.Stat(ctx, immutRef.getSnapshotID())
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestMountReadOnly(t *testing.T) {
|
||||
t.Parallel()
|
||||
if runtime.GOOS != "linux" {
|
||||
|
|
Loading…
Reference in New Issue