diff --git a/cache/manager.go b/cache/manager.go index fe22fc07..36dac6fe 100644 --- a/cache/manager.go +++ b/cache/manager.go @@ -410,25 +410,32 @@ func (cm *cacheManager) getRecord(ctx context.Context, id string, opts ...RefOpt if mutableID := md.getEqualMutable(); mutableID != "" { mutable, err := cm.getRecord(ctx, mutableID) - if err != nil { - // check loading mutable deleted record from disk - if IsNotFound(err) { - cm.MetadataStore.Clear(id) + if err == nil { + rec := &cacheRecord{ + mu: &sync.Mutex{}, + cm: cm, + refs: make(map[ref]struct{}), + parentRefs: parents, + cacheMetadata: md, + equalMutable: &mutableRef{cacheRecord: mutable}, } + mutable.equalImmutable = &immutableRef{cacheRecord: rec} + cm.records[id] = rec + return rec, nil + } else if IsNotFound(err) { + // The equal mutable for this ref is not found, check to see if our snapshot exists + if _, statErr := cm.Snapshotter.Stat(ctx, md.getSnapshotID()); statErr != nil { + // this ref's snapshot also doesn't exist, just remove this record + cm.MetadataStore.Clear(id) + return nil, errors.Wrap(errNotFound, id) + } + // Our snapshot exists, so there may have been a crash while finalizing this ref. + // Clear the equal mutable field and continue using this ref. + md.clearEqualMutable() + md.commitMetadata() + } else { return nil, err } - - rec := &cacheRecord{ - mu: &sync.Mutex{}, - cm: cm, - refs: make(map[ref]struct{}), - parentRefs: parents, - cacheMetadata: md, - equalMutable: &mutableRef{cacheRecord: mutable}, - } - mutable.equalImmutable = &immutableRef{cacheRecord: rec} - cm.records[id] = rec - return rec, nil } rec := &cacheRecord{ @@ -448,6 +455,20 @@ func (cm *cacheManager) getRecord(ctx context.Context, id string, opts ...RefOpt return nil, errors.Wrapf(errNotFound, "failed to get deleted record %s", id) } + if rec.mutable { + // If the record is mutable, then the snapshot must exist + if _, err := cm.Snapshotter.Stat(ctx, rec.ID()); err != nil { + if !errdefs.IsNotFound(err) { + return nil, errors.Wrap(err, "failed to check mutable ref snapshot") + } + // the snapshot doesn't exist, clear this record + if err := rec.remove(ctx, true); err != nil { + return nil, errors.Wrap(err, "failed to remove mutable rec with missing snapshot") + } + return nil, errors.Wrap(errNotFound, rec.ID()) + } + } + if err := initializeMetadata(rec.cacheMetadata, rec.parentRefs, opts...); err != nil { return nil, err } diff --git a/cache/manager_test.go b/cache/manager_test.go index d5cd5d14..80d465b2 100644 --- a/cache/manager_test.go +++ b/cache/manager_test.go @@ -1684,6 +1684,87 @@ func TestMergeOp(t *testing.T) { checkDiskUsage(ctx, t, cm, 0, 0) } +func TestLoadHalfFinalizedRef(t *testing.T) { + // This test simulates the situation where a ref w/ an equalMutable has its + // snapshot committed but there is a crash before the metadata is updated to + // clear the equalMutable field. It's expected that the mutable will be + // removed and the immutable ref will continue to be usable. + t.Parallel() + + ctx := namespaces.WithNamespace(context.Background(), "buildkit-test") + + tmpdir, err := ioutil.TempDir("", "cachemanager") + require.NoError(t, err) + defer os.RemoveAll(tmpdir) + + snapshotter, err := native.NewSnapshotter(filepath.Join(tmpdir, "snapshots")) + require.NoError(t, err) + + co, cleanup, err := newCacheManager(ctx, cmOpt{ + tmpdir: tmpdir, + snapshotter: snapshotter, + snapshotterName: "native", + }) + require.NoError(t, err) + defer cleanup() + cm := co.manager.(*cacheManager) + + mref, err := cm.New(ctx, nil, nil, CachePolicyRetain) + require.NoError(t, err) + mutRef := mref.(*mutableRef) + + iref, err := mutRef.Commit(ctx) + require.NoError(t, err) + immutRef := iref.(*immutableRef) + + require.NoError(t, mref.Release(ctx)) + + _, err = co.lm.Create(ctx, func(l *leases.Lease) error { + l.ID = immutRef.ID() + l.Labels = map[string]string{ + "containerd.io/gc.flat": time.Now().UTC().Format(time.RFC3339Nano), + } + return nil + }) + require.NoError(t, err) + err = co.lm.AddResource(ctx, leases.Lease{ID: immutRef.ID()}, leases.Resource{ + ID: immutRef.getSnapshotID(), + Type: "snapshots/" + cm.Snapshotter.Name(), + }) + require.NoError(t, err) + + err = cm.Snapshotter.Commit(ctx, immutRef.getSnapshotID(), mutRef.getSnapshotID()) + require.NoError(t, err) + + _, err = cm.Snapshotter.Stat(ctx, mutRef.getSnapshotID()) + require.Error(t, err) + + require.NoError(t, iref.Release(ctx)) + + require.NoError(t, cm.Close()) + require.NoError(t, cleanup()) + + co, cleanup, err = newCacheManager(ctx, cmOpt{ + tmpdir: tmpdir, + snapshotter: snapshotter, + snapshotterName: "native", + }) + require.NoError(t, err) + defer cleanup() + cm = co.manager.(*cacheManager) + + _, err = cm.GetMutable(ctx, mutRef.ID()) + require.ErrorIs(t, err, errNotFound) + + iref, err = cm.Get(ctx, immutRef.ID()) + require.NoError(t, err) + require.NoError(t, iref.Finalize(ctx)) + immutRef = iref.(*immutableRef) + + _, err = cm.Snapshotter.Stat(ctx, immutRef.getSnapshotID()) + require.NoError(t, err) +} + func TestMountReadOnly(t *testing.T) { t.Parallel() if runtime.GOOS != "linux" {