overlay differ: Do file comparison in some cases.

This change results in the overlay differ comparing files to determine
if they are actually part of the diff. This is needed to resolve
differences between the blobs created by the overlay differ and the
double-walking differ.

Before this change, the overlay differ always just assumed that if a
file was in the upperdir it must be part of the diff and included it as
an add or a modify change. However, there are situations in which files
can appear in the upperdir without having been modified or even opened.
For example, if "foo" is a file or dir present in the lowerdirs of an
overlay mount and you run "mv foo footmp; mv footmp foo", then the
upperdir will contain foo (in addition to any files found under foo if
it's a dir). In this situation, the double-walking differ would not
include foo as part of the diff, but the overlay differ would.

This meant that the overlay differ would potentially include extra files
in each blob for such diffs relative to the double-walking differ. As of
now, while this does increase image size, it doesn't result in any
inconsistencies in terms of the contents of images because it just
results in files/dirs getting duplicated on top of their equivalents.

However, for the upcoming DiffOp support, this inconsistency could
actually result in the same operation producing mounts with different
contents depending on which differ is used. This change is therefore
necessary in order to enforce DiffOp consistency (on top of the possible
improvements to exported image size).

The main concern here is that this could undo the performance benefits
that the overlay differ was intended to fix. However, in practice the
situations where this has worse performance are quite obscure and the
benefits should still be present.

First, consider the case where foo is a directory and the user does the
equivalent of "mv foo footmp; mv footmp foo". Even before this change,
the overlay differ would see that foo is marked as opaque and thus fall
back to using the double-walking differ. So there's no performance
regression in this case as the double-walking differ does the same
file comparisons as were added in this commit.

For the case where the user shuffles a file back and forth, there will
potentially be a slow file content based comparison if the underlying
file has a truncated nanosecond timestamp (i.e. it was unpacked from a
tar file). However, the situations in which you shuffle an individual
file without changing it (or open it for writing but then write nothing)
that is large enough in size for content comparisons to be slow are
obscure. Additionally, while the content comparison may be slow, there
will be time saved during export because the file won't be included
unnecessarily in the exported blob, so it's a tradeoff rather than a
pure loss.

In situations where the user actually did change a file and it shows up
in the upperdir, it should be extremely rare that the content comparison
code path is followed. It would require that the user changed no other
metadata of the file, including size, and both mod timestamps were the
same (which could only really happen if their underlying filesystem
lacked support for nanosecond precision and they modified the file
within 1 second of its modification in the lowerdir or they manually
changed the modtime with chtimes).

Signed-off-by: Erik Sipsma <erik@sipsma.dev>
master
Erik Sipsma 2021-11-23 13:18:25 -08:00
parent d5b7ce35d8
commit 18292913c4
2 changed files with 116 additions and 21 deletions

View File

@ -12,6 +12,7 @@ import (
"os"
"path/filepath"
"strings"
"sync"
"syscall"
"github.com/containerd/containerd/archive"
@ -181,7 +182,7 @@ func Changes(ctx context.Context, changeFn fs.ChangeFunc, upperdir, upperdirView
kind = fs.ChangeKindModify
// Avoid including directory that hasn't been modified. If /foo/bar/baz is modified,
// then /foo will apper here even if it's not been modified because it's the parent of bar.
if same, err := sameDir(baseF, f, filepath.Join(base, path), filepath.Join(upperdirView, path)); same {
if same, err := sameDirent(baseF, f, filepath.Join(base, path), filepath.Join(upperdirView, path)); same {
skipRecord = true // Both are the same, don't record the change
} else if err != nil {
return err
@ -267,15 +268,18 @@ func checkOpaque(upperdir string, path string, base string, f os.FileInfo) (isOp
return false, nil
}
// sameDir performs continity-compatible comparison of directories.
// sameDirent performs continity-compatible comparison of files and directories.
// https://github.com/containerd/continuity/blob/v0.1.0/fs/path.go#L91-L133
// This doesn't compare files because it requires to compare their contents.
// This is what we want to avoid by this overlayfs-specialized differ.
func sameDir(f1, f2 os.FileInfo, f1fullPath, f2fullPath string) (bool, error) {
if !f1.IsDir() || !f2.IsDir() {
return false, nil
}
// This will only do a slow content comparison of two files if they have all the
// same metadata and both have truncated nanosecond mtime timestamps. In practice,
// this can only happen if both the base file in the lowerdirs has a truncated
// timestamp (i.e. was unpacked from a tar) and the user did something like
// "mv foo tmp && mv tmp foo" that results in the file being copied up to the
// upperdir without making any changes to it. This is much rarer than similar
// cases in the double-walking differ, where the slow content comparison will
// be used whenever a file with a truncated timestamp is in the lowerdir at
// all and left unmodified.
func sameDirent(f1, f2 os.FileInfo, f1fullPath, f2fullPath string) (bool, error) {
if os.SameFile(f1, f2) {
return true, nil
}
@ -289,6 +293,32 @@ func sameDir(f1, f2 os.FileInfo, f1fullPath, f2fullPath string) (bool, error) {
return eq, err
}
if !f1.IsDir() {
if f1.Size() != f2.Size() {
return false, nil
}
t1 := f1.ModTime()
t2 := f2.ModTime()
if t1.Unix() != t2.Unix() {
return false, nil
}
// If the timestamp may have been truncated in both of the
// files, check content of file to determine difference
if t1.Nanosecond() == 0 && t2.Nanosecond() == 0 {
if (f1.Mode() & os.ModeSymlink) == os.ModeSymlink {
return compareSymlinkTarget(f1fullPath, f2fullPath)
}
if f1.Size() == 0 {
return true, nil
}
return compareFileContent(f1fullPath, f2fullPath)
} else if t1.Nanosecond() != t2.Nanosecond() {
return false, nil
}
}
return true, nil
}
@ -322,3 +352,80 @@ func compareCapabilities(p1, p2 string) (bool, error) {
}
return bytes.Equal(c1, c2), nil
}
// Ported from continuity project
// https://github.com/containerd/continuity/blob/bce1c3f9669b6f3e7f6656ee715b0b4d75fa64a6/fs/path.go#L135
// Copyright The containerd Authors.
func compareSymlinkTarget(p1, p2 string) (bool, error) {
t1, err := os.Readlink(p1)
if err != nil {
return false, err
}
t2, err := os.Readlink(p2)
if err != nil {
return false, err
}
return t1 == t2, nil
}
var bufPool = sync.Pool{
New: func() interface{} {
b := make([]byte, 32*1024)
return &b
},
}
// Ported from continuity project
// https://github.com/containerd/continuity/blob/bce1c3f9669b6f3e7f6656ee715b0b4d75fa64a6/fs/path.go#L151
// Copyright The containerd Authors.
func compareFileContent(p1, p2 string) (bool, error) {
f1, err := os.Open(p1)
if err != nil {
return false, err
}
defer f1.Close()
if stat, err := f1.Stat(); err != nil {
return false, err
} else if !stat.Mode().IsRegular() {
return false, errors.Errorf("%s is not a regular file", p1)
}
f2, err := os.Open(p2)
if err != nil {
return false, err
}
defer f2.Close()
if stat, err := f2.Stat(); err != nil {
return false, err
} else if !stat.Mode().IsRegular() {
return false, errors.Errorf("%s is not a regular file", p2)
}
b1 := bufPool.Get().(*[]byte)
defer bufPool.Put(b1)
b2 := bufPool.Get().(*[]byte)
defer bufPool.Put(b2)
for {
n1, err1 := io.ReadFull(f1, *b1)
if err1 == io.ErrUnexpectedEOF {
// it's expected to get EOF when file size isn't a multiple of chunk size, consolidate these error types
err1 = io.EOF
}
if err1 != nil && err1 != io.EOF {
return false, err1
}
n2, err2 := io.ReadFull(f2, *b2)
if err2 == io.ErrUnexpectedEOF {
err2 = io.EOF
}
if err2 != nil && err2 != io.EOF {
return false, err2
}
if n1 != n2 || !bytes.Equal((*b1)[:n1], (*b2)[:n2]) {
return false, nil
}
if err1 == io.EOF && err2 == io.EOF {
return true, nil
}
}
}

View File

@ -221,8 +221,6 @@ func TestParentDirectoryPermission(t *testing.T) {
// TestUpdateWithSameTime is a test ported from
// https://github.com/containerd/continuity/blob/v0.1.0/fs/diff_test.go#L221-L269
// Copyright The containerd Authors.
//
// NOTE: This test is patched for our differ. See the following NOTE for details.
func TestUpdateWithSameTime(t *testing.T) {
tt := time.Now().Truncate(time.Second)
t1 := tt.Add(5 * time.Nanosecond)
@ -257,16 +255,6 @@ func TestUpdateWithSameTime(t *testing.T) {
)
diff := []TestChange{
Modify("/file-modified-time"),
// NOTE: Even if the file is identical, overlayfs copies it to
// the upper layer when the modification occurred. continuity's differ avoids counting
// this as "modify" by comparing the time and the file contents between upper and lower
// but here we want to avoid comparing bits which makes the differ slower.
// TODO: we need a way to effectively determine two files are identical
// without copmaring bits.
Modify("/file-no-change"),
Modify("/file-same-time"),
// Include changes with truncated timestamps. Comparing newly
// extracted tars which have truncated timestamps will be
// expected to produce changes. The expectation is that diff