Fix merge-split spill issues.

master
Ben Johnson 2014-06-03 13:21:28 -06:00
parent a96185e8b6
commit 54cad40a78
5 changed files with 74 additions and 25 deletions

View File

@ -613,6 +613,7 @@ func (b *Bucket) rebalance() {
// node creates a node from a page and associates it with a given parent.
func (b *Bucket) node(pgid pgid, parent *node) *node {
_assert(b.nodes != nil, "nodes map expected")
// Retrieve node if it's already been created.
if n := b.nodes[pgid]; n != nil {
return n

View File

@ -580,17 +580,17 @@ func TestBucket_Stats(t *testing.T) {
stats := b.Stats()
assert.Equal(t, 1, stats.BranchPageN, "BranchPageN")
assert.Equal(t, 0, stats.BranchOverflowN, "BranchOverflowN")
assert.Equal(t, 6, stats.LeafPageN, "LeafPageN")
assert.Equal(t, 7, stats.LeafPageN, "LeafPageN")
assert.Equal(t, 2, stats.LeafOverflowN, "LeafOverflowN")
assert.Equal(t, 501, stats.KeyN, "KeyN")
assert.Equal(t, 2, stats.Depth, "Depth")
branchInuse := pageHeaderSize // branch page header
branchInuse += 6 * branchPageElementSize // branch elements
branchInuse += 6 * 3 // branch keys (6 3-byte keys)
branchInuse += 7 * branchPageElementSize // branch elements
branchInuse += 7 * 3 // branch keys (6 3-byte keys)
assert.Equal(t, branchInuse, stats.BranchInuse, "BranchInuse")
leafInuse := 6 * pageHeaderSize // leaf page header
leafInuse := 7 * pageHeaderSize // leaf page header
leafInuse += 501 * leafPageElementSize // leaf elements
leafInuse += 500*3 + len(big_key) // leaf keys
leafInuse += 1*10 + 2*90 + 3*400 + 10000 // leaf values
@ -599,7 +599,7 @@ func TestBucket_Stats(t *testing.T) {
if os.Getpagesize() == 4096 {
// Incompatible page size
assert.Equal(t, 4096, stats.BranchAlloc, "BranchAlloc")
assert.Equal(t, 32768, stats.LeafAlloc, "LeafAlloc")
assert.Equal(t, 36864, stats.LeafAlloc, "LeafAlloc")
}
assert.Equal(t, 1, stats.BucketN, "BucketN")
@ -612,6 +612,9 @@ func TestBucket_Stats(t *testing.T) {
// Ensure a bucket with random insertion utilizes fill percentage correctly.
func TestBucket_Stats_RandomFill(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
if os.Getpagesize() != 4096 {
t.Skip("invalid page size for test")
}
@ -621,6 +624,7 @@ func TestBucket_Stats_RandomFill(t *testing.T) {
// Add a set of values in random order. It will be the same random
// order so we can maintain consistency between test runs.
var count int
r := rand.New(rand.NewSource(42))
for _, i := range r.Perm(1000) {
db.Update(func(tx *Tx) error {
@ -628,11 +632,12 @@ func TestBucket_Stats_RandomFill(t *testing.T) {
for _, j := range r.Perm(100) {
index := (j * 10000) + i
b.Put([]byte(fmt.Sprintf("%d000000000000000", index)), []byte("0000000000"))
count++
}
return nil
})
mustCheck(db)
}
mustCheck(db)
db.View(func(tx *Tx) error {
s := tx.Bucket([]byte("woojits")).Stats()
@ -640,13 +645,13 @@ func TestBucket_Stats_RandomFill(t *testing.T) {
assert.Equal(t, 22, s.BranchPageN, "BranchPageN")
assert.Equal(t, 0, s.BranchOverflowN, "BranchOverflowN")
assert.Equal(t, 62963, s.BranchInuse, "BranchInuse")
assert.Equal(t, 61708, s.BranchInuse, "BranchInuse")
assert.Equal(t, 90112, s.BranchAlloc, "BranchAlloc")
assert.Equal(t, 1677, s.LeafPageN, "LeafPageN")
assert.Equal(t, 1643, s.LeafPageN, "LeafPageN")
assert.Equal(t, 0, s.LeafOverflowN, "LeafOverflowN")
assert.Equal(t, 4714722, s.LeafInuse, "LeafInuse")
assert.Equal(t, 6868992, s.LeafAlloc, "LeafAlloc")
assert.Equal(t, 4714178, s.LeafInuse, "LeafInuse")
assert.Equal(t, 6729728, s.LeafAlloc, "LeafAlloc")
return nil
})
})

View File

@ -7,6 +7,8 @@ import (
"io/ioutil"
"os"
"regexp"
"sort"
"strings"
"testing"
"time"
"unsafe"
@ -520,6 +522,38 @@ func mustCheck(db *DB) {
}
}
// mustContainKeys checks that a bucket contains a given set of keys.
func mustContainKeys(b *Bucket, m map[string]string) {
found := make(map[string]string)
b.ForEach(func(k, _ []byte) error {
found[string(k)] = ""
return nil
})
// Check for keys found in bucket that shouldn't be there.
var keys []string
for k, _ := range found {
if _, ok := m[string(k)]; !ok {
keys = append(keys, k)
}
}
if len(keys) > 0 {
sort.Strings(keys)
panic(fmt.Sprintf("keys found(%d): %s", len(keys), strings.Join(keys, ",")))
}
// Check for keys not found in bucket that should be there.
for k, _ := range m {
if _, ok := found[string(k)]; !ok {
keys = append(keys, k)
}
}
if len(keys) > 0 {
sort.Strings(keys)
panic(fmt.Sprintf("keys not found(%d): %s", len(keys), strings.Join(keys, ",")))
}
}
func trunc(b []byte, length int) []byte {
if length < len(b) {
return b[:length]

View File

@ -70,14 +70,12 @@ func (f *freelist) allocate(n int) pgid {
// free releases a page and its overflow for a given transaction id.
// If the page is already free then a panic will occur.
func (f *freelist) free(txid txid, p *page) {
warn("free:", txid, p.id, p.overflow)
_assert(p.id > 1, "cannot free page 0 or 1: %d", p.id)
// Verify that page is not already free.
minid, maxid := p.id, p.id+pgid(p.overflow)
for _, id := range f.ids {
if id >= minid && id <= maxid {
warn(" ‡", id, "|", minid, maxid)
panic(fmt.Sprintf("page %d already freed in tx", id))
}
}
@ -92,9 +90,6 @@ func (f *freelist) free(txid txid, p *page) {
// Free page and all its overflow pages.
var ids = f.pending[txid]
for i := 0; i < int(p.overflow+1); i++ {
if p.id+pgid(i) == 55 {
warn(" •", txid, p.id+pgid(i))
}
ids = append(ids, p.id+pgid(i))
}
f.pending[txid] = ids

34
node.go
View File

@ -14,7 +14,7 @@ type node struct {
key []byte
pgid pgid
parent *node
children []*node
children nodes
inodes inodes
}
@ -231,6 +231,7 @@ func (n *node) split(pageSize int) []*node {
// If we can fit our extra keys on the next page then merge into it.
if next := n.nextSibling(); next != nil && next.size()+sz1 < threshold {
next.inodes = append(n.inodes[splitIndex:], next.inodes...)
n.inodes = n.inodes[:splitIndex]
return []*node{n}
}
@ -284,22 +285,29 @@ func (n *node) splitIndex(threshold int) (index, sz int) {
func (n *node) spill() error {
var tx = n.bucket.tx
// Spill child nodes first.
for _, child := range n.children {
if err := child.spill(); err != nil {
// Spill child nodes first. Child nodes can materialize sibling nodes in
// the case of split-merge so we cannot use a range loop. We have to check
// the children size on every loop iteration.
sort.Sort(n.children)
for i := 0; i < len(n.children); i++ {
if err := n.children[i].spill(); err != nil {
return err
}
}
// Add node's page to the freelist if it's not new.
if n.pgid > 0 {
tx.db.freelist.free(tx.id(), tx.page(n.pgid))
n.pgid = 0
}
// We no longer need the child list because it's only used for spill tracking.
n.children = nil
// Spill nodes by deepest first.
// Spill nodes by deepest first. The first node returned from split() will
// always be "n".
var nodes = n.split(tx.db.pageSize)
for _, node := range nodes {
// Add node's page to the freelist if it's not new.
if node.pgid > 0 {
tx.db.freelist.free(tx.id(), tx.page(node.pgid))
node.pgid = 0
}
// Allocate contiguous space for the node.
p, err := tx.allocate((node.size() / tx.db.pageSize) + 1)
if err != nil {
@ -565,6 +573,12 @@ func (n *node) dump() {
}
*/
type nodes []*node
func (s nodes) Len() int { return len(s) }
func (s nodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s nodes) Less(i, j int) bool { return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1 }
// inode represents an internal node inside of a node.
// It can be used to point to elements in a page or point
// to an element which hasn't been added to a page yet.