From 01774c35cfb7ed1c32d35b2561f080d9b734038c Mon Sep 17 00:00:00 2001 From: Ben Johnson Date: Tue, 11 Feb 2014 08:41:22 -0700 Subject: [PATCH] Cursor iteration. --- README.md | 158 -------------------------------------------- cursor.go | 53 ++++++++++++++- node.go | 4 ++ quick_test.go | 5 ++ transaction_test.go | 123 ++++++++++++++++++++++++++++++++++ 5 files changed, 182 insertions(+), 161 deletions(-) diff --git a/README.md b/README.md index 9315c6d..3f61fca 100644 --- a/README.md +++ b/README.md @@ -42,147 +42,6 @@ There are also several differences between Bolt and LMDB: So why use Bolt? The goal of Bolt is provide a simple, fast data store that is easily integrated into Go projects. The library does not require CGO so it is compatible with `go get` and you can easily build static binaries with it. We are not accepting additional functionality into the library so the API and file format are stable. Bolt also has near 100% unit test coverage and also includes heavy black box testing using the [testing/quick](http://golang.org/pkg/testing/quick/) package. -## API - -### Database - -The database is the object that represents your data as a whole -It is split up into buckets which is analogous to tables in a relational database. - -#### Opening and closing a database - -```go -db := DB() -err := db.Open("/path/to/db", 0666) -... -err := db.Close() -``` - - -### Transactions - -Versioning of data in the bucket data happens through a Transaction. -These transactions can be either be read-only or read/write transactions. -Transactions are what keeps Bolt consistent and allows data to be rolled back if needed. - -It may seem strange that read-only access needs to be wrapped in a transaction but this is so Bolt can keep track of what version of the data is currently in use. -The space used to hold data is kept until the transaction closes. - -One important note is that long running transactions can cause the database to grow in size quickly. -Please double check that you are appropriately closing all transactions after you're done with them. - -#### Creating and closing a read-only transaction - -```go -t, err := db.Transaction() -t.Close() -``` - -#### Creating and committing a read/write transaction - -``` -t, err := db.RWTransaction() -err := t.Commit() -``` - -#### Creating and aborting a read/write transaction - -``` -t, err := db.RWTransaction() -err := t.Abort() -``` - - -### Buckets - -Buckets are where your actual key/value data gets stored. -You can create new buckets from the database and look them up by name. - -#### Creating a bucket - -```go -t, err := db.RWTransaction() -err := t.CreateBucket("widgets") -``` - -#### Renaming a bucket - -```go -t, err := db.RWTransaction() -err := t.RenameBucket("widgets", "woojits") -``` - -#### Deleting a bucket - -```go -t, err := db.RWTransaction() -err := t.DeleteBucket("widgets") -``` - -#### Retrieve an existing bucket - -```go -t, err := db.Transaction() -b, err := t.Bucket("widgets") -``` - -#### Retrieve a list of all buckets - -```go -t, err := db.Transaction() -buckets, err := db.Buckets() -``` - - -### Key/Value Access - -#### Retrieve a value for a specific key - -```go -t, err := db.Transaction() -value, err := t.Get("widgets", []byte("foo")) -value, err := t.GetString("widgets", "foo") -``` - -#### Set the value for a key - -```go -t, err := db.RWTransaction() -err := t.Put("widgets", []byte("foo"), []byte("bar")) -err := t.PutString("widgets", "foo", "bar") -``` - -#### Delete a given key - -```go -t, err := db.RWTransaction() -err := t.Delete("widgets", []byte("foo")) -err := t.DeleteString("widgets", "foo") -``` - - -### Cursors - -Cursors provide fast read-only access to a specific bucket within a transaction. - - -#### Creating a read-only cursor - -```go -t, err := db.Transaction() -c, err := b.Cursor("widgets") -``` - -#### Iterating over a cursor - -```go -for k, v, err := c.First(); k != nil; k, v, err = c.Next() { - if err != nil { - return err - } - ... DO SOMETHING ... -} -``` ## Internals @@ -213,20 +72,3 @@ There are several different types of pages: * Overflow pages - These are special pages used when a key's data is too large for a leaf page and needs to spill onto additional pages. - -### Nodes - -Within each page there are one or more elements called nodes. -In branch pages, these nodes store references to other child pages in the tree. -In leaf pages, these nodes store the actual key/value data. - - -## TODO - -The following is a list of items to do on the Bolt project: - -1. Calculate freelist on db.Open(). (Traverse branches, set bitmap index, load free pages into a list -- lazy load in the future). -2. Resize map. (Make sure there are no reader txns before resizing) -3. DB.Copy() -4. Merge pages. -5. Rebalance (after deletion). diff --git a/cursor.go b/cursor.go index d993668..b42c13c 100644 --- a/cursor.go +++ b/cursor.go @@ -13,13 +13,35 @@ type Cursor struct { // First moves the cursor to the first item in the bucket and returns its key and data. func (c *Cursor) First() ([]byte, []byte) { - // TODO: Traverse to the first key. - return nil, nil + if len(c.stack) > 0 { + c.stack = c.stack[:0] + } + c.stack = append(c.stack, pageElementRef{page: c.transaction.page(c.root), index: 0}) + c.first() + return c.keyValue() } // Move the cursor to the next key/value. func (c *Cursor) Next() ([]byte, []byte) { - return nil, nil + // Attempt to move over one element until we're successful. + // Move up the stack as we hit the end of each page in our stack. + for i := len(c.stack) - 1; i >= 0; i-- { + elem := &c.stack[i] + if elem.index < elem.page.count-1 { + elem.index++ + break + } + c.stack = c.stack[:i] + } + + // If we've hit the end then return nil. + if len(c.stack) == 0 { + return nil, nil + } + + // Move down the stack to find the first element of the first leaf under this branch. + c.first() + return c.keyValue() } // Get positions the cursor at a specific key and returns the its value. @@ -42,6 +64,21 @@ func (c *Cursor) Get(key []byte) []byte { return c.element().value() } +// first moves the cursor to the first leaf element under a page. +func (c *Cursor) first() { + p := c.stack[len(c.stack)-1].page + for { + // Exit when we hit a leaf page. + if (p.flags & p_leaf) != 0 { + break + } + + // Keep adding pages pointing to the first element to the stack. + p = c.transaction.page(p.branchPageElement(c.stack[len(c.stack)-1].index).pgid) + c.stack = append(c.stack, pageElementRef{page: p, index: 0}) + } +} + func (c *Cursor) search(key []byte, p *page) { _assert((p.flags&(p_branch|p_leaf)) != 0, "invalid page type: "+p.typ()) e := pageElementRef{page: p} @@ -99,6 +136,16 @@ func (c *Cursor) element() *leafPageElement { return ref.page.leafPageElement(ref.index) } +// keyValue returns the key and value of the current leaf element. +func (c *Cursor) keyValue() ([]byte, []byte) { + ref := &c.stack[len(c.stack)-1] + if ref.index >= ref.page.count { + return nil, nil + } + e := ref.page.leafPageElement(ref.index) + return e.key(), e.value() +} + // node returns the node that the cursor is currently positioned on. func (c *Cursor) node(t *RWTransaction) *node { if len(c.stack) == 0 { diff --git a/node.go b/node.go index ad31f5b..11b5e2f 100644 --- a/node.go +++ b/node.go @@ -161,8 +161,10 @@ func (n *node) write(p *page) { // Initialize page. if n.isLeaf { p.flags |= p_leaf + // warn("∑", p.id, "leaf") } else { p.flags |= p_branch + // warn("∑", p.id, "branch") } p.count = uint16(len(n.inodes)) @@ -175,11 +177,13 @@ func (n *node) write(p *page) { elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))) elem.ksize = uint32(len(item.key)) elem.vsize = uint32(len(item.value)) + // warn(" »", string(item.key), "->", string(item.value)) } else { elem := p.branchPageElement(uint16(i)) elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))) elem.ksize = uint32(len(item.key)) elem.pgid = item.pgid + // warn(" »", string(item.key)) } // Write data for the element to the end of the page. diff --git a/quick_test.go b/quick_test.go index 4a107a8..3db0b6c 100644 --- a/quick_test.go +++ b/quick_test.go @@ -1,6 +1,7 @@ package bolt import ( + "bytes" "flag" "math/rand" "reflect" @@ -39,6 +40,10 @@ func qconfig() *quick.Config { type testdata []testdataitem +func (t testdata) Len() int { return len(t) } +func (t testdata) Swap(i, j int) { t[i], t[j] = t[j], t[i] } +func (t testdata) Less(i, j int) bool { return bytes.Compare(t[i].Key, t[j].Key) == -1 } + func (t testdata) Generate(rand *rand.Rand, size int) reflect.Value { n := rand.Intn(qmaxitems-1) + 1 items := make(testdata, n) diff --git a/transaction_test.go b/transaction_test.go index 6041f2f..b59d9b2 100644 --- a/transaction_test.go +++ b/transaction_test.go @@ -1,7 +1,11 @@ package bolt import ( + "fmt" + "os" + "sort" "testing" + "testing/quick" "github.com/stretchr/testify/assert" ) @@ -33,3 +37,122 @@ func TestTransactionGetMissing(t *testing.T) { assert.Nil(t, value) }) } + +// Ensure that a Transaction cursor can iterate over an empty bucket without error. +func TestTransactionCursorEmptyBucket(t *testing.T) { + withOpenDB(func(db *DB, path string) { + db.CreateBucket("widgets") + txn, _ := db.Transaction() + c := txn.Cursor("widgets") + k, v := c.First() + assert.Nil(t, k) + assert.Nil(t, v) + txn.Close() + }) +} + +// Ensure that a Transaction returns a nil when a bucket doesn't exist. +func TestTransactionCursorMissingBucket(t *testing.T) { + withOpenDB(func(db *DB, path string) { + db.CreateBucket("widgets") + txn, _ := db.Transaction() + assert.Nil(t, txn.Cursor("woojits")) + txn.Close() + }) +} + +// Ensure that a Transaction cursor can iterate over a single root with a couple elements. +func TestTransactionCursorLeafRoot(t *testing.T) { + withOpenDB(func(db *DB, path string) { + db.CreateBucket("widgets") + db.Put("widgets", []byte("baz"), []byte{}) + db.Put("widgets", []byte("foo"), []byte{0}) + db.Put("widgets", []byte("bar"), []byte{1}) + txn, _ := db.Transaction() + c := txn.Cursor("widgets") + + k, v := c.First() + assert.Equal(t, string(k), "bar") + assert.Equal(t, v, []byte{1}) + + k, v = c.Next() + assert.Equal(t, string(k), "baz") + assert.Equal(t, v, []byte{}) + + k, v = c.Next() + assert.Equal(t, string(k), "foo") + assert.Equal(t, v, []byte{0}) + + k, v = c.Next() + assert.Nil(t, k) + assert.Nil(t, v) + + k, v = c.Next() + assert.Nil(t, k) + assert.Nil(t, v) + + txn.Close() + }) +} + +// Ensure that a Transaction cursor can restart from the beginning. +func TestTransactionCursorRestart(t *testing.T) { + withOpenDB(func(db *DB, path string) { + db.CreateBucket("widgets") + db.Put("widgets", []byte("bar"), []byte{}) + db.Put("widgets", []byte("foo"), []byte{}) + + txn, _ := db.Transaction() + c := txn.Cursor("widgets") + + k, _ := c.First() + assert.Equal(t, string(k), "bar") + + k, _ = c.Next() + assert.Equal(t, string(k), "foo") + + k, _ = c.First() + assert.Equal(t, string(k), "bar") + + k, _ = c.Next() + assert.Equal(t, string(k), "foo") + + txn.Close() + }) +} + +// Ensure that a transaction can iterate over all elements in a bucket. +func TestTransactionCursorIterate(t *testing.T) { + f := func(items testdata) bool { + withOpenDB(func(db *DB, path string) { + // Bulk insert all values. + db.CreateBucket("widgets") + rwtxn, _ := db.RWTransaction() + for _, item := range items { + assert.NoError(t, rwtxn.Put("widgets", item.Key, item.Value)) + } + assert.NoError(t, rwtxn.Commit()) + + // Sort test data. + sort.Sort(items) + + // Iterate over all items and check consistency. + var index = 0 + txn, _ := db.Transaction() + c := txn.Cursor("widgets") + for k, v := c.First(); k != nil && index < len(items); k, v = c.Next() { + assert.Equal(t, k, items[index].Key) + assert.Equal(t, v, items[index].Value) + index++ + } + assert.Equal(t, len(items), index) + txn.Close() + }) + fmt.Fprint(os.Stderr, ".") + return true + } + if err := quick.Check(f, qconfig()); err != nil { + t.Error(err) + } + fmt.Fprint(os.Stderr, "\n") +}