buildkit/vendor/go.etcd.io/bbolt/node.go

package bbolt

import (
	"bytes"
	"fmt"
	"sort"
	"unsafe"
)

// node represents an in-memory, deserialized page.
type node struct {
	bucket     *Bucket
	isLeaf     bool
	unbalanced bool
	spilled    bool
	key        []byte
	pgid       pgid
	parent     *node
	children   nodes
	inodes     inodes
}

// root returns the top-level node this node is attached to.
func (n *node) root() *node {
	if n.parent == nil {
		return n
	}
	return n.parent.root()
}

// minKeys returns the minimum number of inodes this node should have.
func (n *node) minKeys() int {
	if n.isLeaf {
		return 1
	}
	return 2
}

// size returns the size of the node after serialization.
func (n *node) size() int {
	sz, elsz := pageHeaderSize, n.pageElementSize()
	for i := 0; i < len(n.inodes); i++ {
		item := &n.inodes[i]
		sz += elsz + uintptr(len(item.key)) + uintptr(len(item.value))
	}
	return int(sz)
}

// sizeLessThan returns true if the node is less than a given size.
// This is an optimization to avoid calculating a large node when we only need
// to know if it fits inside a certain page size.
func (n *node) sizeLessThan(v uintptr) bool {
	sz, elsz := pageHeaderSize, n.pageElementSize()
	for i := 0; i < len(n.inodes); i++ {
		item := &n.inodes[i]
		sz += elsz + uintptr(len(item.key)) + uintptr(len(item.value))
		if sz >= v {
			return false
		}
	}
	return true
}

// pageElementSize returns the size of each page element based on the type of node.
func (n *node) pageElementSize() uintptr {
	if n.isLeaf {
		return leafPageElementSize
	}
	return branchPageElementSize
}

// childAt returns the child node at a given index.
func (n *node) childAt(index int) *node {
	if n.isLeaf {
		panic(fmt.Sprintf("invalid childAt(%d) on a leaf node", index))
	}
	return n.bucket.node(n.inodes[index].pgid, n)
}

// childIndex returns the index of a given child node.
func (n *node) childIndex(child *node) int {
	index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, child.key) != -1 })
	return index
}

// numChildren returns the number of children.
func (n *node) numChildren() int {
	return len(n.inodes)
}

// nextSibling returns the next node with the same parent.
func (n *node) nextSibling() *node {
	if n.parent == nil {
		return nil
	}
	index := n.parent.childIndex(n)
	if index >= n.parent.numChildren()-1 {
		return nil
	}
	return n.parent.childAt(index + 1)
}

// prevSibling returns the previous node with the same parent.
func (n *node) prevSibling() *node {
	if n.parent == nil {
		return nil
	}
	index := n.parent.childIndex(n)
	if index == 0 {
		return nil
	}
	return n.parent.childAt(index - 1)
}

// put inserts a key/value.
func (n *node) put(oldKey, newKey, value []byte, pgid pgid, flags uint32) {
	if pgid >= n.bucket.tx.meta.pgid {
		panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", pgid, n.bucket.tx.meta.pgid))
	} else if len(oldKey) <= 0 {
		panic("put: zero-length old key")
	} else if len(newKey) <= 0 {
		panic("put: zero-length new key")
	}

	// Find insertion index.
	index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 })

	// Add capacity and shift nodes if we don't have an exact match and need to insert.
	exact := (len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey))
	if !exact {
		n.inodes = append(n.inodes, inode{})
		copy(n.inodes[index+1:], n.inodes[index:])
	}

	inode := &n.inodes[index]
	inode.flags = flags
	inode.key = newKey
	inode.value = value
	inode.pgid = pgid
	_assert(len(inode.key) > 0, "put: zero-length inode key")
}

// del removes a key from the node.
func (n *node) del(key []byte) {
	// Find index of key.
	index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, key) != -1 })

	// Exit if the key isn't found.
	if index >= len(n.inodes) || !bytes.Equal(n.inodes[index].key, key) {
		return
	}

	// Delete inode from the node.
	n.inodes = append(n.inodes[:index], n.inodes[index+1:]...)

	// Mark the node as needing rebalancing.
	n.unbalanced = true
}

// read initializes the node from a page.
func (n *node) read(p *page) {
	n.pgid = p.id
	n.isLeaf = ((p.flags & leafPageFlag) != 0)
	n.inodes = make(inodes, int(p.count))

	for i := 0; i < int(p.count); i++ {
		inode := &n.inodes[i]
		if n.isLeaf {
			elem := p.leafPageElement(uint16(i))
			inode.flags = elem.flags
			inode.key = elem.key()
			inode.value = elem.value()
		} else {
			elem := p.branchPageElement(uint16(i))
			inode.pgid = elem.pgid
			inode.key = elem.key()
		}
		_assert(len(inode.key) > 0, "read: zero-length inode key")
	}

	// Save first key so we can find the node in the parent when we spill.
	if len(n.inodes) > 0 {
		n.key = n.inodes[0].key
		_assert(len(n.key) > 0, "read: zero-length node key")
	} else {
		n.key = nil
	}
}

// write writes the items onto one or more pages.
func (n *node) write(p *page) {
	// Initialize page.
	if n.isLeaf {
		p.flags |= leafPageFlag
	} else {
		p.flags |= branchPageFlag
	}

	if len(n.inodes) >= 0xFFFF {
		panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.id))
	}
	p.count = uint16(len(n.inodes))

	// Stop here if there are no items to write.
	if p.count == 0 {
		return
	}

	// Loop over each item and write it to the page.
	// off tracks the offset into p of the start of the next data.
	off := unsafe.Sizeof(*p) + n.pageElementSize()*uintptr(len(n.inodes))
	for i, item := range n.inodes {
		_assert(len(item.key) > 0, "write: zero-length inode key")

		// Create a slice to write into of needed size and advance
		// byte pointer for next iteration.
		sz := len(item.key) + len(item.value)
		b := unsafeByteSlice(unsafe.Pointer(p), off, 0, sz)
		off += uintptr(sz)

		// Write the page element.
		if n.isLeaf {
			elem := p.leafPageElement(uint16(i))
			elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
			elem.flags = item.flags
			elem.ksize = uint32(len(item.key))
			elem.vsize = uint32(len(item.value))
		} else {
			elem := p.branchPageElement(uint16(i))
			elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
			elem.ksize = uint32(len(item.key))
			elem.pgid = item.pgid
			_assert(elem.pgid != p.id, "write: circular dependency occurred")
		}

		// Write data for the element to the end of the page.
		l := copy(b, item.key)
		copy(b[l:], item.value)
	}

	// DEBUG ONLY: n.dump()
}

// split breaks up a node into multiple smaller nodes, if appropriate.
// This should only be called from the spill() function.
func (n *node) split(pageSize uintptr) []*node {
	var nodes []*node

	node := n
	for {
		// Split node into two.
		a, b := node.splitTwo(pageSize)
		nodes = append(nodes, a)

		// If we can't split then exit the loop.
		if b == nil {
			break
		}

		// Set node to b so it gets split on the next iteration.
		node = b
	}

	return nodes
}

// splitTwo breaks up a node into two smaller nodes, if appropriate.
// This should only be called from the split() function.
func (n *node) splitTwo(pageSize uintptr) (*node, *node) {
	// Ignore the split if the page doesn't have at least enough nodes for
	// two pages or if the nodes can fit in a single page.
	if len(n.inodes) <= (minKeysPerPage*2) || n.sizeLessThan(pageSize) {
		return n, nil
	}

	// Determine the threshold before starting a new node.
	var fillPercent = n.bucket.FillPercent
	if fillPercent < minFillPercent {
		fillPercent = minFillPercent
	} else if fillPercent > maxFillPercent {
		fillPercent = maxFillPercent
	}
	threshold := int(float64(pageSize) * fillPercent)

	// Determine split position and sizes of the two pages.
	splitIndex, _ := n.splitIndex(threshold)

	// Split node into two separate nodes.
	// If there's no parent then we'll need to create one.
	if n.parent == nil {
		n.parent = &node{bucket: n.bucket, children: []*node{n}}
	}

	// Create a new node and add it to the parent.
	next := &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent}
	n.parent.children = append(n.parent.children, next)

	// Split inodes across two nodes.
	next.inodes = n.inodes[splitIndex:]
	n.inodes = n.inodes[:splitIndex]

	// Update the statistics.
	n.bucket.tx.stats.Split++

	return n, next
}

// splitIndex finds the position where a page will fill a given threshold.
// It returns the index as well as the size of the first page.
// This is only be called from split().
func (n *node) splitIndex(threshold int) (index, sz uintptr) {
	sz = pageHeaderSize

	// Loop until we only have the minimum number of keys required for the second page.
	for i := 0; i < len(n.inodes)-minKeysPerPage; i++ {
		index = uintptr(i)
		inode := n.inodes[i]
		elsize := n.pageElementSize() + uintptr(len(inode.key)) + uintptr(len(inode.value))

		// If we have at least the minimum number of keys and adding another
		// node would put us over the threshold then exit and return.
		if index >= minKeysPerPage && sz+elsize > uintptr(threshold) {
			break
		}

		// Add the element size to the total size.
		sz += elsize
	}

	return
}

// spill writes the nodes to dirty pages and splits nodes as it goes.
// Returns an error if dirty pages cannot be allocated.
func (n *node) spill() error {
	var tx = n.bucket.tx
	if n.spilled {
		return nil
	}

	// Spill child nodes first. Child nodes can materialize sibling nodes in
	// the case of split-merge so we cannot use a range loop. We have to check
	// the children size on every loop iteration.
	sort.Sort(n.children)
	for i := 0; i < len(n.children); i++ {
		if err := n.children[i].spill(); err != nil {
			return err
		}
	}

	// We no longer need the child list because it's only used for spill tracking.
	n.children = nil

	// Split nodes into appropriate sizes. The first node will always be n.
	var nodes = n.split(uintptr(tx.db.pageSize))
	for _, node := range nodes {
		// Add node's page to the freelist if it's not new.
		if node.pgid > 0 {
			tx.db.freelist.free(tx.meta.txid, tx.page(node.pgid))
			node.pgid = 0
		}

		// Allocate contiguous space for the node.
		p, err := tx.allocate((node.size() + tx.db.pageSize - 1) / tx.db.pageSize)
		if err != nil {
			return err
		}

		// Write the node.
		if p.id >= tx.meta.pgid {
			panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.id, tx.meta.pgid))
		}
		node.pgid = p.id
		node.write(p)
		node.spilled = true

		// Insert into parent inodes.
		if node.parent != nil {
			var key = node.key
			if key == nil {
				key = node.inodes[0].key
			}

			node.parent.put(key, node.inodes[0].key, nil, node.pgid, 0)
			node.key = node.inodes[0].key
			_assert(len(node.key) > 0, "spill: zero-length node key")
		}

		// Update the statistics.
		tx.stats.Spill++
	}

	// If the root node split and created a new root then we need to spill that
	// as well. We'll clear out the children to make sure it doesn't try to respill.
	if n.parent != nil && n.parent.pgid == 0 {
		n.children = nil
		return n.parent.spill()
	}

	return nil
}

// rebalance attempts to combine the node with sibling nodes if the node fill
// size is below a threshold or if there are not enough keys.
func (n *node) rebalance() {
	if !n.unbalanced {
		return
	}
	n.unbalanced = false

	// Update statistics.
	n.bucket.tx.stats.Rebalance++

	// Ignore if node is above threshold (25%) and has enough keys.
	var threshold = n.bucket.tx.db.pageSize / 4
	if n.size() > threshold && len(n.inodes) > n.minKeys() {
		return
	}

	// Root node has special handling.
	if n.parent == nil {
		// If root node is a branch and only has one node then collapse it.
		if !n.isLeaf && len(n.inodes) == 1 {
			// Move root's child up.
			child := n.bucket.node(n.inodes[0].pgid, n)
			n.isLeaf = child.isLeaf
			n.inodes = child.inodes[:]
			n.children = child.children

			// Reparent all child nodes being moved.
			for _, inode := range n.inodes {
				if child, ok := n.bucket.nodes[inode.pgid]; ok {
					child.parent = n
				}
			}

			// Remove old child.
			child.parent = nil
			delete(n.bucket.nodes, child.pgid)
			child.free()
		}

		return
	}

	// If node has no keys then just remove it.
	if n.numChildren() == 0 {
		n.parent.del(n.key)
		n.parent.removeChild(n)
		delete(n.bucket.nodes, n.pgid)
		n.free()
		n.parent.rebalance()
		return
	}

	_assert(n.parent.numChildren() > 1, "parent must have at least 2 children")

	// Destination node is right sibling if idx == 0, otherwise left sibling.
	var target *node
	var useNextSibling = (n.parent.childIndex(n) == 0)
	if useNextSibling {
		target = n.nextSibling()
	} else {
		target = n.prevSibling()
	}

	// If both this node and the target node are too small then merge them.
	if useNextSibling {
		// Reparent all child nodes being moved.
		for _, inode := range target.inodes {
			if child, ok := n.bucket.nodes[inode.pgid]; ok {
				child.parent.removeChild(child)
				child.parent = n
				child.parent.children = append(child.parent.children, child)
			}
		}

		// Copy over inodes from target and remove target.
		n.inodes = append(n.inodes, target.inodes...)
		n.parent.del(target.key)
		n.parent.removeChild(target)
		delete(n.bucket.nodes, target.pgid)
		target.free()
	} else {
		// Reparent all child nodes being moved.
		for _, inode := range n.inodes {
			if child, ok := n.bucket.nodes[inode.pgid]; ok {
				child.parent.removeChild(child)
				child.parent = target
				child.parent.children = append(child.parent.children, child)
			}
		}

		// Copy over inodes to target and remove node.
		target.inodes = append(target.inodes, n.inodes...)
		n.parent.del(n.key)
		n.parent.removeChild(n)
		delete(n.bucket.nodes, n.pgid)
		n.free()
	}

	// Either this node or the target node was deleted from the parent so rebalance it.
	n.parent.rebalance()
}

// removes a node from the list of in-memory children.
// This does not affect the inodes.
func (n *node) removeChild(target *node) {
	for i, child := range n.children {
		if child == target {
			n.children = append(n.children[:i], n.children[i+1:]...)
			return
		}
	}
}

// dereference causes the node to copy all its inode key/value references to heap memory.
// This is required when the mmap is reallocated so inodes are not pointing to stale data.
func (n *node) dereference() {
	if n.key != nil {
		key := make([]byte, len(n.key))
		copy(key, n.key)
		n.key = key
		_assert(n.pgid == 0 || len(n.key) > 0, "dereference: zero-length node key on existing node")
	}

	for i := range n.inodes {
		inode := &n.inodes[i]

		key := make([]byte, len(inode.key))
		copy(key, inode.key)
		inode.key = key
		_assert(len(inode.key) > 0, "dereference: zero-length inode key")

		value := make([]byte, len(inode.value))
		copy(value, inode.value)
		inode.value = value
	}

	// Recursively dereference children.
	for _, child := range n.children {
		child.dereference()
	}

	// Update statistics.
	n.bucket.tx.stats.NodeDeref++
}

// free adds the node's underlying page to the freelist.
func (n *node) free() {
	if n.pgid != 0 {
		n.bucket.tx.db.freelist.free(n.bucket.tx.meta.txid, n.bucket.tx.page(n.pgid))
		n.pgid = 0
	}
}

// dump writes the contents of the node to STDERR for debugging purposes.
/*
func (n *node) dump() {
	// Write node header.
	var typ = "branch"
	if n.isLeaf {
		typ = "leaf"
	}
	warnf("[NODE %d {type=%s count=%d}]", n.pgid, typ, len(n.inodes))

	// Write out abbreviated version of each item.
	for _, item := range n.inodes {
		if n.isLeaf {
			if item.flags&bucketLeafFlag != 0 {
				bucket := (*bucket)(unsafe.Pointer(&item.value[0]))
				warnf("+L %08x -> (bucket root=%d)", trunc(item.key, 4), bucket.root)
			} else {
				warnf("+L %08x -> %08x", trunc(item.key, 4), trunc(item.value, 4))
			}
		} else {
			warnf("+B %08x -> pgid=%d", trunc(item.key, 4), item.pgid)
		}
	}
	warn("")
}
*/

type nodes []*node

func (s nodes) Len() int      { return len(s) }
func (s nodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s nodes) Less(i, j int) bool {
	return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1
}

// inode represents an internal node inside of a node.
// It can be used to point to elements in a page or point
// to an element which hasn't been added to a page yet.
type inode struct {
	flags uint32
	pgid  pgid
	key   []byte
	value []byte
}

type inodes []inode
Revendoring to move boltdb to bbolt Signed-off-by: John Howard <jhoward@microsoft.com> 2018-09-18 18:18:08 +00:00			`package bbolt`
vendor: add vendoring script Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2017-05-25 23:26:11 +00:00
			`import (`
			`"bytes"`
			`"fmt"`
			`"sort"`
			`"unsafe"`
			`)`

			`// node represents an in-memory, deserialized page.`
			`type node struct {`
			`bucket *Bucket`
			`isLeaf bool`
			`unbalanced bool`
			`spilled bool`
			`key []byte`
			`pgid pgid`
			`parent *node`
			`children nodes`
			`inodes inodes`
			`}`

			`// root returns the top-level node this node is attached to.`
			`func (n node) root() node {`
			`if n.parent == nil {`
			`return n`
			`}`
			`return n.parent.root()`
			`}`

			`// minKeys returns the minimum number of inodes this node should have.`
			`func (n *node) minKeys() int {`
			`if n.isLeaf {`
			`return 1`
			`}`
			`return 2`
			`}`

			`// size returns the size of the node after serialization.`
			`func (n *node) size() int {`
			`sz, elsz := pageHeaderSize, n.pageElementSize()`
			`for i := 0; i < len(n.inodes); i++ {`
			`item := &n.inodes[i]`
vendor: update containerd to eb6354a11 Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2020-07-29 00:48:03 +00:00			`sz += elsz + uintptr(len(item.key)) + uintptr(len(item.value))`
vendor: add vendoring script Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2017-05-25 23:26:11 +00:00			`}`
vendor: update containerd to eb6354a11 Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2020-07-29 00:48:03 +00:00			`return int(sz)`
vendor: add vendoring script Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2017-05-25 23:26:11 +00:00			`}`

			`// sizeLessThan returns true if the node is less than a given size.`
			`// This is an optimization to avoid calculating a large node when we only need`
			`// to know if it fits inside a certain page size.`
vendor: update containerd to eb6354a11 Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2020-07-29 00:48:03 +00:00			`func (n *node) sizeLessThan(v uintptr) bool {`
vendor: add vendoring script Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2017-05-25 23:26:11 +00:00			`sz, elsz := pageHeaderSize, n.pageElementSize()`
			`for i := 0; i < len(n.inodes); i++ {`
			`item := &n.inodes[i]`
vendor: update containerd to eb6354a11 Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2020-07-29 00:48:03 +00:00			`sz += elsz + uintptr(len(item.key)) + uintptr(len(item.value))`
vendor: add vendoring script Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2017-05-25 23:26:11 +00:00			`if sz >= v {`
			`return false`
			`}`
			`}`
			`return true`
			`}`

			`// pageElementSize returns the size of each page element based on the type of node.`
vendor: update containerd to eb6354a11 Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2020-07-29 00:48:03 +00:00			`func (n *node) pageElementSize() uintptr {`
vendor: add vendoring script Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2017-05-25 23:26:11 +00:00			`if n.isLeaf {`
			`return leafPageElementSize`
			`}`
			`return branchPageElementSize`
			`}`

			`// childAt returns the child node at a given index.`
			`func (n node) childAt(index int) node {`
			`if n.isLeaf {`
			`panic(fmt.Sprintf("invalid childAt(%d) on a leaf node", index))`
			`}`
			`return n.bucket.node(n.inodes[index].pgid, n)`
			`}`

			`// childIndex returns the index of a given child node.`
			`func (n node) childIndex(child node) int {`
			`index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, child.key) != -1 })`
			`return index`
			`}`

			`// numChildren returns the number of children.`
			`func (n *node) numChildren() int {`
			`return len(n.inodes)`
			`}`

			`// nextSibling returns the next node with the same parent.`
			`func (n node) nextSibling() node {`
			`if n.parent == nil {`
			`return nil`
			`}`
			`index := n.parent.childIndex(n)`
			`if index >= n.parent.numChildren()-1 {`
			`return nil`
			`}`
			`return n.parent.childAt(index + 1)`
			`}`

			`// prevSibling returns the previous node with the same parent.`
			`func (n node) prevSibling() node {`
			`if n.parent == nil {`
			`return nil`
			`}`
			`index := n.parent.childIndex(n)`
			`if index == 0 {`
			`return nil`
			`}`
			`return n.parent.childAt(index - 1)`
			`}`

			`// put inserts a key/value.`
			`func (n *node) put(oldKey, newKey, value []byte, pgid pgid, flags uint32) {`
			`if pgid >= n.bucket.tx.meta.pgid {`
			`panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", pgid, n.bucket.tx.meta.pgid))`
			`} else if len(oldKey) <= 0 {`
			`panic("put: zero-length old key")`
			`} else if len(newKey) <= 0 {`
			`panic("put: zero-length new key")`
			`}`

			`// Find insertion index.`
			`index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 })`

			`// Add capacity and shift nodes if we don't have an exact match and need to insert.`
			`exact := (len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey))`
			`if !exact {`
			`n.inodes = append(n.inodes, inode{})`
			`copy(n.inodes[index+1:], n.inodes[index:])`
			`}`

			`inode := &n.inodes[index]`
			`inode.flags = flags`
			`inode.key = newKey`
			`inode.value = value`
			`inode.pgid = pgid`
			`_assert(len(inode.key) > 0, "put: zero-length inode key")`
			`}`

			`// del removes a key from the node.`
			`func (n *node) del(key []byte) {`
			`// Find index of key.`
			`index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, key) != -1 })`

			`// Exit if the key isn't found.`
			`if index >= len(n.inodes) \|\| !bytes.Equal(n.inodes[index].key, key) {`
			`return`
			`}`

			`// Delete inode from the node.`
			`n.inodes = append(n.inodes[:index], n.inodes[index+1:]...)`

			`// Mark the node as needing rebalancing.`
			`n.unbalanced = true`
			`}`

			`// read initializes the node from a page.`
			`func (n node) read(p page) {`
			`n.pgid = p.id`
			`n.isLeaf = ((p.flags & leafPageFlag) != 0)`
			`n.inodes = make(inodes, int(p.count))`

			`for i := 0; i < int(p.count); i++ {`
			`inode := &n.inodes[i]`
			`if n.isLeaf {`
			`elem := p.leafPageElement(uint16(i))`
			`inode.flags = elem.flags`
			`inode.key = elem.key()`
			`inode.value = elem.value()`
			`} else {`
			`elem := p.branchPageElement(uint16(i))`
			`inode.pgid = elem.pgid`
			`inode.key = elem.key()`
			`}`
			`_assert(len(inode.key) > 0, "read: zero-length inode key")`
			`}`

			`// Save first key so we can find the node in the parent when we spill.`
			`if len(n.inodes) > 0 {`
			`n.key = n.inodes[0].key`
			`_assert(len(n.key) > 0, "read: zero-length node key")`
			`} else {`
			`n.key = nil`
			`}`
			`}`

			`// write writes the items onto one or more pages.`
			`func (n node) write(p page) {`
			`// Initialize page.`
			`if n.isLeaf {`
			`p.flags \|= leafPageFlag`
			`} else {`
			`p.flags \|= branchPageFlag`
			`}`

			`if len(n.inodes) >= 0xFFFF {`
			`panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.id))`
			`}`
			`p.count = uint16(len(n.inodes))`

			`// Stop here if there are no items to write.`
			`if p.count == 0 {`
			`return`
			`}`

			`// Loop over each item and write it to the page.`
vendor: update containerd to eb6354a11 Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2020-07-29 00:48:03 +00:00			`// off tracks the offset into p of the start of the next data.`
			`off := unsafe.Sizeof(p) + n.pageElementSize()uintptr(len(n.inodes))`
vendor: add vendoring script Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2017-05-25 23:26:11 +00:00			`for i, item := range n.inodes {`
			`_assert(len(item.key) > 0, "write: zero-length inode key")`

vendor: update containerd to eb6354a11 Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2020-07-29 00:48:03 +00:00			`// Create a slice to write into of needed size and advance`
			`// byte pointer for next iteration.`
			`sz := len(item.key) + len(item.value)`
			`b := unsafeByteSlice(unsafe.Pointer(p), off, 0, sz)`
			`off += uintptr(sz)`

vendor: add vendoring script Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2017-05-25 23:26:11 +00:00			`// Write the page element.`
			`if n.isLeaf {`
			`elem := p.leafPageElement(uint16(i))`
			`elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))`
			`elem.flags = item.flags`
			`elem.ksize = uint32(len(item.key))`
			`elem.vsize = uint32(len(item.value))`
			`} else {`
			`elem := p.branchPageElement(uint16(i))`
			`elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))`
			`elem.ksize = uint32(len(item.key))`
			`elem.pgid = item.pgid`
			`_assert(elem.pgid != p.id, "write: circular dependency occurred")`
			`}`

			`// Write data for the element to the end of the page.`
vendor: update containerd to eb6354a11 Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2020-07-29 00:48:03 +00:00			`l := copy(b, item.key)`
			`copy(b[l:], item.value)`
vendor: add vendoring script Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2017-05-25 23:26:11 +00:00			`}`

			`// DEBUG ONLY: n.dump()`
			`}`

			`// split breaks up a node into multiple smaller nodes, if appropriate.`
			`// This should only be called from the spill() function.`
vendor: update containerd to eb6354a11 Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2020-07-29 00:48:03 +00:00			`func (n node) split(pageSize uintptr) []node {`
vendor: add vendoring script Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2017-05-25 23:26:11 +00:00			`var nodes []*node`

			`node := n`
			`for {`
			`// Split node into two.`
			`a, b := node.splitTwo(pageSize)`
			`nodes = append(nodes, a)`

			`// If we can't split then exit the loop.`
			`if b == nil {`
			`break`
			`}`

			`// Set node to b so it gets split on the next iteration.`
			`node = b`
			`}`

			`return nodes`
			`}`

			`// splitTwo breaks up a node into two smaller nodes, if appropriate.`
			`// This should only be called from the split() function.`
vendor: update containerd to eb6354a11 Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2020-07-29 00:48:03 +00:00			`func (n node) splitTwo(pageSize uintptr) (node, *node) {`
vendor: add vendoring script Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2017-05-25 23:26:11 +00:00			`// Ignore the split if the page doesn't have at least enough nodes for`
			`// two pages or if the nodes can fit in a single page.`
			`if len(n.inodes) <= (minKeysPerPage*2) \|\| n.sizeLessThan(pageSize) {`
			`return n, nil`
			`}`

			`// Determine the threshold before starting a new node.`
			`var fillPercent = n.bucket.FillPercent`
			`if fillPercent < minFillPercent {`
			`fillPercent = minFillPercent`
			`} else if fillPercent > maxFillPercent {`
			`fillPercent = maxFillPercent`
			`}`
			`threshold := int(float64(pageSize) * fillPercent)`

			`// Determine split position and sizes of the two pages.`
			`splitIndex, _ := n.splitIndex(threshold)`

			`// Split node into two separate nodes.`
			`// If there's no parent then we'll need to create one.`
			`if n.parent == nil {`
			`n.parent = &node{bucket: n.bucket, children: []*node{n}}`
			`}`

			`// Create a new node and add it to the parent.`
			`next := &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent}`
			`n.parent.children = append(n.parent.children, next)`

			`// Split inodes across two nodes.`
			`next.inodes = n.inodes[splitIndex:]`
			`n.inodes = n.inodes[:splitIndex]`

			`// Update the statistics.`
			`n.bucket.tx.stats.Split++`

			`return n, next`
			`}`

			`// splitIndex finds the position where a page will fill a given threshold.`
			`// It returns the index as well as the size of the first page.`
			`// This is only be called from split().`
vendor: update containerd to eb6354a11 Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2020-07-29 00:48:03 +00:00			`func (n *node) splitIndex(threshold int) (index, sz uintptr) {`
vendor: add vendoring script Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2017-05-25 23:26:11 +00:00			`sz = pageHeaderSize`

			`// Loop until we only have the minimum number of keys required for the second page.`
			`for i := 0; i < len(n.inodes)-minKeysPerPage; i++ {`
vendor: update containerd to eb6354a11 Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2020-07-29 00:48:03 +00:00			`index = uintptr(i)`
vendor: add vendoring script Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2017-05-25 23:26:11 +00:00			`inode := n.inodes[i]`
vendor: update containerd to eb6354a11 Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2020-07-29 00:48:03 +00:00			`elsize := n.pageElementSize() + uintptr(len(inode.key)) + uintptr(len(inode.value))`
vendor: add vendoring script Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2017-05-25 23:26:11 +00:00
			`// If we have at least the minimum number of keys and adding another`
			`// node would put us over the threshold then exit and return.`
vendor: update containerd to eb6354a11 Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2020-07-29 00:48:03 +00:00			`if index >= minKeysPerPage && sz+elsize > uintptr(threshold) {`
vendor: add vendoring script Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2017-05-25 23:26:11 +00:00			`break`
			`}`

			`// Add the element size to the total size.`
			`sz += elsize`
			`}`

			`return`
			`}`

			`// spill writes the nodes to dirty pages and splits nodes as it goes.`
			`// Returns an error if dirty pages cannot be allocated.`
			`func (n *node) spill() error {`
			`var tx = n.bucket.tx`
			`if n.spilled {`
			`return nil`
			`}`

			`// Spill child nodes first. Child nodes can materialize sibling nodes in`
			`// the case of split-merge so we cannot use a range loop. We have to check`
			`// the children size on every loop iteration.`
			`sort.Sort(n.children)`
			`for i := 0; i < len(n.children); i++ {`
			`if err := n.children[i].spill(); err != nil {`
			`return err`
			`}`
			`}`

			`// We no longer need the child list because it's only used for spill tracking.`
			`n.children = nil`

			`// Split nodes into appropriate sizes. The first node will always be n.`
vendor: update containerd to eb6354a11 Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2020-07-29 00:48:03 +00:00			`var nodes = n.split(uintptr(tx.db.pageSize))`
vendor: add vendoring script Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2017-05-25 23:26:11 +00:00			`for _, node := range nodes {`
			`// Add node's page to the freelist if it's not new.`
			`if node.pgid > 0 {`
			`tx.db.freelist.free(tx.meta.txid, tx.page(node.pgid))`
			`node.pgid = 0`
			`}`

			`// Allocate contiguous space for the node.`
Revendoring to move boltdb to bbolt Signed-off-by: John Howard <jhoward@microsoft.com> 2018-09-18 18:18:08 +00:00			`p, err := tx.allocate((node.size() + tx.db.pageSize - 1) / tx.db.pageSize)`
vendor: add vendoring script Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2017-05-25 23:26:11 +00:00			`if err != nil {`
			`return err`
			`}`

			`// Write the node.`
			`if p.id >= tx.meta.pgid {`
			`panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.id, tx.meta.pgid))`
			`}`
			`node.pgid = p.id`
			`node.write(p)`
			`node.spilled = true`

			`// Insert into parent inodes.`
			`if node.parent != nil {`
			`var key = node.key`
			`if key == nil {`
			`key = node.inodes[0].key`
			`}`

			`node.parent.put(key, node.inodes[0].key, nil, node.pgid, 0)`
			`node.key = node.inodes[0].key`
			`_assert(len(node.key) > 0, "spill: zero-length node key")`
			`}`

			`// Update the statistics.`
			`tx.stats.Spill++`
			`}`

			`// If the root node split and created a new root then we need to spill that`
			`// as well. We'll clear out the children to make sure it doesn't try to respill.`
			`if n.parent != nil && n.parent.pgid == 0 {`
			`n.children = nil`
			`return n.parent.spill()`
			`}`

			`return nil`
			`}`

			`// rebalance attempts to combine the node with sibling nodes if the node fill`
			`// size is below a threshold or if there are not enough keys.`
			`func (n *node) rebalance() {`
			`if !n.unbalanced {`
			`return`
			`}`
			`n.unbalanced = false`

			`// Update statistics.`
			`n.bucket.tx.stats.Rebalance++`

			`// Ignore if node is above threshold (25%) and has enough keys.`
			`var threshold = n.bucket.tx.db.pageSize / 4`
			`if n.size() > threshold && len(n.inodes) > n.minKeys() {`
			`return`
			`}`

			`// Root node has special handling.`
			`if n.parent == nil {`
			`// If root node is a branch and only has one node then collapse it.`
			`if !n.isLeaf && len(n.inodes) == 1 {`
			`// Move root's child up.`
			`child := n.bucket.node(n.inodes[0].pgid, n)`
			`n.isLeaf = child.isLeaf`
			`n.inodes = child.inodes[:]`
			`n.children = child.children`

			`// Reparent all child nodes being moved.`
			`for _, inode := range n.inodes {`
			`if child, ok := n.bucket.nodes[inode.pgid]; ok {`
			`child.parent = n`
			`}`
			`}`

			`// Remove old child.`
			`child.parent = nil`
			`delete(n.bucket.nodes, child.pgid)`
			`child.free()`
			`}`

			`return`
			`}`

			`// If node has no keys then just remove it.`
			`if n.numChildren() == 0 {`
			`n.parent.del(n.key)`
			`n.parent.removeChild(n)`
			`delete(n.bucket.nodes, n.pgid)`
			`n.free()`
			`n.parent.rebalance()`
			`return`
			`}`

			`_assert(n.parent.numChildren() > 1, "parent must have at least 2 children")`

			`// Destination node is right sibling if idx == 0, otherwise left sibling.`
			`var target *node`
			`var useNextSibling = (n.parent.childIndex(n) == 0)`
			`if useNextSibling {`
			`target = n.nextSibling()`
			`} else {`
			`target = n.prevSibling()`
			`}`

			`// If both this node and the target node are too small then merge them.`
			`if useNextSibling {`
			`// Reparent all child nodes being moved.`
			`for _, inode := range target.inodes {`
			`if child, ok := n.bucket.nodes[inode.pgid]; ok {`
			`child.parent.removeChild(child)`
			`child.parent = n`
			`child.parent.children = append(child.parent.children, child)`
			`}`
			`}`

			`// Copy over inodes from target and remove target.`
			`n.inodes = append(n.inodes, target.inodes...)`
			`n.parent.del(target.key)`
			`n.parent.removeChild(target)`
			`delete(n.bucket.nodes, target.pgid)`
			`target.free()`
			`} else {`
			`// Reparent all child nodes being moved.`
			`for _, inode := range n.inodes {`
			`if child, ok := n.bucket.nodes[inode.pgid]; ok {`
			`child.parent.removeChild(child)`
			`child.parent = target`
			`child.parent.children = append(child.parent.children, child)`
			`}`
			`}`

			`// Copy over inodes to target and remove node.`
			`target.inodes = append(target.inodes, n.inodes...)`
			`n.parent.del(n.key)`
			`n.parent.removeChild(n)`
			`delete(n.bucket.nodes, n.pgid)`
			`n.free()`
			`}`

			`// Either this node or the target node was deleted from the parent so rebalance it.`
			`n.parent.rebalance()`
			`}`

			`// removes a node from the list of in-memory children.`
			`// This does not affect the inodes.`
			`func (n node) removeChild(target node) {`
			`for i, child := range n.children {`
			`if child == target {`
			`n.children = append(n.children[:i], n.children[i+1:]...)`
			`return`
			`}`
			`}`
			`}`

			`// dereference causes the node to copy all its inode key/value references to heap memory.`
			`// This is required when the mmap is reallocated so inodes are not pointing to stale data.`
			`func (n *node) dereference() {`
			`if n.key != nil {`
			`key := make([]byte, len(n.key))`
			`copy(key, n.key)`
			`n.key = key`
			`_assert(n.pgid == 0 \|\| len(n.key) > 0, "dereference: zero-length node key on existing node")`
			`}`

			`for i := range n.inodes {`
			`inode := &n.inodes[i]`

			`key := make([]byte, len(inode.key))`
			`copy(key, inode.key)`
			`inode.key = key`
			`_assert(len(inode.key) > 0, "dereference: zero-length inode key")`

			`value := make([]byte, len(inode.value))`
			`copy(value, inode.value)`
			`inode.value = value`
			`}`

			`// Recursively dereference children.`
			`for _, child := range n.children {`
			`child.dereference()`
			`}`

			`// Update statistics.`
			`n.bucket.tx.stats.NodeDeref++`
			`}`

			`// free adds the node's underlying page to the freelist.`
			`func (n *node) free() {`
			`if n.pgid != 0 {`
			`n.bucket.tx.db.freelist.free(n.bucket.tx.meta.txid, n.bucket.tx.page(n.pgid))`
			`n.pgid = 0`
			`}`
			`}`

			`// dump writes the contents of the node to STDERR for debugging purposes.`
			`/*`
			`func (n *node) dump() {`
			`// Write node header.`
			`var typ = "branch"`
			`if n.isLeaf {`
			`typ = "leaf"`
			`}`
			`warnf("[NODE %d {type=%s count=%d}]", n.pgid, typ, len(n.inodes))`

			`// Write out abbreviated version of each item.`
			`for _, item := range n.inodes {`
			`if n.isLeaf {`
			`if item.flags&bucketLeafFlag != 0 {`
			`bucket := (*bucket)(unsafe.Pointer(&item.value[0]))`
			`warnf("+L %08x -> (bucket root=%d)", trunc(item.key, 4), bucket.root)`
			`} else {`
			`warnf("+L %08x -> %08x", trunc(item.key, 4), trunc(item.value, 4))`
			`}`
			`} else {`
			`warnf("+B %08x -> pgid=%d", trunc(item.key, 4), item.pgid)`
			`}`
			`}`
			`warn("")`
			`}`
			`*/`

			`type nodes []*node`

vendor: update containerd to eb6354a11 Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2020-07-29 00:48:03 +00:00			`func (s nodes) Len() int { return len(s) }`
			`func (s nodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] }`
			`func (s nodes) Less(i, j int) bool {`
			`return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1`
			`}`
vendor: add vendoring script Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com> 2017-05-25 23:26:11 +00:00
			`// inode represents an internal node inside of a node.`
			`// It can be used to point to elements in a page or point`
			`// to an element which hasn't been added to a page yet.`
			`type inode struct {`
			`flags uint32`
			`pgid pgid`
			`key []byte`
			`value []byte`
			`}`

			`type inodes []inode`