bolt/db.go

1037 lines
28 KiB
Go
Raw Normal View History

2014-01-08 15:06:17 +00:00
package bolt
2014-01-12 05:51:01 +00:00
import (
"errors"
2014-02-16 19:11:10 +00:00
"fmt"
"hash/fnv"
"log"
2014-01-12 22:30:09 +00:00
"os"
"runtime"
"runtime/debug"
"strings"
2014-01-12 05:51:01 +00:00
"sync"
"time"
2014-01-12 05:51:01 +00:00
"unsafe"
)
2014-02-12 21:57:27 +00:00
// Internal constants describing the mmap growth policy and file format.
const (
	// maxMmapStep is the largest step that can be taken when remapping
	// the mmap (1GB).
	maxMmapStep = 1 << 30

	// version is the data file format version.
	version = 2

	// magic is a marker value indicating that a file is a Bolt DB.
	magic uint32 = 0xED0CDAED
)

// IgnoreNoSync specifies whether the NoSync field of a DB is ignored when
// syncing changes to a file. This is required as some operating systems,
// such as OpenBSD, do not have a unified buffer cache (UBC) and writes
// must be synchronized using the msync(2) syscall.
const IgnoreNoSync = runtime.GOOS == "openbsd"

// Default values if not set in a DB instance.
const (
	DefaultMaxBatchSize  int = 1000
	DefaultMaxBatchDelay     = 10 * time.Millisecond
	DefaultAllocSize         = 16 << 20 // 16MB
)

// defaultPageSize is the default page size for a db: the OS page size.
var defaultPageSize = os.Getpagesize()
2014-02-12 21:57:27 +00:00
// DB represents a collection of buckets persisted to a file on disk.
// All data access is performed through transactions which can be obtained through the DB.
// All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called.
2014-01-12 05:51:01 +00:00
type DB struct {
2014-05-15 00:08:55 +00:00
// When enabled, the database will perform a Check() after every commit.
// A panic is issued if the database is in an inconsistent state. This
// flag has a large performance impact so it should only be used for
// debugging purposes.
StrictMode bool
// Setting the NoSync flag will cause the database to skip fsync()
// calls after each commit. This can be useful when bulk loading data
// into a database and you can restart the bulk load in the event of
// a system failure or database corruption. Do not set this flag for
// normal use.
//
// If the package global IgnoreNoSync constant is true, this value is
// ignored. See the comment on that constant for more details.
//
// THIS IS UNSAFE. PLEASE USE WITH CAUTION.
NoSync bool
// When true, skips the truncate call when growing the database.
// Setting this to true is only safe on non-ext3/ext4 systems.
// Skipping truncation avoids preallocation of hard drive space and
// bypasses a truncate() and fsync() syscall on remapping.
//
// https://github.com/boltdb/bolt/issues/284
NoGrowSync bool
// If you want to read the entire database fast, you can set MmapFlag to
// syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead.
MmapFlags int
// MaxBatchSize is the maximum size of a batch. Default value is
// copied from DefaultMaxBatchSize in Open.
//
// If <=0, disables batching.
//
// Do not change concurrently with calls to Batch.
MaxBatchSize int
// MaxBatchDelay is the maximum delay before a batch starts.
// Default value is copied from DefaultMaxBatchDelay in Open.
//
// If <=0, effectively disables batching.
//
// Do not change concurrently with calls to Batch.
MaxBatchDelay time.Duration
// AllocSize is the amount of space allocated when the database
// needs to create new pages. This is done to amortize the cost
// of truncate() and fsync() when growing the data file.
AllocSize int
path string
file *os.File
2016-03-01 05:58:44 +00:00
lockfile *os.File // windows only
dataref []byte // mmap'ed readonly, write throws SEGV
data *[maxMapSize]byte
datasz int
filesz int // current on disk file size
meta0 *meta
meta1 *meta
pageSize int
opened bool
2014-03-09 03:25:37 +00:00
rwtx *Tx
txs []*Tx
freelist *freelist
stats Stats
2014-02-09 22:52:19 +00:00
2016-04-22 20:24:11 +00:00
pagePool sync.Pool
batchMu sync.Mutex
batch *batch
2014-02-09 22:52:19 +00:00
rwlock sync.Mutex // Allows only one writer at a time.
metalock sync.Mutex // Protects meta page access.
mmaplock sync.RWMutex // Protects mmap access during remapping.
statlock sync.RWMutex // Protects stats access.
ops struct {
writeAt func(b []byte, off int64) (n int, err error)
}
2015-05-18 19:45:02 +00:00
// Read only mode.
// When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately.
readOnly bool
2014-01-12 05:51:01 +00:00
}
2014-01-24 19:51:56 +00:00
// Path returns the path to currently open database file.
2014-01-12 05:51:01 +00:00
func (db *DB) Path() string {
2014-01-09 16:07:10 +00:00
return db.path
}
// GoString returns the Go string representation of the database: the
// type name followed by the quoted file path.
func (db *DB) GoString() string {
	const layout = "bolt.DB{path:%q}"
	return fmt.Sprintf(layout, db.path)
}
// String returns the string representation of the database, which is the
// quoted file path wrapped in "DB<...>".
func (db *DB) String() string {
	const layout = "DB<%q>"
	return fmt.Sprintf(layout, db.path)
}
// Open creates and opens a database at the given path.
2014-01-24 19:51:56 +00:00
// If the file does not exist then it will be created automatically.
// Passing in nil options will cause Bolt to open the database with the default options.
func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
var db = &DB{opened: true}
2014-01-10 14:32:12 +00:00
// Set default options if no options are provided.
if options == nil {
options = DefaultOptions
}
db.NoGrowSync = options.NoGrowSync
db.MmapFlags = options.MmapFlags
// Set default values for later DB operations.
db.MaxBatchSize = DefaultMaxBatchSize
db.MaxBatchDelay = DefaultMaxBatchDelay
db.AllocSize = DefaultAllocSize
flag := os.O_RDWR
if options.ReadOnly {
flag = os.O_RDONLY
db.readOnly = true
}
2014-01-10 14:32:12 +00:00
// Open data file and separate sync handler for metadata writes.
db.path = path
var err error
if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil {
2014-03-24 13:38:27 +00:00
_ = db.close()
return nil, err
2014-01-10 14:32:12 +00:00
}
// Lock file so that other processes using Bolt in read-write mode cannot
// use the database at the same time. This would cause corruption since
// the two processes would write meta pages and free pages separately.
// The database file is locked exclusively (only one process can grab the lock)
// if !options.ReadOnly.
// The database file is locked using the shared lock (more than one process may
// hold a lock at the same time) otherwise (options.ReadOnly is set).
2016-03-01 05:58:44 +00:00
if err := flock(db, mode, !db.readOnly, options.Timeout); err != nil {
_ = db.close()
return nil, err
}
// Default values for test hooks
db.ops.writeAt = db.file.WriteAt
2014-01-28 19:52:09 +00:00
// Initialize the database if it doesn't exist.
if info, err := db.file.Stat(); err != nil {
return nil, err
2014-01-28 19:52:09 +00:00
} else if info.Size() == 0 {
// Initialize new files with meta pages.
2014-01-12 05:51:01 +00:00
if err := db.init(); err != nil {
return nil, err
2014-01-10 14:32:12 +00:00
}
2014-01-28 19:52:09 +00:00
} else {
// Read the first meta page to determine the page size.
2014-02-12 21:57:27 +00:00
var buf [0x1000]byte
2014-01-28 19:52:09 +00:00
if _, err := db.file.ReadAt(buf[:], 0); err == nil {
2014-01-30 03:35:58 +00:00
m := db.pageInBuffer(buf[:], 0).meta()
if err := m.validate(); err != nil {
// If we can't read the page size, we can assume it's the same
// as the OS -- since that's how the page size was chosen in the
// first place.
2016-04-24 20:09:45 +00:00
//
// If the first page is invalid and this OS uses a different
// page size than what the database was created with then we
// are out of luck and cannot access the database.
db.pageSize = os.Getpagesize()
} else {
db.pageSize = int(m.pageSize)
2014-01-28 19:52:09 +00:00
}
}
2014-01-10 14:32:12 +00:00
}
2016-04-22 20:24:11 +00:00
// Initialize page pool.
db.pagePool = sync.Pool{
New: func() interface{} {
return make([]byte, db.pageSize)
},
}
2014-01-10 14:32:12 +00:00
// Memory map the data file.
if err := db.mmap(options.InitialMmapSize); err != nil {
2014-03-24 13:38:27 +00:00
_ = db.close()
return nil, err
2014-01-10 14:32:12 +00:00
}
2014-02-09 22:52:19 +00:00
// Read in the freelist.
db.freelist = newFreelist()
2014-02-09 22:52:19 +00:00
db.freelist.read(db.page(db.meta().freelist))
2014-01-10 14:32:12 +00:00
// Mark the database as opened and return.
return db, nil
2014-01-10 14:32:12 +00:00
}
2014-01-24 19:51:56 +00:00
// mmap opens the underlying memory-mapped file and initializes the meta references.
2014-02-11 19:16:12 +00:00
// minsz is the minimum size that the new mmap can be.
func (db *DB) mmap(minsz int) error {
db.mmaplock.Lock()
defer db.mmaplock.Unlock()
2014-01-28 19:52:09 +00:00
info, err := db.file.Stat()
if err != nil {
return fmt.Errorf("mmap stat error: %s", err)
2014-01-28 20:16:22 +00:00
} else if int(info.Size()) < db.pageSize*2 {
return fmt.Errorf("file size too small")
2014-01-10 14:32:12 +00:00
}
2014-01-30 23:22:02 +00:00
2014-02-11 19:16:12 +00:00
// Ensure the size is at least the minimum size.
var size = int(info.Size())
if size < minsz {
size = minsz
}
size, err = db.mmapSize(size)
if err != nil {
return err
}
// Dereference all mmap references before unmapping.
if db.rwtx != nil {
db.rwtx.root.dereference()
}
// Unmap existing data before continuing.
if err := db.munmap(); err != nil {
return err
}
2014-01-12 05:51:01 +00:00
// Memory-map the data file as a byte slice.
if err := mmap(db, size); err != nil {
2014-01-12 05:51:01 +00:00
return err
}
2014-01-10 14:32:12 +00:00
2014-01-12 05:51:01 +00:00
// Save references to the meta pages.
2014-01-30 03:35:58 +00:00
db.meta0 = db.page(0).meta()
db.meta1 = db.page(1).meta()
// Validate the meta pages. We only return an error if both meta pages fail
// validation, since meta0 failing validation means that it wasn't saved
// properly -- but we can recover using meta1. And vice-versa.
err0 := db.meta0.validate()
err1 := db.meta1.validate()
if err0 != nil && err1 != nil {
2016-04-24 20:09:45 +00:00
return err0
2014-01-10 14:32:12 +00:00
}
return nil
}
2014-02-11 19:16:12 +00:00
// munmap unmaps the data file from memory.
func (db *DB) munmap() error {
if err := munmap(db); err != nil {
return fmt.Errorf("unmap error: " + err.Error())
2014-02-11 19:16:12 +00:00
}
return nil
2014-02-11 19:16:12 +00:00
}
// mmapSize determines the appropriate size for the mmap given the current size
2015-12-16 23:17:37 +00:00
// of the database. The minimum size is 32KB and doubles until it reaches 1GB.
// Returns an error if the new mmap size is greater than the max allowed.
func (db *DB) mmapSize(size int) (int, error) {
// Double the size from 32KB until 1GB.
for i := uint(15); i <= 30; i++ {
if size <= 1<<i {
return 1 << i, nil
}
}
2015-01-28 21:27:06 +00:00
// Verify the requested size is not above the maximum allowed.
if size > maxMapSize {
return 0, fmt.Errorf("mmap too large")
}
// If larger than 1GB then grow by 1GB at a time.
sz := int64(size)
2015-01-28 21:27:06 +00:00
if remainder := sz % int64(maxMmapStep); remainder > 0 {
sz += int64(maxMmapStep) - remainder
2014-02-11 19:16:12 +00:00
}
// Ensure that the mmap size is a multiple of the page size.
// This should always be true since we're incrementing in MBs.
2015-01-28 21:27:06 +00:00
pageSize := int64(db.pageSize)
if (sz % pageSize) != 0 {
sz = ((sz / pageSize) + 1) * pageSize
2014-02-11 19:16:12 +00:00
}
2015-01-28 21:27:06 +00:00
// If we've exceeded the max size then only grow up to the max size.
if sz > maxMapSize {
sz = maxMapSize
}
return int(sz), nil
2014-02-11 19:16:12 +00:00
}
2014-01-12 05:51:01 +00:00
// init creates a new database file and initializes its meta pages.
func (db *DB) init() error {
2014-01-28 03:22:37 +00:00
// Set the page size to the OS page size.
2016-04-22 20:24:11 +00:00
db.pageSize = os.Getpagesize()
2014-01-10 14:32:12 +00:00
2014-01-12 05:51:01 +00:00
// Create two meta pages on a buffer.
2014-01-30 03:50:29 +00:00
buf := make([]byte, db.pageSize*4)
2014-01-12 05:51:01 +00:00
for i := 0; i < 2; i++ {
2014-01-27 15:11:54 +00:00
p := db.pageInBuffer(buf[:], pgid(i))
p.id = pgid(i)
2014-02-12 21:57:27 +00:00
p.flags = metaPageFlag
2014-01-30 03:35:58 +00:00
2014-01-30 03:50:29 +00:00
// Initialize the meta page.
2014-01-30 03:35:58 +00:00
m := p.meta()
m.magic = magic
2014-01-31 17:22:58 +00:00
m.version = version
2014-01-30 03:35:58 +00:00
m.pageSize = uint32(db.pageSize)
2014-02-09 22:52:19 +00:00
m.freelist = 2
m.root = bucket{root: 3}
2014-01-30 23:22:02 +00:00
m.pgid = 4
m.txid = txid(i)
2016-04-24 20:09:45 +00:00
m.checksum = m.sum64()
2014-01-12 05:51:01 +00:00
}
2014-01-10 14:32:12 +00:00
2014-01-30 03:50:29 +00:00
// Write an empty freelist at page 3.
p := db.pageInBuffer(buf[:], pgid(2))
2014-01-30 05:11:46 +00:00
p.id = pgid(2)
2014-02-12 21:57:27 +00:00
p.flags = freelistPageFlag
2014-01-30 03:50:29 +00:00
p.count = 0
// Write an empty leaf page at page 4.
p = db.pageInBuffer(buf[:], pgid(3))
2014-01-30 05:11:46 +00:00
p.id = pgid(3)
p.flags = leafPageFlag
2014-01-30 03:50:29 +00:00
p.count = 0
2014-01-12 05:51:01 +00:00
// Write the buffer to our data file.
if _, err := db.ops.writeAt(buf, 0); err != nil {
return err
}
if err := fdatasync(db); err != nil {
2014-01-12 05:51:01 +00:00
return err
}
2014-01-10 14:32:12 +00:00
2014-01-12 05:51:01 +00:00
return nil
}
2014-01-10 14:32:12 +00:00
2014-02-12 21:57:27 +00:00
// Close releases all database resources.
// All transactions must be closed before closing the database.
func (db *DB) Close() error {
db.rwlock.Lock()
defer db.rwlock.Unlock()
2014-02-09 22:52:19 +00:00
db.metalock.Lock()
defer db.metalock.Unlock()
db.mmaplock.RLock()
defer db.mmaplock.RUnlock()
return db.close()
2014-01-24 19:51:56 +00:00
}
func (db *DB) close() error {
2016-03-01 05:58:44 +00:00
if !db.opened {
return nil
}
2014-02-16 05:34:21 +00:00
db.opened = false
2014-02-16 06:38:03 +00:00
2014-02-09 22:52:19 +00:00
db.freelist = nil
2014-02-11 19:16:12 +00:00
// Clear ops.
db.ops.writeAt = nil
// Close the mmap.
if err := db.munmap(); err != nil {
return err
}
// Close file handles.
if db.file != nil {
// No need to unlock read-only file.
if !db.readOnly {
// Unlock the file.
2016-03-01 05:58:44 +00:00
if err := funlock(db); err != nil {
log.Printf("bolt.Close(): funlock error: %s", err)
}
}
// Close the file descriptor.
if err := db.file.Close(); err != nil {
return fmt.Errorf("db file close: %s", err)
}
db.file = nil
}
db.path = ""
return nil
2014-01-12 05:51:01 +00:00
}
2014-01-10 14:32:12 +00:00
// Begin starts a new transaction.
// Multiple read-only transactions can be used concurrently but only one
// write transaction can be used at a time. Starting multiple write transactions
// will cause the calls to block and be serialized until the current write
// transaction finishes.
2014-02-12 21:57:27 +00:00
//
2016-01-08 07:33:40 +00:00
// Transactions should not be dependent on one another. Opening a read
// transaction and a write transaction in the same goroutine can cause the
// writer to deadlock because the database periodically needs to re-mmap itself
// as it grows and it cannot do that while a read transaction is open.
//
// If a long running read transaction (for example, a snapshot transaction) is
// needed, you might want to set DB.InitialMmapSize to a large enough value
// to avoid potential blocking of write transaction.
//
// IMPORTANT: You must close read-only transactions after you are finished or
// else the database will not reclaim old pages.
func (db *DB) Begin(writable bool) (*Tx, error) {
if writable {
return db.beginRWTx()
}
return db.beginTx()
}
func (db *DB) beginTx() (*Tx, error) {
// Lock the meta pages while we initialize the transaction. We obtain
// the meta lock before the mmap lock because that's the order that the
// write transaction will obtain them.
db.metalock.Lock()
2014-02-11 19:16:12 +00:00
// Obtain a read-only lock on the mmap. When the mmap is remapped it will
// obtain a write lock so all transactions must finish before it can be
// remapped.
db.mmaplock.RLock()
2014-01-13 17:35:04 +00:00
// Exit if the database is not open yet.
if !db.opened {
db.mmaplock.RUnlock()
db.metalock.Unlock()
2014-02-16 19:18:44 +00:00
return nil, ErrDatabaseNotOpen
2014-01-13 17:35:04 +00:00
}
// Create a transaction associated with the database.
t := &Tx{}
2014-01-30 23:22:02 +00:00
t.init(db)
2014-01-14 20:01:02 +00:00
2014-02-09 22:52:19 +00:00
// Keep track of transaction until it closes.
db.txs = append(db.txs, t)
n := len(db.txs)
// Unlock the meta pages.
db.metalock.Unlock()
// Update the transaction stats.
db.statlock.Lock()
db.stats.TxN++
db.stats.OpenTxN = n
db.statlock.Unlock()
2014-02-09 22:52:19 +00:00
2014-01-24 19:51:56 +00:00
return t, nil
}
func (db *DB) beginRWTx() (*Tx, error) {
2015-05-18 19:45:02 +00:00
// If the database was opened with Options.ReadOnly, return an error.
if db.readOnly {
return nil, ErrDatabaseReadOnly
}
2015-05-18 19:45:02 +00:00
2014-03-09 03:25:37 +00:00
// Obtain writer lock. This is released by the transaction when it closes.
// This enforces only one writer transaction at a time.
2014-02-09 22:52:19 +00:00
db.rwlock.Lock()
2014-01-24 19:51:56 +00:00
// Once we have the writer lock then we can lock the meta pages so that
// we can set up the transaction.
db.metalock.Lock()
defer db.metalock.Unlock()
2014-01-26 22:29:06 +00:00
// Exit if the database is not open yet.
if !db.opened {
2014-02-09 22:52:19 +00:00
db.rwlock.Unlock()
2014-02-16 19:18:44 +00:00
return nil, ErrDatabaseNotOpen
2014-01-13 17:35:04 +00:00
}
// Create a transaction associated with the database.
2014-03-09 03:25:37 +00:00
t := &Tx{writable: true}
2014-01-30 23:22:02 +00:00
t.init(db)
db.rwtx = t
2014-02-09 22:52:19 +00:00
// Free any pages associated with closed read-only transactions.
var minid txid = 0xFFFFFFFFFFFFFFFF
for _, t := range db.txs {
2014-07-26 21:11:47 +00:00
if t.meta.txid < minid {
minid = t.meta.txid
2014-02-09 22:52:19 +00:00
}
}
if minid > 0 {
db.freelist.release(minid - 1)
}
2014-01-26 22:29:06 +00:00
2014-01-13 17:35:04 +00:00
return t, nil
}
// removeTx removes a transaction from the database.
func (db *DB) removeTx(tx *Tx) {
2014-02-11 19:16:12 +00:00
// Release the read lock on the mmap.
db.mmaplock.RUnlock()
// Use the meta lock to restrict access to the DB object.
db.metalock.Lock()
2014-02-09 22:52:19 +00:00
// Remove the transaction.
for i, t := range db.txs {
if t == tx {
db.txs = append(db.txs[:i], db.txs[i+1:]...)
2014-02-09 22:52:19 +00:00
break
}
}
n := len(db.txs)
// Unlock the meta pages.
db.metalock.Unlock()
// Merge statistics.
db.statlock.Lock()
db.stats.OpenTxN = n
db.stats.TxStats.add(&tx.stats)
db.statlock.Unlock()
2014-02-09 22:52:19 +00:00
}
// Update executes a function within the context of a read-write managed transaction.
2014-02-15 21:54:45 +00:00
// If no error is returned from the function then the transaction is committed.
// If an error is returned then the entire transaction is rolled back.
// Any error that is returned from the function or returned from the commit is
// returned from the Update() method.
//
// Attempting to manually commit or rollback within the function will cause a panic.
func (db *DB) Update(fn func(*Tx) error) error {
t, err := db.Begin(true)
2014-02-15 21:54:45 +00:00
if err != nil {
return err
}
// Make sure the transaction rolls back in the event of a panic.
defer func() {
if t.db != nil {
t.rollback()
}
}()
// Mark as a managed tx so that the inner function cannot manually commit.
t.managed = true
2014-02-15 21:54:45 +00:00
// If an error is returned from the function then rollback and return error.
err = fn(t)
t.managed = false
if err != nil {
2014-03-24 13:38:27 +00:00
_ = t.Rollback()
2014-02-15 21:54:45 +00:00
return err
}
return t.Commit()
}
// View executes a function within the context of a managed read-only transaction.
// Any error that is returned from the function is returned from the View() method.
//
// Attempting to manually rollback within the function will cause a panic.
func (db *DB) View(fn func(*Tx) error) error {
t, err := db.Begin(false)
2014-02-16 20:51:35 +00:00
if err != nil {
return err
}
2014-02-16 22:43:35 +00:00
// Make sure the transaction rolls back in the event of a panic.
defer func() {
if t.db != nil {
t.rollback()
}
}()
// Mark as a managed tx so that the inner function cannot manually rollback.
t.managed = true
2014-02-16 22:43:35 +00:00
// If an error is returned from the function then pass it through.
err = fn(t)
t.managed = false
2014-03-24 13:38:27 +00:00
if err != nil {
_ = t.Rollback()
return err
}
if err := t.Rollback(); err != nil {
return err
}
2014-03-24 13:38:27 +00:00
return nil
2014-02-16 22:43:35 +00:00
}
// Batch calls fn as part of a batch. It behaves similar to Update,
// except:
//
// 1. concurrent Batch calls can be combined into a single Bolt
// transaction.
//
// 2. the function passed to Batch may be called multiple times,
// regardless of whether it returns error or not.
//
// This means that Batch function side effects must be idempotent and
// take permanent effect only after a successful return is seen in
// caller.
//
// The maximum batch size and delay can be adjusted with DB.MaxBatchSize
// and DB.MaxBatchDelay, respectively.
//
// Batch is only useful when there are multiple goroutines calling it.
func (db *DB) Batch(fn func(*Tx) error) error {
	// Buffered so the batch runner never blocks delivering our result.
	result := make(chan error, 1)

	db.batchMu.Lock()
	if db.batch == nil || len(db.batch.calls) >= db.MaxBatchSize {
		// There is no existing batch, or the existing batch is full; start a new one.
		db.batch = &batch{db: db}
		db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger)
	}
	db.batch.calls = append(db.batch.calls, call{fn: fn, err: result})
	if len(db.batch.calls) >= db.MaxBatchSize {
		// wake up batch, it's ready to run
		go db.batch.trigger()
	}
	db.batchMu.Unlock()

	// Wait for the batch; trySolo means fn failed inside the batch and
	// must be re-run in its own transaction.
	if err := <-result; err != trySolo {
		return err
	}
	return db.Update(fn)
}
// call is one function queued by Batch together with the channel on which
// its result is reported.
type call struct {
	fn  func(*Tx) error // function to run inside the batch transaction
	err chan<- error    // receives the outcome for the Batch caller
}
// batch is a group of calls that are executed together in a single Update.
type batch struct {
	db    *DB
	timer *time.Timer // fires trigger after DB.MaxBatchDelay
	start sync.Once   // ensures run executes at most once
	calls []call      // queued functions, appended by DB.Batch
}
// trigger runs the batch if it hasn't already been run.
// It may be invoked both by the batch timer and by DB.Batch once the batch
// is full; b.start (a sync.Once) makes every call after the first a no-op.
func (b *batch) trigger() {
b.start.Do(b.run)
}
// run performs the transactions in the batch and communicates results
// back to DB.Batch.
//
// All queued functions are executed inside one Update. If one of them
// fails, it is removed from the batch, its submitter is told to re-run it
// solo, and the remaining functions are retried in a fresh Update.
func (b *batch) run() {
	b.db.batchMu.Lock()
	b.timer.Stop()
	// Make sure no new work is added to this batch, but don't break
	// other batches.
	if b.db.batch == b {
		b.db.batch = nil
	}
	b.db.batchMu.Unlock()

	for len(b.calls) > 0 {
		failIdx := -1
		err := b.db.Update(func(tx *Tx) error {
			for i, c := range b.calls {
				if err := safelyCall(c.fn, tx); err != nil {
					failIdx = i
					return err
				}
			}
			return nil
		})

		if failIdx < 0 {
			// pass success, or bolt internal errors, to all callers
			for _, c := range b.calls {
				if c.err != nil {
					c.err <- err
				}
			}
			return
		}

		// Take the failing call out of the batch by swapping it with the
		// last element. It's safe to shorten b.calls here because db.batch
		// no longer points to us, so Batch cannot append to this slice.
		failed := b.calls[failIdx]
		last := len(b.calls) - 1
		b.calls[failIdx] = b.calls[last]
		b.calls = b.calls[:last]

		// tell the submitter re-run it solo, continue with the rest of the batch
		failed.err <- trySolo
	}
}
// trySolo is a special sentinel error value used for signaling that a
// transaction function should be re-run. It should never be seen by
// callers: DB.Batch compares the batch result against it by identity and,
// on a match, re-runs the function through Update instead of returning it.
var trySolo = errors.New("batch function returned an error and should be re-run solo")
// panicked wraps a value recovered from a panic so that it can be passed
// around as an error.
type panicked struct {
	reason interface{}
}

// Error returns the message of the recovered value if it is itself an
// error; otherwise it formats the value with a "panic: " prefix.
func (p panicked) Error() string {
	switch cause := p.reason.(type) {
	case error:
		return cause.Error()
	default:
		return fmt.Sprintf("panic: %v", p.reason)
	}
}
// safelyCall invokes fn with tx and converts a panic raised by fn into a
// panicked error instead of letting it propagate.
func safelyCall(fn func(*Tx) error, tx *Tx) (err error) {
	defer func() {
		reason := recover()
		if reason == nil {
			return
		}
		// Overwrite the named result so the caller sees the panic as an error.
		err = panicked{reason}
	}()
	err = fn(tx)
	return err
}
// Sync executes fdatasync() against the database file handle.
//
// This is not necessary under normal operation, however, if you use NoSync
// then it allows you to force the database file to sync against the disk.
func (db *DB) Sync() error {
	return fdatasync(db)
}
// Stats retrieves ongoing performance stats for the database.
// This is only updated when a transaction closes.
func (db *DB) Stats() Stats {
db.statlock.RLock()
defer db.statlock.RUnlock()
return db.stats
2014-02-21 16:49:15 +00:00
}
2014-04-15 17:56:53 +00:00
// This is for internal access to the raw data bytes from the C cursor, use
// carefully, or not at all.
2014-04-16 15:00:26 +00:00
func (db *DB) Info() *Info {
return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize}
2014-04-15 17:56:53 +00:00
}
2014-01-24 19:51:56 +00:00
// page retrieves a page reference from the mmap based on the current page size.
2014-01-27 15:11:54 +00:00
func (db *DB) page(id pgid) *page {
pos := id * pgid(db.pageSize)
return (*page)(unsafe.Pointer(&db.data[pos]))
2014-01-24 19:51:56 +00:00
}
// pageInBuffer retrieves a page reference from a given byte array based on the current page size.
2014-01-27 15:11:54 +00:00
func (db *DB) pageInBuffer(b []byte, id pgid) *page {
return (*page)(unsafe.Pointer(&b[id*pgid(db.pageSize)]))
2014-01-12 05:51:01 +00:00
}
2014-01-10 14:32:12 +00:00
2014-01-14 20:01:02 +00:00
// meta retrieves the current meta page reference.
func (db *DB) meta() *meta {
// We have to return the meta with the highest txid which doesn't fail
// validation. Otherwise, we can cause errors when in fact the database is
// in a consistent state. metaA is the one with the higher txid.
metaA := db.meta0
metaB := db.meta1
if db.meta1.txid > db.meta0.txid {
metaA = db.meta1
metaB = db.meta0
2014-01-14 20:01:02 +00:00
}
2016-04-24 20:09:45 +00:00
// Use higher meta page if valid. Otherwise fallback to previous, if valid.
if err := metaA.validate(); err == nil {
return metaA
2016-04-24 20:09:45 +00:00
} else if err := metaB.validate(); err == nil {
return metaB
}
// This should never be reached, because both meta1 and meta0 were validated
// on mmap() and we do fsync() on every write.
2016-04-24 20:09:45 +00:00
panic("bolt.DB.meta(): invalid meta pages")
2014-01-14 20:01:02 +00:00
}
2014-02-09 22:52:19 +00:00
// allocate returns a contiguous block of memory starting at a given page.
2014-02-11 19:16:12 +00:00
func (db *DB) allocate(count int) (*page, error) {
2014-02-09 22:52:19 +00:00
// Allocate a temporary buffer for the page.
var buf []byte
2016-04-22 20:24:11 +00:00
if count == 1 {
buf = db.pagePool.Get().([]byte)
} else {
buf = make([]byte, count*db.pageSize)
}
2014-02-09 22:52:19 +00:00
p := (*page)(unsafe.Pointer(&buf[0]))
p.overflow = uint32(count - 1)
// Use pages from the freelist if they are available.
if p.id = db.freelist.allocate(count); p.id != 0 {
2014-02-11 19:16:12 +00:00
return p, nil
2014-02-09 22:52:19 +00:00
}
2014-02-11 19:16:12 +00:00
// Resize mmap() if we're at the end.
p.id = db.rwtx.meta.pgid
2014-02-11 19:16:12 +00:00
var minsz = int((p.id+pgid(count))+1) * db.pageSize
if minsz >= db.datasz {
2014-02-11 19:16:12 +00:00
if err := db.mmap(minsz); err != nil {
return nil, fmt.Errorf("mmap allocate error: %s", err)
2014-02-11 19:16:12 +00:00
}
}
// Move the page id high water mark.
db.rwtx.meta.pgid += pgid(count)
2014-02-09 22:52:19 +00:00
2014-02-11 19:16:12 +00:00
return p, nil
2014-02-09 22:52:19 +00:00
}
2014-02-21 16:49:15 +00:00
// grow grows the size of the database to the given sz.
func (db *DB) grow(sz int) error {
// Ignore if the new size is less than available file size.
if sz <= db.filesz {
return nil
}
// If the data is smaller than the alloc size then only allocate what's needed.
// Once it goes over the allocation size then allocate in chunks.
if db.datasz < db.AllocSize {
sz = db.datasz
} else {
sz += db.AllocSize
}
// Truncate and fsync to ensure file size metadata is flushed.
// https://github.com/boltdb/bolt/issues/284
if !db.NoGrowSync && !db.readOnly {
2016-03-01 05:58:44 +00:00
if runtime.GOOS != "windows" {
if err := db.file.Truncate(int64(sz)); err != nil {
return fmt.Errorf("file resize error: %s", err)
}
}
if err := db.file.Sync(); err != nil {
return fmt.Errorf("file sync error: %s", err)
}
}
db.filesz = sz
return nil
}
// IsReadOnly reports whether the database was opened in read-only mode
// (Options.ReadOnly was set when calling Open).
func (db *DB) IsReadOnly() bool {
return db.readOnly
}
// Options represents the options that can be set when opening a database.
type Options struct {
	// Timeout is the amount of time to wait to obtain a file lock.
	// When set to zero it will wait indefinitely. This option is only
	// available on Darwin and Linux.
	Timeout time.Duration

	// NoGrowSync sets the DB.NoGrowSync flag before memory mapping the file.
	NoGrowSync bool

	// ReadOnly opens the database in read-only mode. Uses
	// flock(..., LOCK_SH |LOCK_NB) to grab a shared lock (UNIX).
	ReadOnly bool

	// MmapFlags sets the DB.MmapFlags flag before memory mapping the file.
	MmapFlags int

	// InitialMmapSize is the initial mmap size of the database
	// in bytes. Read transactions won't block write transaction
	// if the InitialMmapSize is large enough to hold database mmap
	// size. (See DB.Begin for more information)
	//
	// If <=0, the initial map size is 0.
	// If InitialMmapSize is smaller than the previous database size,
	// it takes no effect.
	InitialMmapSize int
}
// DefaultOptions represent the options used if nil options are passed into Open().
// No timeout is used which will cause Bolt to wait indefinitely for a lock.
// Both fields are spelled out with their zero values for readability; the
// remaining Options fields are left at their zero values as well.
var DefaultOptions = &Options{
Timeout: 0,
NoGrowSync: false,
}
// Stats represents statistics about the database.
type Stats struct {
2014-06-17 18:40:56 +00:00
// Freelist stats
FreePageN int // total number of free pages on the freelist
PendingPageN int // total number of pending pages on the freelist
FreeAlloc int // total bytes allocated in free pages
FreelistInuse int // total bytes used by the freelist
2014-06-17 18:40:56 +00:00
// Transaction stats
2014-06-17 18:40:56 +00:00
TxN int // total number of started read transactions
OpenTxN int // number of currently open read transactions
TxStats TxStats // global, ongoing stats.
}
2014-02-21 16:49:15 +00:00
// Sub calculates and returns the difference between two sets of database stats.
// This is useful when obtaining stats at two different points and time and
// you need the performance counters that occurred within that time span.
func (s *Stats) Sub(other *Stats) Stats {
2014-06-24 15:30:28 +00:00
if other == nil {
2014-06-24 17:54:40 +00:00
return *s
2014-06-24 15:30:28 +00:00
}
2014-06-24 17:58:46 +00:00
var diff Stats
diff.FreePageN = s.FreePageN
diff.PendingPageN = s.PendingPageN
diff.FreeAlloc = s.FreeAlloc
diff.FreelistInuse = s.FreelistInuse
2014-06-24 17:54:40 +00:00
diff.TxN = other.TxN - s.TxN
diff.TxStats = s.TxStats.Sub(&other.TxStats)
return diff
}
2014-02-21 16:49:15 +00:00
func (s *Stats) add(other *Stats) {
s.TxStats.add(&other.TxStats)
2014-02-21 16:49:15 +00:00
}
2014-04-16 15:00:26 +00:00
// Info carries the raw values returned by DB.Info.
type Info struct {
	Data     uintptr // address of the first byte of the mmap'ed data
	PageSize int     // page size of the database
}
// meta is the content of a meta page. The checksum covers every field
// that precedes it (see sum64), so checksum must remain the last field.
type meta struct {
	magic    uint32 // must equal the magic constant for the page to validate
	version  uint32 // must equal the version constant for the page to validate
	pageSize uint32 // page size the file was initialized with; read back by Open
	flags    uint32 // not referenced in this file
	root     bucket // root bucket; its page id must stay below pgid
	freelist pgid   // page id of the freelist page; must stay below pgid
	pgid     pgid   // high water mark: the next page id to hand out
	txid     txid   // id of the transaction that wrote this meta
	checksum uint64 // sum64 of the fields above; zero skips the check in validate
}
// validate checks the marker bytes and version of the meta page to ensure it matches this binary.
func (m *meta) validate() error {
2016-04-24 20:09:45 +00:00
if m.magic != magic {
return ErrInvalid
} else if m.version != version {
return ErrVersionMismatch
2016-04-24 20:09:45 +00:00
} else if m.checksum != 0 && m.checksum != m.sum64() {
return ErrChecksum
}
return nil
}
// copy copies one meta object to another.
// Every field of m, including the checksum, is assigned to dest by value.
func (m *meta) copy(dest *meta) {
*dest = *m
}
// write writes the meta onto a page.
//
// It panics if the root bucket or the freelist refers to a page at or
// above the high water mark. The checksum of m is recomputed before m is
// copied into the page.
func (m *meta) write(p *page) {
	if m.root.root >= m.pgid {
		panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
	}
	if m.freelist >= m.pgid {
		panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
	}

	// Page id is either going to be 0 or 1 which we can determine by the transaction ID.
	p.id = pgid(m.txid % 2)
	p.flags |= metaPageFlag

	// Calculate the checksum.
	m.checksum = m.sum64()

	m.copy(p.meta())
}
// sum64 generates the checksum for the meta: the 64-bit FNV-1a hash of the
// struct's bytes up to, but not including, the checksum field.
func (m *meta) sum64() uint64 {
	hasher := fnv.New64a()
	covered := (*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))
	_, _ = hasher.Write(covered[:])
	return hasher.Sum64()
}
// _assert will panic with a given formatted message if the given condition
// is false. The message is prefixed with "assertion failed: ".
func _assert(condition bool, msg string, v ...interface{}) {
	if condition {
		return
	}
	panic(fmt.Sprintf("assertion failed: "+msg, v...))
}
// warn prints its arguments to standard error, followed by a newline.
func warn(v ...interface{}) {
	fmt.Fprintln(os.Stderr, v...)
}
// warnf prints a formatted message to standard error, followed by a newline.
func warnf(msg string, v ...interface{}) {
	fmt.Fprintf(os.Stderr, msg+"\n", v...)
}
// printstack writes the current goroutine's stack trace to standard error,
// omitting the first two lines of the debug.Stack output.
func printstack() {
	lines := strings.Split(string(debug.Stack()), "\n")
	trimmed := strings.Join(lines[2:], "\n")
	fmt.Fprintln(os.Stderr, trimmed)
}