Don't allocate huge slices to merge pgids in freelist.write

Using a large (50 GB) database with a read-write-delete heavy load, nearly 100% of allocated space came from freelists: 1/3 came from freelist.release, 1/3 from freelist.write, and 1/3 from tx.allocate to make space for freelist.write.

In the case of freelist.write, the newly allocated giant slice gets copied to the space prepared by tx.allocate and then discarded. To avoid this, add func mergepgids, which accepts a destination slice, and use it in freelist.write.

This has a mild negative impact on the existing benchmarks, but cuts allocated space in my real-world db by over 30%.

name                      old time/op    new time/op    delta
_FreelistRelease10K-8     18.7µs ±10%    18.2µs ± 4%    ~        (p=0.548 n=5+5)
_FreelistRelease100K-8    233µs ± 5%     258µs ±20%     ~        (p=0.151 n=5+5)
_FreelistRelease1000K-8   3.34ms ± 8%    3.13ms ± 8%    ~        (p=0.151 n=5+5)
_FreelistRelease10000K-8  32.3ms ± 1%    32.2ms ± 7%    ~        (p=0.690 n=5+5)
DBBatchAutomatic-8        2.18ms ± 3%    2.19ms ± 4%    ~        (p=0.421 n=5+5)
DBBatchSingle-8           140ms ± 6%     140ms ± 4%     ~        (p=0.841 n=5+5)
DBBatchManual10x100-8     4.41ms ± 2%    4.37ms ± 3%    ~        (p=0.548 n=5+5)

name                      old alloc/op   new alloc/op   delta
_FreelistRelease10K-8     82.5kB ± 0%    82.5kB ± 0%    ~        (all samples are equal)
_FreelistRelease100K-8    805kB ± 0%     805kB ± 0%     ~        (all samples are equal)
_FreelistRelease1000K-8   8.05MB ± 0%    8.05MB ± 0%    ~        (all samples are equal)
_FreelistRelease10000K-8  80.4MB ± 0%    80.4MB ± 0%    ~        (p=1.000 n=5+5)
DBBatchAutomatic-8        384kB ± 0%     384kB ± 0%     ~        (p=0.095 n=5+5)
DBBatchSingle-8           17.2MB ± 1%    17.2MB ± 1%    ~        (p=0.310 n=5+5)
DBBatchManual10x100-8     908kB ± 0%     902kB ± 1%     ~        (p=0.730 n=4+5)

name                      old allocs/op  new allocs/op  delta
_FreelistRelease10K-8     5.00 ± 0%      5.00 ± 0%      ~        (all samples are equal)
_FreelistRelease100K-8    5.00 ± 0%      5.00 ± 0%      ~        (all samples are equal)
_FreelistRelease1000K-8   5.00 ± 0%      5.00 ± 0%      ~        (all samples are equal)
_FreelistRelease10000K-8  5.00 ± 0%      5.00 ± 0%      ~        (all samples are equal)
DBBatchAutomatic-8        10.2k ± 0%     10.2k ± 0%     +0.07%   (p=0.032 n=5+5)
DBBatchSingle-8           58.6k ± 0%     59.6k ± 0%     +1.70%   (p=0.008 n=5+5)
DBBatchManual10x100-8     6.02k ± 0%     6.03k ± 0%     +0.17%   (p=0.029 n=4+4)
2016-12-20 14:04:46 -08:00 · 2016-12-20 14:04:46 -08:00 · 4d8824b05d
parent 0d9f544bb9
commit 4d8824b05d
3 changed files with 49 additions and 20 deletions
--- a/freelist.go
+++ b/freelist.go
@@ -46,16 +46,24 @@ func (f *freelist) pending_count() int {
 	return count
 }

-// all returns a list of all free ids and all pending ids in one sorted list.
-func (f *freelist) all() []pgid {
-	m := make(pgids, 0)
+// lenall returns the combined number of all free ids and all pending ids.
+func (f *freelist) lenall() int {
+	n := len(f.ids)
+	for _, list := range f.pending {
+		n += len(list)
+	}
+	return n
+}

+// copyall copies into dst a list of all free ids and all pending ids in one sorted list.
+// f.lenall returns the minimum length required for dst.
+func (f *freelist) copyall(dst []pgid) {
+	m := make(pgids, 0, len(f.pending)) // len(f.pending) undercounts, but it is a start
 	for _, list := range f.pending {
 		m = append(m, list...)
 	}
-
 	sort.Sort(m)
-	return pgids(f.ids).merge(m)
+	mergepgids(dst, f.ids, m)
 }

 // allocate returns the starting page id of a contiguous list of pages of a given size.
@@ -186,22 +194,22 @@ func (f *freelist) read(p *page) {
 // become free.
 func (f *freelist) write(p *page) error {
 	// Combine the old free pgids and pgids waiting on an open transaction.
-	ids := f.all()

 	// Update the header flag.
 	p.flags |= freelistPageFlag

 	// The page.count can only hold up to 64k elements so if we overflow that
 	// number then we handle it by putting the size in the first element.
-	if len(ids) == 0 {
-		p.count = uint16(len(ids))
-	} else if len(ids) < 0xFFFF {
-		p.count = uint16(len(ids))
-		copy(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:], ids)
+	lenids := f.lenall()
+	if lenids == 0 {
+		p.count = uint16(lenids)
+	} else if lenids < 0xFFFF {
+		p.count = uint16(lenids)
+		f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:])
 	} else {
 		p.count = 0xFFFF
-		((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(len(ids))
-		copy(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:], ids)
+		((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(lenids)
+		f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:])
 	}

 	return nil
--- a/page.go
+++ b/page.go
@@ -145,12 +145,33 @@ func (a pgids) merge(b pgids) pgids {
 	// Return the opposite slice if one is nil.
 	if len(a) == 0 {
 		return b
-	} else if len(b) == 0 {
+	}
+	if len(b) == 0 {
 		return a
 	}
+	merged := make(pgids, len(a)+len(b))
+	mergepgids(merged, a, b)
+	return merged
+}

-	// Create a list to hold all elements from both lists.
-	merged := make(pgids, 0, len(a)+len(b))
+// mergepgids copies the sorted union of a and b into dst.
+// If dst is too small, it panics.
+func mergepgids(dst, a, b pgids) {
+	if len(dst) < len(a)+len(b) {
+		panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b)))
+	}
+	// Copy in the opposite slice if one is nil.
+	if len(a) == 0 {
+		copy(dst, b)
+		return
+	}
+	if len(b) == 0 {
+		copy(dst, a)
+		return
+	}
+
+	// Merged will hold all elements from both lists.
+	merged := dst[:0]

 	// Assign lead to the slice with a lower starting value, follow to the higher value.
 	lead, follow := a, b
@@ -172,7 +193,5 @@ func (a pgids) merge(b pgids) pgids {
 	}

 	// Append what's left in follow.
-	merged = append(merged, follow...)
-
-	return merged
+	_ = append(merged, follow...)
 }
--- a/tx.go
+++ b/tx.go
@@ -381,7 +381,9 @@ func (tx *Tx) Check() <-chan error {
 func (tx *Tx) check(ch chan error) {
 	// Check if any pages are double freed.
 	freed := make(map[pgid]bool)
-	for _, id := range tx.db.freelist.all() {
+	all := make([]pgid, tx.db.freelist.lenall())
+	tx.db.freelist.copyall(all)
+	for _, id := range all {
 		if freed[id] {
 			ch <- fmt.Errorf("page %d: already freed", id)
 		}