blob: ca4ad812f41cf2b1c09d42dd1bc629289b6c7685 [file] [log] [blame]
Patrick Williamsd849ec72016-08-17 14:59:38 -05001From a41917c2c88bd7f694d141ac67f4a194aaa16fa1 Mon Sep 17 00:00:00 2001
2From: Qiang Huang <h.huangqiang@huawei.com>
3Date: Wed, 28 Oct 2015 08:49:45 +0800
4Subject: [PATCH] Bump bolt to v1.1.0
5
6It adds ARM64, ppc64le, s390x, solaris support, and a bunch of
7bugfixes.
8
9Signed-off-by: Qiang Huang <h.huangqiang@huawei.com>
10---
11 hack/vendor.sh | 2 +-
12 vendor/src/github.com/boltdb/bolt/.gitignore | 1 +
13 vendor/src/github.com/boltdb/bolt/README.md | 250 +++++++++++++++++++--
14 vendor/src/github.com/boltdb/bolt/batch.go | 138 ++++++++++++
15 vendor/src/github.com/boltdb/bolt/bolt_386.go | 5 +-
16 vendor/src/github.com/boltdb/bolt/bolt_amd64.go | 3 +
17 vendor/src/github.com/boltdb/bolt/bolt_arm.go | 5 +-
18 vendor/src/github.com/boltdb/bolt/bolt_arm64.go | 9 +
19 vendor/src/github.com/boltdb/bolt/bolt_ppc64le.go | 9 +
20 vendor/src/github.com/boltdb/bolt/bolt_s390x.go | 9 +
21 vendor/src/github.com/boltdb/bolt/bolt_unix.go | 37 ++-
22 .../github.com/boltdb/bolt/bolt_unix_solaris.go | 101 +++++++++
23 vendor/src/github.com/boltdb/bolt/bolt_windows.go | 10 +-
24 vendor/src/github.com/boltdb/bolt/bucket.go | 29 ++-
25 vendor/src/github.com/boltdb/bolt/cursor.go | 12 +-
26 vendor/src/github.com/boltdb/bolt/db.go | 195 ++++++++++++----
27 vendor/src/github.com/boltdb/bolt/errors.go | 4 +
28 vendor/src/github.com/boltdb/bolt/freelist.go | 28 ++-
29 vendor/src/github.com/boltdb/bolt/node.go | 36 ++-
30 vendor/src/github.com/boltdb/bolt/page.go | 45 +++-
31 vendor/src/github.com/boltdb/bolt/tx.go | 80 +++++--
32 21 files changed, 886 insertions(+), 122 deletions(-)
33 create mode 100644 vendor/src/github.com/boltdb/bolt/batch.go
34 create mode 100644 vendor/src/github.com/boltdb/bolt/bolt_arm64.go
35 create mode 100644 vendor/src/github.com/boltdb/bolt/bolt_ppc64le.go
36 create mode 100644 vendor/src/github.com/boltdb/bolt/bolt_s390x.go
37 create mode 100644 vendor/src/github.com/boltdb/bolt/bolt_unix_solaris.go
38
39diff --git a/hack/vendor.sh b/hack/vendor.sh
40index d872d4a..c28e677 100755
41--- a/hack/vendor.sh
42+++ b/hack/vendor.sh
43@@ -36,7 +36,7 @@ clone git github.com/coreos/etcd v2.2.0
44 fix_rewritten_imports github.com/coreos/etcd
45 clone git github.com/ugorji/go 5abd4e96a45c386928ed2ca2a7ef63e2533e18ec
46 clone git github.com/hashicorp/consul v0.5.2
47-clone git github.com/boltdb/bolt v1.0
48+clone git github.com/boltdb/bolt v1.1.0
49
50 # get graph and distribution packages
51 clone git github.com/docker/distribution 20c4b7a1805a52753dfd593ee1cc35558722a0ce # docker/1.9 branch
52diff --git a/vendor/src/github.com/boltdb/bolt/.gitignore b/vendor/src/github.com/boltdb/bolt/.gitignore
53index b2bb382..c7bd2b7 100644
54--- a/vendor/src/github.com/boltdb/bolt/.gitignore
55+++ b/vendor/src/github.com/boltdb/bolt/.gitignore
56@@ -1,3 +1,4 @@
57 *.prof
58 *.test
59+*.swp
60 /bin/
61diff --git a/vendor/src/github.com/boltdb/bolt/README.md b/vendor/src/github.com/boltdb/bolt/README.md
62index 727e977..0a33ebc 100644
63--- a/vendor/src/github.com/boltdb/bolt/README.md
64+++ b/vendor/src/github.com/boltdb/bolt/README.md
65@@ -16,7 +16,7 @@ and setting values. That's it.
66
67 ## Project Status
68
69-Bolt is stable and the API is fixed. Full unit test coverage and randomized
70+Bolt is stable and the API is fixed. Full unit test coverage and randomized
71 black box testing are used to ensure database consistency and thread safety.
72 Bolt is currently in high-load production environments serving databases as
73 large as 1TB. Many companies such as Shopify and Heroku use Bolt-backed
74@@ -87,6 +87,11 @@ are not thread safe. To work with data in multiple goroutines you must start
75 a transaction for each one or use locking to ensure only one goroutine accesses
76 a transaction at a time. Creating transaction from the `DB` is thread safe.
77
78+Read-only transactions and read-write transactions should not depend on one
79+another and generally shouldn't be opened simultaneously in the same goroutine.
80+This can cause a deadlock as the read-write transaction needs to periodically
81+re-map the data file but it cannot do so while a read-only transaction is open.
82+
83
84 #### Read-write transactions
85
86@@ -120,12 +125,88 @@ err := db.View(func(tx *bolt.Tx) error {
87 })
88 ```
89
90-You also get a consistent view of the database within this closure, however,
91+You also get a consistent view of the database within this closure, however,
92 no mutating operations are allowed within a read-only transaction. You can only
93 retrieve buckets, retrieve values, and copy the database within a read-only
94 transaction.
95
96
97+#### Batch read-write transactions
98+
99+Each `DB.Update()` waits for disk to commit the writes. This overhead
100+can be minimized by combining multiple updates with the `DB.Batch()`
101+function:
102+
103+```go
104+err := db.Batch(func(tx *bolt.Tx) error {
105+ ...
106+ return nil
107+})
108+```
109+
110+Concurrent Batch calls are opportunistically combined into larger
111+transactions. Batch is only useful when there are multiple goroutines
112+calling it.
113+
114+The trade-off is that `Batch` can call the given
115+function multiple times, if parts of the transaction fail. The
116+function must be idempotent and side effects must take effect only
117+after a successful return from `DB.Batch()`.
118+
119+For example: don't display messages from inside the function, instead
120+set variables in the enclosing scope:
121+
122+```go
123+var id uint64
124+err := db.Batch(func(tx *bolt.Tx) error {
125+ // Find last key in bucket, decode as bigendian uint64, increment
126+ // by one, encode back to []byte, and add new key.
127+ ...
128+ id = newValue
129+ return nil
130+})
131+if err != nil {
132+ return ...
133+}
134+fmt.Printf("Allocated ID %d\n", id)
135+```
136+
137+
138+#### Managing transactions manually
139+
140+The `DB.View()` and `DB.Update()` functions are wrappers around the `DB.Begin()`
141+function. These helper functions will start the transaction, execute a function,
142+and then safely close your transaction if an error is returned. This is the
143+recommended way to use Bolt transactions.
144+
145+However, sometimes you may want to manually start and end your transactions.
146+You can use the `DB.Begin()` function directly but _please_ be sure to close the
147+transaction.
148+
149+```go
150+// Start a writable transaction.
151+tx, err := db.Begin(true)
152+if err != nil {
153+ return err
154+}
155+defer tx.Rollback()
156+
157+// Use the transaction...
158+_, err = tx.CreateBucket([]byte("MyBucket"))
159+if err != nil {
160+ return err
161+}
162+
163+// Commit the transaction and check for error.
164+if err := tx.Commit(); err != nil {
165+ return err
166+}
167+```
168+
169+The first argument to `DB.Begin()` is a boolean stating if the transaction
170+should be writable.
171+
172+
173 ### Using buckets
174
175 Buckets are collections of key/value pairs within the database. All keys in a
176@@ -175,13 +256,61 @@ db.View(func(tx *bolt.Tx) error {
177 ```
178
179 The `Get()` function does not return an error because its operation is
180-guarenteed to work (unless there is some kind of system failure). If the key
181+guaranteed to work (unless there is some kind of system failure). If the key
182 exists then it will return its byte slice value. If it doesn't exist then it
183 will return `nil`. It's important to note that you can have a zero-length value
184 set to a key which is different than the key not existing.
185
186 Use the `Bucket.Delete()` function to delete a key from the bucket.
187
188+Please note that values returned from `Get()` are only valid while the
189+transaction is open. If you need to use a value outside of the transaction
190+then you must use `copy()` to copy it to another byte slice.
191+
192+
193+### Autoincrementing integer for the bucket
194+By using the NextSequence() function, you can let Bolt determine a sequence
195+which can be used as the unique identifier for your key/value pairs. See the
196+example below.
197+
198+```go
199+// CreateUser saves u to the store. The new user ID is set on u once the data is persisted.
200+func (s *Store) CreateUser(u *User) error {
201+ return s.db.Update(func(tx *bolt.Tx) error {
202+ // Retrieve the users bucket.
203+ // This should be created when the DB is first opened.
204+ b := tx.Bucket([]byte("users"))
205+
206+ // Generate ID for the user.
207+ // This returns an error only if the Tx is closed or not writeable.
208+ // That can't happen in an Update() call so I ignore the error check.
209+ id, _ := b.NextSequence()
210+ u.ID = int(id)
211+
212+ // Marshal user data into bytes.
213+ buf, err := json.Marshal(u)
214+ if err != nil {
215+ return err
216+ }
217+
218+ // Persist bytes to users bucket.
219+ return b.Put(itob(u.ID), buf)
220+ })
221+}
222+
223+// itob returns an 8-byte big endian representation of v.
224+func itob(v int) []byte {
225+ b := make([]byte, 8)
226+ binary.BigEndian.PutUint64(b, uint64(v))
227+ return b
228+}
229+
230+type User struct {
231+ ID int
232+ ...
233+}
234+
235+```
236
237 ### Iterating over keys
238
239@@ -254,7 +383,7 @@ db.View(func(tx *bolt.Tx) error {
240 max := []byte("2000-01-01T00:00:00Z")
241
242 // Iterate over the 90's.
243- for k, v := c.Seek(min); k != nil && bytes.Compare(k, max) != -1; k, v = c.Next() {
244+ for k, v := c.Seek(min); k != nil && bytes.Compare(k, max) <= 0; k, v = c.Next() {
245 fmt.Printf("%s: %s\n", k, v)
246 }
247
248@@ -294,7 +423,7 @@ func (*Bucket) DeleteBucket(key []byte) error
249
250 ### Database backups
251
252-Bolt is a single file so it's easy to backup. You can use the `Tx.Copy()`
253+Bolt is a single file so it's easy to backup. You can use the `Tx.WriteTo()`
254 function to write a consistent view of the database to a writer. If you call
255 this from a read-only transaction, it will perform a hot backup and not block
256 your other database reads and writes. It will also use `O_DIRECT` when available
257@@ -305,11 +434,12 @@ do database backups:
258
259 ```go
260 func BackupHandleFunc(w http.ResponseWriter, req *http.Request) {
261- err := db.View(func(tx bolt.Tx) error {
262+ err := db.View(func(tx *bolt.Tx) error {
263 w.Header().Set("Content-Type", "application/octet-stream")
264 w.Header().Set("Content-Disposition", `attachment; filename="my.db"`)
265 w.Header().Set("Content-Length", strconv.Itoa(int(tx.Size())))
266- return tx.Copy(w)
267+ _, err := tx.WriteTo(w)
268+ return err
269 })
270 if err != nil {
271 http.Error(w, err.Error(), http.StatusInternalServerError)
272@@ -351,14 +481,13 @@ go func() {
273 // Grab the current stats and diff them.
274 stats := db.Stats()
275 diff := stats.Sub(&prev)
276-
277+
278 // Encode stats to JSON and print to STDERR.
279 json.NewEncoder(os.Stderr).Encode(diff)
280
281 // Save stats for the next loop.
282 prev = stats
283 }
284-}
285 }()
286 ```
287
288@@ -366,25 +495,83 @@ It's also useful to pipe these stats to a service such as statsd for monitoring
289 or to provide an HTTP endpoint that will perform a fixed-length sample.
290
291
292+### Read-Only Mode
293+
294+Sometimes it is useful to create a shared, read-only Bolt database. To do this,
295+set the `Options.ReadOnly` flag when opening your database. Read-only mode
296+uses a shared lock to allow multiple processes to read from the database but
297+it will block any processes from opening the database in read-write mode.
298+
299+```go
300+db, err := bolt.Open("my.db", 0666, &bolt.Options{ReadOnly: true})
301+if err != nil {
302+ log.Fatal(err)
303+}
304+```
305+
306+
307 ## Resources
308
309 For more information on getting started with Bolt, check out the following articles:
310
311 * [Intro to BoltDB: Painless Performant Persistence](http://npf.io/2014/07/intro-to-boltdb-painless-performant-persistence/) by [Nate Finch](https://github.com/natefinch).
312+* [Bolt -- an embedded key/value database for Go](https://www.progville.com/go/bolt-embedded-db-golang/) by Progville
313+
314+
315+## Comparison with other databases
316+
317+### Postgres, MySQL, & other relational databases
318+
319+Relational databases structure data into rows and are only accessible through
320+the use of SQL. This approach provides flexibility in how you store and query
321+your data but also incurs overhead in parsing and planning SQL statements. Bolt
322+accesses all data by a byte slice key. This makes Bolt fast to read and write
323+data by key but provides no built-in support for joining values together.
324+
325+Most relational databases (with the exception of SQLite) are standalone servers
326+that run separately from your application. This gives your systems
327+flexibility to connect multiple application servers to a single database
328+server but also adds overhead in serializing and transporting data over the
329+network. Bolt runs as a library included in your application so all data access
330+has to go through your application's process. This brings data closer to your
331+application but limits multi-process access to the data.
332+
333+
334+### LevelDB, RocksDB
335
336+LevelDB and its derivatives (RocksDB, HyperLevelDB) are similar to Bolt in that
337+they are libraries bundled into the application, however, their underlying
338+structure is a log-structured merge-tree (LSM tree). An LSM tree optimizes
339+random writes by using a write ahead log and multi-tiered, sorted files called
340+SSTables. Bolt uses a B+tree internally and only a single file. Both approaches
341+have trade offs.
342
343+If you require a high random write throughput (>10,000 w/sec) or you need to use
344+spinning disks then LevelDB could be a good choice. If your application is
345+read-heavy or does a lot of range scans then Bolt could be a good choice.
346
347-## Comparing Bolt to LMDB
348+One other important consideration is that LevelDB does not have transactions.
349+It supports batch writing of key/values pairs and it supports read snapshots
350+but it will not give you the ability to do a compare-and-swap operation safely.
351+Bolt supports fully serializable ACID transactions.
352+
353+
354+### LMDB
355
356 Bolt was originally a port of LMDB so it is architecturally similar. Both use
357-a B+tree, have ACID semanetics with fully serializable transactions, and support
358+a B+tree, have ACID semantics with fully serializable transactions, and support
359 lock-free MVCC using a single writer and multiple readers.
360
361 The two projects have somewhat diverged. LMDB heavily focuses on raw performance
362 while Bolt has focused on simplicity and ease of use. For example, LMDB allows
363-several unsafe actions such as direct writes and append writes for the sake of
364-performance. Bolt opts to disallow actions which can leave the database in a
365-corrupted state. The only exception to this in Bolt is `DB.NoSync`.
366+several unsafe actions such as direct writes for the sake of performance. Bolt
367+opts to disallow actions which can leave the database in a corrupted state. The
368+only exception to this in Bolt is `DB.NoSync`.
369+
370+There are also a few differences in API. LMDB requires a maximum mmap size when
371+opening an `mdb_env` whereas Bolt will handle incremental mmap resizing
372+automatically. LMDB overloads the getter and setter functions with multiple
373+flags whereas Bolt splits these specialized cases into their own functions.
374
375
376 ## Caveats & Limitations
377@@ -425,14 +612,33 @@ Here are a few things to note when evaluating and using Bolt:
378 can in memory and will release memory as needed to other processes. This means
379 that Bolt can show very high memory usage when working with large databases.
380 However, this is expected and the OS will release memory as needed. Bolt can
381- handle databases much larger than the available physical RAM.
382+ handle databases much larger than the available physical RAM, provided its
383+ memory-map fits in the process virtual address space. It may be problematic
384+ on 32-bit systems.
385+
386+* The data structures in the Bolt database are memory mapped so the data file
387+ will be endian specific. This means that you cannot copy a Bolt file from a
388+ little endian machine to a big endian machine and have it work. For most
389+ users this is not a concern since most modern CPUs are little endian.
390+
391+* Because of the way pages are laid out on disk, Bolt cannot truncate data files
392+ and return free pages back to the disk. Instead, Bolt maintains a free list
393+ of unused pages within its data file. These free pages can be reused by later
394+ transactions. This works well for many use cases as databases generally tend
395+ to grow. However, it's important to note that deleting large chunks of data
396+ will not allow you to reclaim that space on disk.
397+
398+ For more information on page allocation, [see this comment][page-allocation].
399+
400+[page-allocation]: https://github.com/boltdb/bolt/issues/308#issuecomment-74811638
401
402
403 ## Other Projects Using Bolt
404
405 Below is a list of public, open source projects that use Bolt:
406
407-* [Bazil](https://github.com/bazillion/bazil) - A file system that lets your data reside where it is most convenient for it to reside.
408+* [Operation Go: A Routine Mission](http://gocode.io) - An online programming game for Golang using Bolt for user accounts and a leaderboard.
409+* [Bazil](https://bazil.org/) - A file system that lets your data reside where it is most convenient for it to reside.
410 * [DVID](https://github.com/janelia-flyem/dvid) - Added Bolt as optional storage engine and testing it against Basho-tuned leveldb.
411 * [Skybox Analytics](https://github.com/skybox/skybox) - A standalone funnel analysis tool for web analytics.
412 * [Scuttlebutt](https://github.com/benbjohnson/scuttlebutt) - Uses Bolt to store and process all Twitter mentions of GitHub projects.
413@@ -450,6 +656,16 @@ Below is a list of public, open source projects that use Bolt:
414 * [bleve](http://www.blevesearch.com/) - A pure Go search engine similar to ElasticSearch that uses Bolt as the default storage backend.
415 * [tentacool](https://github.com/optiflows/tentacool) - REST api server to manage system stuff (IP, DNS, Gateway...) on a linux server.
416 * [SkyDB](https://github.com/skydb/sky) - Behavioral analytics database.
417+* [Seaweed File System](https://github.com/chrislusf/weed-fs) - Highly scalable distributed key~file system with O(1) disk read.
418+* [InfluxDB](http://influxdb.com) - Scalable datastore for metrics, events, and real-time analytics.
419+* [Freehold](http://tshannon.bitbucket.org/freehold/) - An open, secure, and lightweight platform for your files and data.
420+* [Prometheus Annotation Server](https://github.com/oliver006/prom_annotation_server) - Annotation server for PromDash & Prometheus service monitoring system.
421+* [Consul](https://github.com/hashicorp/consul) - Consul is service discovery and configuration made easy. Distributed, highly available, and datacenter-aware.
422+* [Kala](https://github.com/ajvb/kala) - Kala is a modern job scheduler optimized to run on a single node. It is persistent, JSON over HTTP API, ISO 8601 duration notation, and dependent jobs.
423+* [drive](https://github.com/odeke-em/drive) - drive is an unofficial Google Drive command line client for \*NIX operating systems.
424+* [stow](https://github.com/djherbis/stow) - a persistence manager for objects
425+ backed by boltdb.
426+* [buckets](https://github.com/joyrexus/buckets) - a bolt wrapper streamlining
427+ simple tx and key scans.
428
429 If you are using Bolt in a project please send a pull request to add it to the list.
430-
431diff --git a/vendor/src/github.com/boltdb/bolt/batch.go b/vendor/src/github.com/boltdb/bolt/batch.go
432new file mode 100644
433index 0000000..84acae6
434--- /dev/null
435+++ b/vendor/src/github.com/boltdb/bolt/batch.go
436@@ -0,0 +1,138 @@
437+package bolt
438+
439+import (
440+ "errors"
441+ "fmt"
442+ "sync"
443+ "time"
444+)
445+
446+// Batch calls fn as part of a batch. It behaves similar to Update,
447+// except:
448+//
449+// 1. concurrent Batch calls can be combined into a single Bolt
450+// transaction.
451+//
452+// 2. the function passed to Batch may be called multiple times,
453+// regardless of whether it returns error or not.
454+//
455+// This means that Batch function side effects must be idempotent and
456+// take permanent effect only after a successful return is seen in
457+// caller.
458+//
459+// The maximum batch size and delay can be adjusted with DB.MaxBatchSize
460+// and DB.MaxBatchDelay, respectively.
461+//
462+// Batch is only useful when there are multiple goroutines calling it.
463+func (db *DB) Batch(fn func(*Tx) error) error {
464+ errCh := make(chan error, 1)
465+
466+ db.batchMu.Lock()
467+ if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) {
468+ // There is no existing batch, or the existing batch is full; start a new one.
469+ db.batch = &batch{
470+ db: db,
471+ }
472+ db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger)
473+ }
474+ db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh})
475+ if len(db.batch.calls) >= db.MaxBatchSize {
476+ // wake up batch, it's ready to run
477+ go db.batch.trigger()
478+ }
479+ db.batchMu.Unlock()
480+
481+ err := <-errCh
482+ if err == trySolo {
483+ err = db.Update(fn)
484+ }
485+ return err
486+}
487+
488+type call struct {
489+ fn func(*Tx) error
490+ err chan<- error
491+}
492+
493+type batch struct {
494+ db *DB
495+ timer *time.Timer
496+ start sync.Once
497+ calls []call
498+}
499+
500+// trigger runs the batch if it hasn't already been run.
501+func (b *batch) trigger() {
502+ b.start.Do(b.run)
503+}
504+
505+// run performs the transactions in the batch and communicates results
506+// back to DB.Batch.
507+func (b *batch) run() {
508+ b.db.batchMu.Lock()
509+ b.timer.Stop()
510+ // Make sure no new work is added to this batch, but don't break
511+ // other batches.
512+ if b.db.batch == b {
513+ b.db.batch = nil
514+ }
515+ b.db.batchMu.Unlock()
516+
517+retry:
518+ for len(b.calls) > 0 {
519+ var failIdx = -1
520+ err := b.db.Update(func(tx *Tx) error {
521+ for i, c := range b.calls {
522+ if err := safelyCall(c.fn, tx); err != nil {
523+ failIdx = i
524+ return err
525+ }
526+ }
527+ return nil
528+ })
529+
530+ if failIdx >= 0 {
531+ // take the failing transaction out of the batch. it's
532+ // safe to shorten b.calls here because db.batch no longer
533+ // points to us, so no new calls can be appended.
534+ c := b.calls[failIdx]
535+ b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1]
536+ // tell the submitter re-run it solo, continue with the rest of the batch
537+ c.err <- trySolo
538+ continue retry
539+ }
540+
541+ // pass success, or bolt internal errors, to all callers
542+ for _, c := range b.calls {
543+ if c.err != nil {
544+ c.err <- err
545+ }
546+ }
547+ break retry
548+ }
549+}
550+
551+// trySolo is a special sentinel error value used for signaling that a
552+// transaction function should be re-run. It should never be seen by
553+// callers.
554+var trySolo = errors.New("batch function returned an error and should be re-run solo")
555+
556+type panicked struct {
557+ reason interface{}
558+}
559+
560+func (p panicked) Error() string {
561+ if err, ok := p.reason.(error); ok {
562+ return err.Error()
563+ }
564+ return fmt.Sprintf("panic: %v", p.reason)
565+}
566+
567+func safelyCall(fn func(*Tx) error, tx *Tx) (err error) {
568+ defer func() {
569+ if p := recover(); p != nil {
570+ err = panicked{p}
571+ }
572+ }()
573+ return fn(tx)
574+}
575diff --git a/vendor/src/github.com/boltdb/bolt/bolt_386.go b/vendor/src/github.com/boltdb/bolt/bolt_386.go
576index 856f401..e659bfb 100644
577--- a/vendor/src/github.com/boltdb/bolt/bolt_386.go
578+++ b/vendor/src/github.com/boltdb/bolt/bolt_386.go
579@@ -1,4 +1,7 @@
580 package bolt
581
582 // maxMapSize represents the largest mmap size supported by Bolt.
583-const maxMapSize = 0xFFFFFFF // 256MB
584+const maxMapSize = 0x7FFFFFFF // 2GB
585+
586+// maxAllocSize is the size used when creating array pointers.
587+const maxAllocSize = 0xFFFFFFF
588diff --git a/vendor/src/github.com/boltdb/bolt/bolt_amd64.go b/vendor/src/github.com/boltdb/bolt/bolt_amd64.go
589index 4262932..cca6b7e 100644
590--- a/vendor/src/github.com/boltdb/bolt/bolt_amd64.go
591+++ b/vendor/src/github.com/boltdb/bolt/bolt_amd64.go
592@@ -2,3 +2,6 @@ package bolt
593
594 // maxMapSize represents the largest mmap size supported by Bolt.
595 const maxMapSize = 0xFFFFFFFFFFFF // 256TB
596+
597+// maxAllocSize is the size used when creating array pointers.
598+const maxAllocSize = 0x7FFFFFFF
599diff --git a/vendor/src/github.com/boltdb/bolt/bolt_arm.go b/vendor/src/github.com/boltdb/bolt/bolt_arm.go
600index 856f401..e659bfb 100644
601--- a/vendor/src/github.com/boltdb/bolt/bolt_arm.go
602+++ b/vendor/src/github.com/boltdb/bolt/bolt_arm.go
603@@ -1,4 +1,7 @@
604 package bolt
605
606 // maxMapSize represents the largest mmap size supported by Bolt.
607-const maxMapSize = 0xFFFFFFF // 256MB
608+const maxMapSize = 0x7FFFFFFF // 2GB
609+
610+// maxAllocSize is the size used when creating array pointers.
611+const maxAllocSize = 0xFFFFFFF
612diff --git a/vendor/src/github.com/boltdb/bolt/bolt_arm64.go b/vendor/src/github.com/boltdb/bolt/bolt_arm64.go
613new file mode 100644
614index 0000000..6d23093
615--- /dev/null
616+++ b/vendor/src/github.com/boltdb/bolt/bolt_arm64.go
617@@ -0,0 +1,9 @@
618+// +build arm64
619+
620+package bolt
621+
622+// maxMapSize represents the largest mmap size supported by Bolt.
623+const maxMapSize = 0xFFFFFFFFFFFF // 256TB
624+
625+// maxAllocSize is the size used when creating array pointers.
626+const maxAllocSize = 0x7FFFFFFF
627diff --git a/vendor/src/github.com/boltdb/bolt/bolt_ppc64le.go b/vendor/src/github.com/boltdb/bolt/bolt_ppc64le.go
628new file mode 100644
629index 0000000..8351e12
630--- /dev/null
631+++ b/vendor/src/github.com/boltdb/bolt/bolt_ppc64le.go
632@@ -0,0 +1,9 @@
633+// +build ppc64le
634+
635+package bolt
636+
637+// maxMapSize represents the largest mmap size supported by Bolt.
638+const maxMapSize = 0xFFFFFFFFFFFF // 256TB
639+
640+// maxAllocSize is the size used when creating array pointers.
641+const maxAllocSize = 0x7FFFFFFF
642diff --git a/vendor/src/github.com/boltdb/bolt/bolt_s390x.go b/vendor/src/github.com/boltdb/bolt/bolt_s390x.go
643new file mode 100644
644index 0000000..f4dd26b
645--- /dev/null
646+++ b/vendor/src/github.com/boltdb/bolt/bolt_s390x.go
647@@ -0,0 +1,9 @@
648+// +build s390x
649+
650+package bolt
651+
652+// maxMapSize represents the largest mmap size supported by Bolt.
653+const maxMapSize = 0xFFFFFFFFFFFF // 256TB
654+
655+// maxAllocSize is the size used when creating array pointers.
656+const maxAllocSize = 0x7FFFFFFF
657diff --git a/vendor/src/github.com/boltdb/bolt/bolt_unix.go b/vendor/src/github.com/boltdb/bolt/bolt_unix.go
658index 95647a7..6eef6b2 100644
659--- a/vendor/src/github.com/boltdb/bolt/bolt_unix.go
660+++ b/vendor/src/github.com/boltdb/bolt/bolt_unix.go
661@@ -1,8 +1,9 @@
662-// +build !windows,!plan9
663+// +build !windows,!plan9,!solaris
664
665 package bolt
666
667 import (
668+ "fmt"
669 "os"
670 "syscall"
671 "time"
672@@ -10,7 +11,7 @@ import (
673 )
674
675 // flock acquires an advisory lock on a file descriptor.
676-func flock(f *os.File, timeout time.Duration) error {
677+func flock(f *os.File, exclusive bool, timeout time.Duration) error {
678 var t time.Time
679 for {
680 // If we're beyond our timeout then return an error.
681@@ -20,9 +21,13 @@ func flock(f *os.File, timeout time.Duration) error {
682 } else if timeout > 0 && time.Since(t) > timeout {
683 return ErrTimeout
684 }
685+ flag := syscall.LOCK_SH
686+ if exclusive {
687+ flag = syscall.LOCK_EX
688+ }
689
690 // Otherwise attempt to obtain an exclusive lock.
691- err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
692+ err := syscall.Flock(int(f.Fd()), flag|syscall.LOCK_NB)
693 if err == nil {
694 return nil
695 } else if err != syscall.EWOULDBLOCK {
696@@ -41,11 +46,28 @@ func funlock(f *os.File) error {
697
698 // mmap memory maps a DB's data file.
699 func mmap(db *DB, sz int) error {
700+ // Truncate and fsync to ensure file size metadata is flushed.
701+ // https://github.com/boltdb/bolt/issues/284
702+ if !db.NoGrowSync && !db.readOnly {
703+ if err := db.file.Truncate(int64(sz)); err != nil {
704+ return fmt.Errorf("file resize error: %s", err)
705+ }
706+ if err := db.file.Sync(); err != nil {
707+ return fmt.Errorf("file sync error: %s", err)
708+ }
709+ }
710+
711+ // Map the data file to memory.
712 b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED)
713 if err != nil {
714 return err
715 }
716
717+ // Advise the kernel that the mmap is accessed randomly.
718+ if err := madvise(b, syscall.MADV_RANDOM); err != nil {
719+ return fmt.Errorf("madvise: %s", err)
720+ }
721+
722 // Save the original byte slice and convert to a byte array pointer.
723 db.dataref = b
724 db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
725@@ -67,3 +89,12 @@ func munmap(db *DB) error {
726 db.datasz = 0
727 return err
728 }
729+
730+// NOTE: This function is copied from stdlib because it is not available on darwin.
731+func madvise(b []byte, advice int) (err error) {
732+ _, _, e1 := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), uintptr(advice))
733+ if e1 != 0 {
734+ err = e1
735+ }
736+ return
737+}
738diff --git a/vendor/src/github.com/boltdb/bolt/bolt_unix_solaris.go b/vendor/src/github.com/boltdb/bolt/bolt_unix_solaris.go
739new file mode 100644
740index 0000000..f480ee7
741--- /dev/null
742+++ b/vendor/src/github.com/boltdb/bolt/bolt_unix_solaris.go
743@@ -0,0 +1,101 @@
744+
745+package bolt
746+
747+import (
748+ "fmt"
749+ "os"
750+ "syscall"
751+ "time"
752+ "unsafe"
753+ "golang.org/x/sys/unix"
754+)
755+
756+// flock acquires an advisory lock on a file descriptor.
757+func flock(f *os.File, exclusive bool, timeout time.Duration) error {
758+ var t time.Time
759+ for {
760+ // If we're beyond our timeout then return an error.
761+ // This can only occur after we've attempted a flock once.
762+ if t.IsZero() {
763+ t = time.Now()
764+ } else if timeout > 0 && time.Since(t) > timeout {
765+ return ErrTimeout
766+ }
767+ var lock syscall.Flock_t
768+ lock.Start = 0
769+ lock.Len = 0
770+ lock.Pid = 0
771+ lock.Whence = 0
772+ lock.Pid = 0
773+ if exclusive {
774+ lock.Type = syscall.F_WRLCK
775+ } else {
776+ lock.Type = syscall.F_RDLCK
777+ }
778+ err := syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &lock)
779+ if err == nil {
780+ return nil
781+ } else if err != syscall.EAGAIN {
782+ return err
783+ }
784+
785+ // Wait for a bit and try again.
786+ time.Sleep(50 * time.Millisecond)
787+ }
788+}
789+
790+// funlock releases an advisory lock on a file descriptor.
791+func funlock(f *os.File) error {
792+ var lock syscall.Flock_t
793+ lock.Start = 0
794+ lock.Len = 0
795+ lock.Type = syscall.F_UNLCK
796+ lock.Whence = 0
797+ return syscall.FcntlFlock(uintptr(f.Fd()), syscall.F_SETLK, &lock)
798+}
799+
800+// mmap memory maps a DB's data file.
801+func mmap(db *DB, sz int) error {
802+ // Truncate and fsync to ensure file size metadata is flushed.
803+ // https://github.com/boltdb/bolt/issues/284
804+ if !db.NoGrowSync && !db.readOnly {
805+ if err := db.file.Truncate(int64(sz)); err != nil {
806+ return fmt.Errorf("file resize error: %s", err)
807+ }
808+ if err := db.file.Sync(); err != nil {
809+ return fmt.Errorf("file sync error: %s", err)
810+ }
811+ }
812+
813+ // Map the data file to memory.
814+ b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED)
815+ if err != nil {
816+ return err
817+ }
818+
819+ // Advise the kernel that the mmap is accessed randomly.
820+ if err := unix.Madvise(b, syscall.MADV_RANDOM); err != nil {
821+ return fmt.Errorf("madvise: %s", err)
822+ }
823+
824+ // Save the original byte slice and convert to a byte array pointer.
825+ db.dataref = b
826+ db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0]))
827+ db.datasz = sz
828+ return nil
829+}
830+
831+// munmap unmaps a DB's data file from memory.
832+func munmap(db *DB) error {
833+ // Ignore the unmap if we have no mapped data.
834+ if db.dataref == nil {
835+ return nil
836+ }
837+
838+ // Unmap using the original byte slice.
839+ err := unix.Munmap(db.dataref)
840+ db.dataref = nil
841+ db.data = nil
842+ db.datasz = 0
843+ return err
844+}
845diff --git a/vendor/src/github.com/boltdb/bolt/bolt_windows.go b/vendor/src/github.com/boltdb/bolt/bolt_windows.go
846index c8539d4..8b782be 100644
847--- a/vendor/src/github.com/boltdb/bolt/bolt_windows.go
848+++ b/vendor/src/github.com/boltdb/bolt/bolt_windows.go
849@@ -16,7 +16,7 @@ func fdatasync(db *DB) error {
850 }
851
852 // flock acquires an advisory lock on a file descriptor.
853-func flock(f *os.File, _ time.Duration) error {
854+func flock(f *os.File, _ bool, _ time.Duration) error {
855 return nil
856 }
857
858@@ -28,9 +28,11 @@ func funlock(f *os.File) error {
859 // mmap memory maps a DB's data file.
860 // Based on: https://github.com/edsrzf/mmap-go
861 func mmap(db *DB, sz int) error {
862- // Truncate the database to the size of the mmap.
863- if err := db.file.Truncate(int64(sz)); err != nil {
864- return fmt.Errorf("truncate: %s", err)
865+ if !db.readOnly {
866+ // Truncate the database to the size of the mmap.
867+ if err := db.file.Truncate(int64(sz)); err != nil {
868+ return fmt.Errorf("truncate: %s", err)
869+ }
870 }
871
872 // Open a file mapping handle.
873diff --git a/vendor/src/github.com/boltdb/bolt/bucket.go b/vendor/src/github.com/boltdb/bolt/bucket.go
874index 2630800..2925288 100644
875--- a/vendor/src/github.com/boltdb/bolt/bucket.go
876+++ b/vendor/src/github.com/boltdb/bolt/bucket.go
877@@ -99,6 +99,7 @@ func (b *Bucket) Cursor() *Cursor {
878
879 // Bucket retrieves a nested bucket by name.
880 // Returns nil if the bucket does not exist.
881+// The bucket instance is only valid for the lifetime of the transaction.
882 func (b *Bucket) Bucket(name []byte) *Bucket {
883 if b.buckets != nil {
884 if child := b.buckets[string(name)]; child != nil {
885@@ -148,6 +149,7 @@ func (b *Bucket) openBucket(value []byte) *Bucket {
886
887 // CreateBucket creates a new bucket at the given key and returns the new bucket.
888 // Returns an error if the key already exists, if the bucket name is blank, or if the bucket name is too long.
889+// The bucket instance is only valid for the lifetime of the transaction.
890 func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
891 if b.tx.db == nil {
892 return nil, ErrTxClosed
893@@ -192,6 +194,7 @@ func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
894
895 // CreateBucketIfNotExists creates a new bucket if it doesn't already exist and returns a reference to it.
896 // Returns an error if the bucket name is blank, or if the bucket name is too long.
897+// The bucket instance is only valid for the lifetime of the transaction.
898 func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) {
899 child, err := b.CreateBucket(key)
900 if err == ErrBucketExists {
901@@ -252,6 +255,7 @@ func (b *Bucket) DeleteBucket(key []byte) error {
902
903 // Get retrieves the value for a key in the bucket.
904 // Returns a nil value if the key does not exist or if the key is a nested bucket.
905+// The returned value is only valid for the life of the transaction.
906 func (b *Bucket) Get(key []byte) []byte {
907 k, v, flags := b.Cursor().seek(key)
908
909@@ -332,6 +336,12 @@ func (b *Bucket) NextSequence() (uint64, error) {
910 return 0, ErrTxNotWritable
911 }
912
913+ // Materialize the root node if it hasn't been already so that the
914+ // bucket will be saved during commit.
915+ if b.rootNode == nil {
916+ _ = b.node(b.root, nil)
917+ }
918+
919 // Increment and return the sequence.
920 b.bucket.sequence++
921 return b.bucket.sequence, nil
922@@ -339,7 +349,8 @@ func (b *Bucket) NextSequence() (uint64, error) {
923
924 // ForEach executes a function for each key/value pair in a bucket.
925 // If the provided function returns an error then the iteration is stopped and
926-// the error is returned to the caller.
927+// the error is returned to the caller. The provided function must not modify
928+// the bucket; this will result in undefined behavior.
929 func (b *Bucket) ForEach(fn func(k, v []byte) error) error {
930 if b.tx.db == nil {
931 return ErrTxClosed
932@@ -511,8 +522,12 @@ func (b *Bucket) spill() error {
933 // Update parent node.
934 var c = b.Cursor()
935 k, _, flags := c.seek([]byte(name))
936- _assert(bytes.Equal([]byte(name), k), "misplaced bucket header: %x -> %x", []byte(name), k)
937- _assert(flags&bucketLeafFlag != 0, "unexpected bucket header flag: %x", flags)
938+ if !bytes.Equal([]byte(name), k) {
939+ panic(fmt.Sprintf("misplaced bucket header: %x -> %x", []byte(name), k))
940+ }
941+ if flags&bucketLeafFlag == 0 {
942+ panic(fmt.Sprintf("unexpected bucket header flag: %x", flags))
943+ }
944 c.node().put([]byte(name), []byte(name), value, 0, bucketLeafFlag)
945 }
946
947@@ -528,7 +543,9 @@ func (b *Bucket) spill() error {
948 b.rootNode = b.rootNode.root()
949
950 // Update the root node for this bucket.
951- _assert(b.rootNode.pgid < b.tx.meta.pgid, "pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid)
952+ if b.rootNode.pgid >= b.tx.meta.pgid {
953+ panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid))
954+ }
955 b.root = b.rootNode.pgid
956
957 return nil
958@@ -659,7 +676,9 @@ func (b *Bucket) pageNode(id pgid) (*page, *node) {
959 // Inline buckets have a fake page embedded in their value so treat them
960 // differently. We'll return the rootNode (if available) or the fake page.
961 if b.root == 0 {
962- _assert(id == 0, "inline bucket non-zero page access(2): %d != 0", id)
963+ if id != 0 {
964+ panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id))
965+ }
966 if b.rootNode != nil {
967 return nil, b.rootNode
968 }
969diff --git a/vendor/src/github.com/boltdb/bolt/cursor.go b/vendor/src/github.com/boltdb/bolt/cursor.go
970index 3bfc2f1..006c548 100644
971--- a/vendor/src/github.com/boltdb/bolt/cursor.go
972+++ b/vendor/src/github.com/boltdb/bolt/cursor.go
973@@ -2,6 +2,7 @@ package bolt
974
975 import (
976 "bytes"
977+ "fmt"
978 "sort"
979 )
980
981@@ -9,6 +10,8 @@ import (
982 // Cursors see nested buckets with value == nil.
983 // Cursors can be obtained from a transaction and are valid as long as the transaction is open.
984 //
985+// Keys and values returned from the cursor are only valid for the life of the transaction.
986+//
987 // Changing data while traversing with a cursor may cause it to be invalidated
988 // and return unexpected keys and/or values. You must reposition your cursor
989 // after mutating data.
990@@ -24,6 +27,7 @@ func (c *Cursor) Bucket() *Bucket {
991
992 // First moves the cursor to the first item in the bucket and returns its key and value.
993 // If the bucket is empty then a nil key and value are returned.
994+// The returned key and value are only valid for the life of the transaction.
995 func (c *Cursor) First() (key []byte, value []byte) {
996 _assert(c.bucket.tx.db != nil, "tx closed")
997 c.stack = c.stack[:0]
998@@ -40,6 +44,7 @@ func (c *Cursor) First() (key []byte, value []byte) {
999
1000 // Last moves the cursor to the last item in the bucket and returns its key and value.
1001 // If the bucket is empty then a nil key and value are returned.
1002+// The returned key and value are only valid for the life of the transaction.
1003 func (c *Cursor) Last() (key []byte, value []byte) {
1004 _assert(c.bucket.tx.db != nil, "tx closed")
1005 c.stack = c.stack[:0]
1006@@ -57,6 +62,7 @@ func (c *Cursor) Last() (key []byte, value []byte) {
1007
1008 // Next moves the cursor to the next item in the bucket and returns its key and value.
1009 // If the cursor is at the end of the bucket then a nil key and value are returned.
1010+// The returned key and value are only valid for the life of the transaction.
1011 func (c *Cursor) Next() (key []byte, value []byte) {
1012 _assert(c.bucket.tx.db != nil, "tx closed")
1013 k, v, flags := c.next()
1014@@ -68,6 +74,7 @@ func (c *Cursor) Next() (key []byte, value []byte) {
1015
1016 // Prev moves the cursor to the previous item in the bucket and returns its key and value.
1017 // If the cursor is at the beginning of the bucket then a nil key and value are returned.
1018+// The returned key and value are only valid for the life of the transaction.
1019 func (c *Cursor) Prev() (key []byte, value []byte) {
1020 _assert(c.bucket.tx.db != nil, "tx closed")
1021
1022@@ -99,6 +106,7 @@ func (c *Cursor) Prev() (key []byte, value []byte) {
1023 // Seek moves the cursor to a given key and returns it.
1024 // If the key does not exist then the next key is used. If no keys
1025 // follow, a nil key is returned.
1026+// The returned key and value are only valid for the life of the transaction.
1027 func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) {
1028 k, v, flags := c.seek(seek)
1029
1030@@ -228,8 +236,8 @@ func (c *Cursor) next() (key []byte, value []byte, flags uint32) {
1031 // search recursively performs a binary search against a given page/node until it finds a given key.
1032 func (c *Cursor) search(key []byte, pgid pgid) {
1033 p, n := c.bucket.pageNode(pgid)
1034- if p != nil {
1035- _assert((p.flags&(branchPageFlag|leafPageFlag)) != 0, "invalid page type: %d: %x", p.id, p.flags)
1036+ if p != nil && (p.flags&(branchPageFlag|leafPageFlag)) == 0 {
1037+ panic(fmt.Sprintf("invalid page type: %d: %x", p.id, p.flags))
1038 }
1039 e := elemRef{page: p, node: n}
1040 c.stack = append(c.stack, e)
1041diff --git a/vendor/src/github.com/boltdb/bolt/db.go b/vendor/src/github.com/boltdb/bolt/db.go
1042index 6c45736..d39c4aa 100644
1043--- a/vendor/src/github.com/boltdb/bolt/db.go
1044+++ b/vendor/src/github.com/boltdb/bolt/db.go
1045@@ -12,9 +12,6 @@ import (
1046 "unsafe"
1047 )
1048
1049-// The smallest size that the mmap can be.
1050-const minMmapSize = 1 << 22 // 4MB
1051-
1052 // The largest step that can be taken when remapping the mmap.
1053 const maxMmapStep = 1 << 30 // 1GB
1054
1055@@ -30,6 +27,12 @@ const magic uint32 = 0xED0CDAED
1056 // must be synchronzied using the msync(2) syscall.
1057 const IgnoreNoSync = runtime.GOOS == "openbsd"
1058
1059+// Default values if not set in a DB instance.
1060+const (
1061+ DefaultMaxBatchSize int = 1000
1062+ DefaultMaxBatchDelay = 10 * time.Millisecond
1063+)
1064+
1065 // DB represents a collection of buckets persisted to a file on disk.
1066 // All data access is performed through transactions which can be obtained through the DB.
1067 // All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called.
1068@@ -52,9 +55,33 @@ type DB struct {
1069 // THIS IS UNSAFE. PLEASE USE WITH CAUTION.
1070 NoSync bool
1071
1072+ // When true, skips the truncate call when growing the database.
1073+ // Setting this to true is only safe on non-ext3/ext4 systems.
1074+ // Skipping truncation avoids preallocation of hard drive space and
1075+ // bypasses a truncate() and fsync() syscall on remapping.
1076+ //
1077+ // https://github.com/boltdb/bolt/issues/284
1078+ NoGrowSync bool
1079+
1080+ // MaxBatchSize is the maximum size of a batch. Default value is
1081+ // copied from DefaultMaxBatchSize in Open.
1082+ //
1083+ // If <=0, disables batching.
1084+ //
1085+ // Do not change concurrently with calls to Batch.
1086+ MaxBatchSize int
1087+
1088+ // MaxBatchDelay is the maximum delay before a batch starts.
1089+ // Default value is copied from DefaultMaxBatchDelay in Open.
1090+ //
1091+ // If <=0, effectively disables batching.
1092+ //
1093+ // Do not change concurrently with calls to Batch.
1094+ MaxBatchDelay time.Duration
1095+
1096 path string
1097 file *os.File
1098- dataref []byte
1099+ dataref []byte // mmap'ed readonly, write throws SEGV
1100 data *[maxMapSize]byte
1101 datasz int
1102 meta0 *meta
1103@@ -66,6 +93,9 @@ type DB struct {
1104 freelist *freelist
1105 stats Stats
1106
1107+ batchMu sync.Mutex
1108+ batch *batch
1109+
1110 rwlock sync.Mutex // Allows only one writer at a time.
1111 metalock sync.Mutex // Protects meta page access.
1112 mmaplock sync.RWMutex // Protects mmap access during remapping.
1113@@ -74,6 +104,10 @@ type DB struct {
1114 ops struct {
1115 writeAt func(b []byte, off int64) (n int, err error)
1116 }
1117+
1118+ // Read only mode.
1119+ // When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately.
1120+ readOnly bool
1121 }
1122
1123 // Path returns the path to currently open database file.
1124@@ -101,20 +135,34 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
1125 if options == nil {
1126 options = DefaultOptions
1127 }
1128+ db.NoGrowSync = options.NoGrowSync
1129+
1130+ // Set default values for later DB operations.
1131+ db.MaxBatchSize = DefaultMaxBatchSize
1132+ db.MaxBatchDelay = DefaultMaxBatchDelay
1133+
1134+ flag := os.O_RDWR
1135+ if options.ReadOnly {
1136+ flag = os.O_RDONLY
1137+ db.readOnly = true
1138+ }
1139
1140 // Open data file and separate sync handler for metadata writes.
1141 db.path = path
1142-
1143 var err error
1144- if db.file, err = os.OpenFile(db.path, os.O_RDWR|os.O_CREATE, mode); err != nil {
1145+ if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil {
1146 _ = db.close()
1147 return nil, err
1148 }
1149
1150- // Lock file so that other processes using Bolt cannot use the database
1151- // at the same time. This would cause corruption since the two processes
1152- // would write meta pages and free pages separately.
1153- if err := flock(db.file, options.Timeout); err != nil {
1154+ // Lock file so that other processes using Bolt in read-write mode cannot
1155+ // use the database at the same time. This would cause corruption since
1156+ // the two processes would write meta pages and free pages separately.
1157+ // The database file is locked exclusively (only one process can grab the lock)
1158+ // if !options.ReadOnly.
1159+ // The database file is locked using the shared lock (more than one process may
1160+ // hold a lock at the same time) otherwise (options.ReadOnly is set).
1161+ if err := flock(db.file, !db.readOnly, options.Timeout); err != nil {
1162 _ = db.close()
1163 return nil, err
1164 }
1165@@ -162,16 +210,6 @@ func (db *DB) mmap(minsz int) error {
1166 db.mmaplock.Lock()
1167 defer db.mmaplock.Unlock()
1168
1169- // Dereference all mmap references before unmapping.
1170- if db.rwtx != nil {
1171- db.rwtx.root.dereference()
1172- }
1173-
1174- // Unmap existing data before continuing.
1175- if err := db.munmap(); err != nil {
1176- return err
1177- }
1178-
1179 info, err := db.file.Stat()
1180 if err != nil {
1181 return fmt.Errorf("mmap stat error: %s", err)
1182@@ -184,7 +222,20 @@ func (db *DB) mmap(minsz int) error {
1183 if size < minsz {
1184 size = minsz
1185 }
1186- size = db.mmapSize(size)
1187+ size, err = db.mmapSize(size)
1188+ if err != nil {
1189+ return err
1190+ }
1191+
1192+ // Dereference all mmap references before unmapping.
1193+ if db.rwtx != nil {
1194+ db.rwtx.root.dereference()
1195+ }
1196+
1197+ // Unmap existing data before continuing.
1198+ if err := db.munmap(); err != nil {
1199+ return err
1200+ }
1201
1202 // Memory-map the data file as a byte slice.
1203 if err := mmap(db, size); err != nil {
1204@@ -215,22 +266,40 @@ func (db *DB) munmap() error {
1205 }
1206
1207 // mmapSize determines the appropriate size for the mmap given the current size
1208-// of the database. The minimum size is 4MB and doubles until it reaches 1GB.
1209-func (db *DB) mmapSize(size int) int {
1210- if size <= minMmapSize {
1211- return minMmapSize
1212- } else if size < maxMmapStep {
1213- size *= 2
1214- } else {
1215- size += maxMmapStep
1216+// of the database. The minimum size is 32KB and doubles until it reaches 1GB.
1217+// Returns an error if the new mmap size is greater than the max allowed.
1218+func (db *DB) mmapSize(size int) (int, error) {
1219+ // Double the size from 32KB until 1GB.
1220+ for i := uint(15); i <= 30; i++ {
1221+ if size <= 1<<i {
1222+ return 1 << i, nil
1223+ }
1224+ }
1225+
1226+ // Verify the requested size is not above the maximum allowed.
1227+ if size > maxMapSize {
1228+ return 0, fmt.Errorf("mmap too large")
1229+ }
1230+
1231+ // If larger than 1GB then grow by 1GB at a time.
1232+ sz := int64(size)
1233+ if remainder := sz % int64(maxMmapStep); remainder > 0 {
1234+ sz += int64(maxMmapStep) - remainder
1235 }
1236
1237 // Ensure that the mmap size is a multiple of the page size.
1238- if (size % db.pageSize) != 0 {
1239- size = ((size / db.pageSize) + 1) * db.pageSize
1240+ // This should always be true since we're incrementing in MBs.
1241+ pageSize := int64(db.pageSize)
1242+ if (sz % pageSize) != 0 {
1243+ sz = ((sz / pageSize) + 1) * pageSize
1244+ }
1245+
1246+ // If we've exceeded the max size then only grow up to the max size.
1247+ if sz > maxMapSize {
1248+ sz = maxMapSize
1249 }
1250
1251- return size
1252+ return int(sz), nil
1253 }
1254
1255 // init creates a new database file and initializes its meta pages.
1256@@ -250,7 +319,6 @@ func (db *DB) init() error {
1257 m.magic = magic
1258 m.version = version
1259 m.pageSize = uint32(db.pageSize)
1260- m.version = version
1261 m.freelist = 2
1262 m.root = bucket{root: 3}
1263 m.pgid = 4
1264@@ -283,8 +351,15 @@ func (db *DB) init() error {
1265 // Close releases all database resources.
1266 // All transactions must be closed before closing the database.
1267 func (db *DB) Close() error {
1268+ db.rwlock.Lock()
1269+ defer db.rwlock.Unlock()
1270+
1271 db.metalock.Lock()
1272 defer db.metalock.Unlock()
1273+
1274+ db.mmaplock.RLock()
1275+ defer db.mmaplock.RUnlock()
1276+
1277 return db.close()
1278 }
1279
1280@@ -304,8 +379,11 @@ func (db *DB) close() error {
1281
1282 // Close file handles.
1283 if db.file != nil {
1284- // Unlock the file.
1285- _ = funlock(db.file)
1286+ // No need to unlock read-only file.
1287+ if !db.readOnly {
1288+ // Unlock the file.
1289+ _ = funlock(db.file)
1290+ }
1291
1292 // Close the file descriptor.
1293 if err := db.file.Close(); err != nil {
1294@@ -323,6 +401,11 @@ func (db *DB) close() error {
1295 // will cause the calls to block and be serialized until the current write
1296 // transaction finishes.
1297 //
1298+// Transactions should not be dependent on one another. Opening a read
1299+// transaction and a write transaction in the same goroutine can cause the
1300+// writer to deadlock because the database periodically needs to re-mmap itself
1301+// as it grows and it cannot do that while a read transaction is open.
1302+//
1303 // IMPORTANT: You must close read-only transactions after you are finished or
1304 // else the database will not reclaim old pages.
1305 func (db *DB) Begin(writable bool) (*Tx, error) {
1306@@ -371,6 +454,11 @@ func (db *DB) beginTx() (*Tx, error) {
1307 }
1308
1309 func (db *DB) beginRWTx() (*Tx, error) {
1310+ // If the database was opened with Options.ReadOnly, return an error.
1311+ if db.readOnly {
1312+ return nil, ErrDatabaseReadOnly
1313+ }
1314+
1315 // Obtain writer lock. This is released by the transaction when it closes.
1316 // This enforces only one writer transaction at a time.
1317 db.rwlock.Lock()
1318@@ -501,6 +589,12 @@ func (db *DB) View(fn func(*Tx) error) error {
1319 return nil
1320 }
1321
1322+// Sync executes fdatasync() against the database file handle.
1323+//
1324+// This is not necessary under normal operation, however, if you use NoSync
1325+// then it allows you to force the database file to sync against the disk.
1326+func (db *DB) Sync() error { return fdatasync(db) }
1327+
1328 // Stats retrieves ongoing performance stats for the database.
1329 // This is only updated when a transaction closes.
1330 func (db *DB) Stats() Stats {
1331@@ -561,18 +655,30 @@ func (db *DB) allocate(count int) (*page, error) {
1332 return p, nil
1333 }
1334
1335+func (db *DB) IsReadOnly() bool {
1336+ return db.readOnly
1337+}
1338+
1339 // Options represents the options that can be set when opening a database.
1340 type Options struct {
1341 // Timeout is the amount of time to wait to obtain a file lock.
1342 // When set to zero it will wait indefinitely. This option is only
1343 // available on Darwin and Linux.
1344 Timeout time.Duration
1345+
1346+ // Sets the DB.NoGrowSync flag before memory mapping the file.
1347+ NoGrowSync bool
1348+
1349+ // Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
1350+ // grab a shared lock (UNIX).
1351+ ReadOnly bool
1352 }
1353
1354 // DefaultOptions represent the options used if nil options are passed into Open().
1355 // No timeout is used which will cause Bolt to wait indefinitely for a lock.
1356 var DefaultOptions = &Options{
1357- Timeout: 0,
1358+ Timeout: 0,
1359+ NoGrowSync: false,
1360 }
1361
1362 // Stats represents statistics about the database.
1363@@ -647,9 +753,11 @@ func (m *meta) copy(dest *meta) {
1364
1365 // write writes the meta onto a page.
1366 func (m *meta) write(p *page) {
1367-
1368- _assert(m.root.root < m.pgid, "root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid)
1369- _assert(m.freelist < m.pgid, "freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid)
1370+ if m.root.root >= m.pgid {
1371+ panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
1372+ } else if m.freelist >= m.pgid {
1373+ panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
1374+ }
1375
1376 // Page id is either going to be 0 or 1 which we can determine by the transaction ID.
1377 p.id = pgid(m.txid % 2)
1378@@ -675,13 +783,8 @@ func _assert(condition bool, msg string, v ...interface{}) {
1379 }
1380 }
1381
1382-func warn(v ...interface{}) {
1383- fmt.Fprintln(os.Stderr, v...)
1384-}
1385-
1386-func warnf(msg string, v ...interface{}) {
1387- fmt.Fprintf(os.Stderr, msg+"\n", v...)
1388-}
1389+func warn(v ...interface{}) { fmt.Fprintln(os.Stderr, v...) }
1390+func warnf(msg string, v ...interface{}) { fmt.Fprintf(os.Stderr, msg+"\n", v...) }
1391
1392 func printstack() {
1393 stack := strings.Join(strings.Split(string(debug.Stack()), "\n")[2:], "\n")
1394diff --git a/vendor/src/github.com/boltdb/bolt/errors.go b/vendor/src/github.com/boltdb/bolt/errors.go
1395index aa504f1..6883786 100644
1396--- a/vendor/src/github.com/boltdb/bolt/errors.go
1397+++ b/vendor/src/github.com/boltdb/bolt/errors.go
1398@@ -36,6 +36,10 @@ var (
1399 // ErrTxClosed is returned when committing or rolling back a transaction
1400 // that has already been committed or rolled back.
1401 ErrTxClosed = errors.New("tx closed")
1402+
1403+ // ErrDatabaseReadOnly is returned when a mutating transaction is started on a
1404+ // read-only database.
1405+ ErrDatabaseReadOnly = errors.New("database is in read-only mode")
1406 )
1407
1408 // These errors can occur when putting or deleting a value or a bucket.
1409diff --git a/vendor/src/github.com/boltdb/bolt/freelist.go b/vendor/src/github.com/boltdb/bolt/freelist.go
1410index 150e3e6..0161948 100644
1411--- a/vendor/src/github.com/boltdb/bolt/freelist.go
1412+++ b/vendor/src/github.com/boltdb/bolt/freelist.go
1413@@ -1,6 +1,7 @@
1414 package bolt
1415
1416 import (
1417+ "fmt"
1418 "sort"
1419 "unsafe"
1420 )
1421@@ -47,15 +48,14 @@ func (f *freelist) pending_count() int {
1422
1423 // all returns a list of all free ids and all pending ids in one sorted list.
1424 func (f *freelist) all() []pgid {
1425- ids := make([]pgid, len(f.ids))
1426- copy(ids, f.ids)
1427+ m := make(pgids, 0)
1428
1429 for _, list := range f.pending {
1430- ids = append(ids, list...)
1431+ m = append(m, list...)
1432 }
1433
1434- sort.Sort(pgids(ids))
1435- return ids
1436+ sort.Sort(m)
1437+ return pgids(f.ids).merge(m)
1438 }
1439
1440 // allocate returns the starting page id of a contiguous list of pages of a given size.
1441@@ -67,7 +67,9 @@ func (f *freelist) allocate(n int) pgid {
1442
1443 var initial, previd pgid
1444 for i, id := range f.ids {
1445- _assert(id > 1, "invalid page allocation: %d", id)
1446+ if id <= 1 {
1447+ panic(fmt.Sprintf("invalid page allocation: %d", id))
1448+ }
1449
1450 // Reset initial page if this is not contiguous.
1451 if previd == 0 || id-previd != 1 {
1452@@ -103,13 +105,17 @@ func (f *freelist) allocate(n int) pgid {
1453 // free releases a page and its overflow for a given transaction id.
1454 // If the page is already free then a panic will occur.
1455 func (f *freelist) free(txid txid, p *page) {
1456- _assert(p.id > 1, "cannot free page 0 or 1: %d", p.id)
1457+ if p.id <= 1 {
1458+ panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id))
1459+ }
1460
1461 // Free page and all its overflow pages.
1462 var ids = f.pending[txid]
1463 for id := p.id; id <= p.id+pgid(p.overflow); id++ {
1464 // Verify that page is not already free.
1465- _assert(!f.cache[id], "page %d already freed", id)
1466+ if f.cache[id] {
1467+ panic(fmt.Sprintf("page %d already freed", id))
1468+ }
1469
1470 // Add to the freelist and cache.
1471 ids = append(ids, id)
1472@@ -120,15 +126,17 @@ func (f *freelist) free(txid txid, p *page) {
1473
1474 // release moves all page ids for a transaction id (or older) to the freelist.
1475 func (f *freelist) release(txid txid) {
1476+ m := make(pgids, 0)
1477 for tid, ids := range f.pending {
1478 if tid <= txid {
1479 // Move transaction's pending pages to the available freelist.
1480 // Don't remove from the cache since the page is still free.
1481- f.ids = append(f.ids, ids...)
1482+ m = append(m, ids...)
1483 delete(f.pending, tid)
1484 }
1485 }
1486- sort.Sort(pgids(f.ids))
1487+ sort.Sort(m)
1488+ f.ids = pgids(f.ids).merge(m)
1489 }
1490
1491 // rollback removes the pages from a given pending tx.
1492diff --git a/vendor/src/github.com/boltdb/bolt/node.go b/vendor/src/github.com/boltdb/bolt/node.go
1493index c204c39..c9fb21c 100644
1494--- a/vendor/src/github.com/boltdb/bolt/node.go
1495+++ b/vendor/src/github.com/boltdb/bolt/node.go
1496@@ -2,6 +2,7 @@ package bolt
1497
1498 import (
1499 "bytes"
1500+ "fmt"
1501 "sort"
1502 "unsafe"
1503 )
1504@@ -70,7 +71,9 @@ func (n *node) pageElementSize() int {
1505
1506 // childAt returns the child node at a given index.
1507 func (n *node) childAt(index int) *node {
1508- _assert(!n.isLeaf, "invalid childAt(%d) on a leaf node", index)
1509+ if n.isLeaf {
1510+ panic(fmt.Sprintf("invalid childAt(%d) on a leaf node", index))
1511+ }
1512 return n.bucket.node(n.inodes[index].pgid, n)
1513 }
1514
1515@@ -111,9 +114,13 @@ func (n *node) prevSibling() *node {
1516
1517 // put inserts a key/value.
1518 func (n *node) put(oldKey, newKey, value []byte, pgid pgid, flags uint32) {
1519- _assert(pgid < n.bucket.tx.meta.pgid, "pgid (%d) above high water mark (%d)", pgid, n.bucket.tx.meta.pgid)
1520- _assert(len(oldKey) > 0, "put: zero-length old key")
1521- _assert(len(newKey) > 0, "put: zero-length new key")
1522+ if pgid >= n.bucket.tx.meta.pgid {
1523+ panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", pgid, n.bucket.tx.meta.pgid))
1524+ } else if len(oldKey) <= 0 {
1525+ panic("put: zero-length old key")
1526+ } else if len(newKey) <= 0 {
1527+ panic("put: zero-length new key")
1528+ }
1529
1530 // Find insertion index.
1531 index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 })
1532@@ -189,7 +196,9 @@ func (n *node) write(p *page) {
1533 p.flags |= branchPageFlag
1534 }
1535
1536- _assert(len(n.inodes) < 0xFFFF, "inode overflow: %d (pgid=%d)", len(n.inodes), p.id)
1537+ if len(n.inodes) >= 0xFFFF {
1538+ panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.id))
1539+ }
1540 p.count = uint16(len(n.inodes))
1541
1542 // Loop over each item and write it to the page.
1543@@ -212,11 +221,20 @@ func (n *node) write(p *page) {
1544 _assert(elem.pgid != p.id, "write: circular dependency occurred")
1545 }
1546
1547+ // If the length of key+value is larger than the max allocation size
1548+ // then we need to reallocate the byte array pointer.
1549+ //
1550+ // See: https://github.com/boltdb/bolt/pull/335
1551+ klen, vlen := len(item.key), len(item.value)
1552+ if len(b) < klen+vlen {
1553+ b = (*[maxAllocSize]byte)(unsafe.Pointer(&b[0]))[:]
1554+ }
1555+
1556 // Write data for the element to the end of the page.
1557 copy(b[0:], item.key)
1558- b = b[len(item.key):]
1559+ b = b[klen:]
1560 copy(b[0:], item.value)
1561- b = b[len(item.value):]
1562+ b = b[vlen:]
1563 }
1564
1565 // DEBUG ONLY: n.dump()
1566@@ -348,7 +366,9 @@ func (n *node) spill() error {
1567 }
1568
1569 // Write the node.
1570- _assert(p.id < tx.meta.pgid, "pgid (%d) above high water mark (%d)", p.id, tx.meta.pgid)
1571+ if p.id >= tx.meta.pgid {
1572+ panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.id, tx.meta.pgid))
1573+ }
1574 node.pgid = p.id
1575 node.write(p)
1576 node.spilled = true
1577diff --git a/vendor/src/github.com/boltdb/bolt/page.go b/vendor/src/github.com/boltdb/bolt/page.go
1578index b3dc473..818aa1b 100644
1579--- a/vendor/src/github.com/boltdb/bolt/page.go
1580+++ b/vendor/src/github.com/boltdb/bolt/page.go
1581@@ -3,12 +3,12 @@ package bolt
1582 import (
1583 "fmt"
1584 "os"
1585+ "sort"
1586 "unsafe"
1587 )
1588
1589 const pageHeaderSize = int(unsafe.Offsetof(((*page)(nil)).ptr))
1590
1591-const maxAllocSize = 0xFFFFFFF
1592 const minKeysPerPage = 2
1593
1594 const branchPageElementSize = int(unsafe.Sizeof(branchPageElement{}))
1595@@ -97,7 +97,7 @@ type branchPageElement struct {
1596 // key returns a byte slice of the node key.
1597 func (n *branchPageElement) key() []byte {
1598 buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
1599- return buf[n.pos : n.pos+n.ksize]
1600+ return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize]
1601 }
1602
1603 // leafPageElement represents a node on a leaf page.
1604@@ -111,13 +111,13 @@ type leafPageElement struct {
1605 // key returns a byte slice of the node key.
1606 func (n *leafPageElement) key() []byte {
1607 buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
1608- return buf[n.pos : n.pos+n.ksize]
1609+ return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize]
1610 }
1611
1612 // value returns a byte slice of the node value.
1613 func (n *leafPageElement) value() []byte {
1614 buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
1615- return buf[n.pos+n.ksize : n.pos+n.ksize+n.vsize]
1616+ return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize]
1617 }
1618
1619 // PageInfo represents human readable information about a page.
1620@@ -133,3 +133,40 @@ type pgids []pgid
1621 func (s pgids) Len() int { return len(s) }
1622 func (s pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
1623 func (s pgids) Less(i, j int) bool { return s[i] < s[j] }
1624+
1625+// merge returns the sorted union of a and b.
1626+func (a pgids) merge(b pgids) pgids {
1627+ // Return the opposite slice if one is nil.
1628+ if len(a) == 0 {
1629+ return b
1630+ } else if len(b) == 0 {
1631+ return a
1632+ }
1633+
1634+ // Create a list to hold all elements from both lists.
1635+ merged := make(pgids, 0, len(a)+len(b))
1636+
1637+ // Assign lead to the slice with a lower starting value, follow to the higher value.
1638+ lead, follow := a, b
1639+ if b[0] < a[0] {
1640+ lead, follow = b, a
1641+ }
1642+
1643+ // Continue while there are elements in the lead.
1644+ for len(lead) > 0 {
1645+ // Merge largest prefix of lead that is ahead of follow[0].
1646+ n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] })
1647+ merged = append(merged, lead[:n]...)
1648+ if n >= len(lead) {
1649+ break
1650+ }
1651+
1652+ // Swap lead and follow.
1653+ lead, follow = follow, lead[n:]
1654+ }
1655+
1656+ // Append what's left in follow.
1657+ merged = append(merged, follow...)
1658+
1659+ return merged
1660+}
1661diff --git a/vendor/src/github.com/boltdb/bolt/tx.go b/vendor/src/github.com/boltdb/bolt/tx.go
1662index c041d73..fe6c287 100644
1663--- a/vendor/src/github.com/boltdb/bolt/tx.go
1664+++ b/vendor/src/github.com/boltdb/bolt/tx.go
1665@@ -87,18 +87,21 @@ func (tx *Tx) Stats() TxStats {
1666
1667 // Bucket retrieves a bucket by name.
1668 // Returns nil if the bucket does not exist.
1669+// The bucket instance is only valid for the lifetime of the transaction.
1670 func (tx *Tx) Bucket(name []byte) *Bucket {
1671 return tx.root.Bucket(name)
1672 }
1673
1674 // CreateBucket creates a new bucket.
1675 // Returns an error if the bucket already exists, if the bucket name is blank, or if the bucket name is too long.
1676+// The bucket instance is only valid for the lifetime of the transaction.
1677 func (tx *Tx) CreateBucket(name []byte) (*Bucket, error) {
1678 return tx.root.CreateBucket(name)
1679 }
1680
1681 // CreateBucketIfNotExists creates a new bucket if it doesn't already exist.
1682 // Returns an error if the bucket name is blank, or if the bucket name is too long.
1683+// The bucket instance is only valid for the lifetime of the transaction.
1684 func (tx *Tx) CreateBucketIfNotExists(name []byte) (*Bucket, error) {
1685 return tx.root.CreateBucketIfNotExists(name)
1686 }
1687@@ -127,7 +130,8 @@ func (tx *Tx) OnCommit(fn func()) {
1688 }
1689
1690 // Commit writes all changes to disk and updates the meta page.
1691-// Returns an error if a disk write error occurs.
1692+// Returns an error if a disk write error occurs, or if Commit is
1693+// called on a read-only transaction.
1694 func (tx *Tx) Commit() error {
1695 _assert(!tx.managed, "managed tx commit not allowed")
1696 if tx.db == nil {
1697@@ -203,7 +207,8 @@ func (tx *Tx) Commit() error {
1698 return nil
1699 }
1700
1701-// Rollback closes the transaction and ignores all previous updates.
1702+// Rollback closes the transaction and ignores all previous updates. Read-only
1703+// transactions must be rolled back and not committed.
1704 func (tx *Tx) Rollback() error {
1705 _assert(!tx.managed, "managed tx rollback not allowed")
1706 if tx.db == nil {
1707@@ -234,7 +239,8 @@ func (tx *Tx) close() {
1708 var freelistPendingN = tx.db.freelist.pending_count()
1709 var freelistAlloc = tx.db.freelist.size()
1710
1711- // Remove writer lock.
1712+ // Remove transaction ref & writer lock.
1713+ tx.db.rwtx = nil
1714 tx.db.rwlock.Unlock()
1715
1716 // Merge statistics.
1717@@ -248,41 +254,51 @@ func (tx *Tx) close() {
1718 } else {
1719 tx.db.removeTx(tx)
1720 }
1721+
1722+ // Clear all references.
1723 tx.db = nil
1724+ tx.meta = nil
1725+ tx.root = Bucket{tx: tx}
1726+ tx.pages = nil
1727 }
1728
1729 // Copy writes the entire database to a writer.
1730-// A reader transaction is maintained during the copy so it is safe to continue
1731-// using the database while a copy is in progress.
1732-// Copy will write exactly tx.Size() bytes into the writer.
1733+// This function exists for backwards compatibility. Use WriteTo() instead.
1734 func (tx *Tx) Copy(w io.Writer) error {
1735- var f *os.File
1736- var err error
1737+ _, err := tx.WriteTo(w)
1738+ return err
1739+}
1740
1741+// WriteTo writes the entire database to a writer.
1742+// If err == nil then exactly tx.Size() bytes will be written into the writer.
1743+func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
1744 // Attempt to open reader directly.
1745+ var f *os.File
1746 if f, err = os.OpenFile(tx.db.path, os.O_RDONLY|odirect, 0); err != nil {
1747 // Fallback to a regular open if that doesn't work.
1748 if f, err = os.OpenFile(tx.db.path, os.O_RDONLY, 0); err != nil {
1749- return err
1750+ return 0, err
1751 }
1752 }
1753
1754 // Copy the meta pages.
1755 tx.db.metalock.Lock()
1756- _, err = io.CopyN(w, f, int64(tx.db.pageSize*2))
1757+ n, err = io.CopyN(w, f, int64(tx.db.pageSize*2))
1758 tx.db.metalock.Unlock()
1759 if err != nil {
1760 _ = f.Close()
1761- return fmt.Errorf("meta copy: %s", err)
1762+ return n, fmt.Errorf("meta copy: %s", err)
1763 }
1764
1765 // Copy data pages.
1766- if _, err := io.CopyN(w, f, tx.Size()-int64(tx.db.pageSize*2)); err != nil {
1767+ wn, err := io.CopyN(w, f, tx.Size()-int64(tx.db.pageSize*2))
1768+ n += wn
1769+ if err != nil {
1770 _ = f.Close()
1771- return err
1772+ return n, err
1773 }
1774
1775- return f.Close()
1776+ return n, f.Close()
1777 }
1778
1779 // CopyFile copies the entire database to file at the given path.
1780@@ -416,15 +432,39 @@ func (tx *Tx) write() error {
1781 // Write pages to disk in order.
1782 for _, p := range pages {
1783 size := (int(p.overflow) + 1) * tx.db.pageSize
1784- buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:size]
1785 offset := int64(p.id) * int64(tx.db.pageSize)
1786- if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
1787- return err
1788- }
1789
1790- // Update statistics.
1791- tx.stats.Write++
1792+ // Write out page in "max allocation" sized chunks.
1793+ ptr := (*[maxAllocSize]byte)(unsafe.Pointer(p))
1794+ for {
1795+ // Limit our write to our max allocation size.
1796+ sz := size
1797+ if sz > maxAllocSize-1 {
1798+ sz = maxAllocSize - 1
1799+ }
1800+
1801+ // Write chunk to disk.
1802+ buf := ptr[:sz]
1803+ if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
1804+ return err
1805+ }
1806+
1807+ // Update statistics.
1808+ tx.stats.Write++
1809+
1810+ // Exit inner for loop if we've written all the chunks.
1811+ size -= sz
1812+ if size == 0 {
1813+ break
1814+ }
1815+
1816+ // Otherwise move offset forward and move pointer to next chunk.
1817+ offset += int64(sz)
1818+ ptr = (*[maxAllocSize]byte)(unsafe.Pointer(&ptr[sz]))
1819+ }
1820 }
1821+
1822+ // Ignore file sync if flag is set on DB.
1823 if !tx.db.NoSync || IgnoreNoSync {
1824 if err := fdatasync(tx.db); err != nil {
1825 return err
1826--
18271.9.1
1828