Tokyo Cabinet is an early key-value (KV) database written in C. It supports a wide range of data structures and is known mainly for its fast read and write performance at certain data volumes.

Tokyo Cabinet

Tokyo Cabinet data structure.

Tokyo Cabinet data structure.

Here is a simple read/write comparison with goleveldb and boltdb, using Tokyo Cabinet’s Hash data structure.

Test with 10 records

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
% go test -bench "Benchmark*" -benchmem -benchtime 10s
kvLst len 20
db size 7847B 0MB
goos: darwin
goarch: amd64
pkg: tk
cpu: Intel(R) Core(TM) i7-4870HQ CPU @ 2.50GHz
BenchmarkTokyoCabinetPut-8   	 3570594	      3318 ns/op	     480 B/op	      20 allocs/op
BenchmarkTokyoCabinetGet-8   	 1582812	      7626 ns/op	    8632 B/op	      40 allocs/op
BenchmarkGoleveldbPut-8      	   68617	    182042 ns/op	    1920 B/op	      36 allocs/op
BenchmarkGoleveldbGet-8      	  536542	     22335 ns/op	   15112 B/op	     135 allocs/op
BenchmarkBoltdbPut-8         	     340	  31076948 ns/op	    8546 B/op	      85 allocs/op
BenchmarkBoltdbGet-8         	 3678506	      3162 ns/op	    1112 B/op	      25 allocs/op

The raw data content is 7847 B (0 MB). Sizes of the data files on disk:

1
2
3
4
% du -h  -d=0 ./*                                     
64K	./bdb
528K	./kc.hdb
152K	./ldb

Test with 10,000 records

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
% go test -bench "Benchmark*" -benchmem -benchtime 10s
kvLst len 20000
db size 14650158B 13MB
goos: darwin
goarch: amd64
pkg: tk
cpu: Intel(R) Core(TM) i7-4870HQ CPU @ 2.50GHz
BenchmarkTokyoCabinetPut-8   	    2432	   4647572 ns/op	  480000 B/op	   20000 allocs/op
BenchmarkTokyoCabinetGet-8   	    1141	  10363653 ns/op	15956627 B/op	   40000 allocs/op
BenchmarkGoleveldbPut-8      	      73	 453146188 ns/op	 4003451 B/op	   46975 allocs/op
BenchmarkGoleveldbGet-8      	     256	  48339840 ns/op	23214270 B/op	  147134 allocs/op
BenchmarkBoltdbPut-8         	     102	 101226397 ns/op	 8089536 B/op	   61063 allocs/op
BenchmarkBoltdbGet-8         	    1807	   6226045 ns/op	 1680423 B/op	   30005 allocs/op

The raw data content is 14650158 B (13 MB). Sizes of the data files on disk:

1
2
3
4
% du -h  -d=0 ./*                                     
 58M	./bdb
 15M	./kc.hdb
8.1M	./ldb

Test with 100,000 records

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
% go test -bench "Benchmark*" -benchmem -benchtime 10s
kvLst len 200000
db size 154026354B 146MB
goos: darwin
goarch: amd64
pkg: tk
cpu: Intel(R) Core(TM) i7-4870HQ CPU @ 2.50GHz
BenchmarkTokyoCabinetPut-8   	      33	 353047212 ns/op	 4800000 B/op	  200000 allocs/op
BenchmarkTokyoCabinetGet-8   	      43	 247528234 ns/op	167481822 B/op	  400000 allocs/op
BenchmarkGoleveldbPut-8      	       3	7956023630 ns/op	64237784 B/op	  585691 allocs/op
BenchmarkGoleveldbGet-8      	      24	 478244166 ns/op	240414876 B/op	 1478027 allocs/op
BenchmarkBoltdbPut-8         	       1	35963353562 ns/op	3928988728 B/op	 4990126 allocs/op
BenchmarkBoltdbGet-8         	     140	  83254538 ns/op	16800468 B/op	  300005 allocs/op

The raw data content is 154026354 B (146 MB). Sizes of the data files on disk:

1
2
3
4
% du -h  -d=0 ./*
221M	./bdb
150M	./kc.hdb
 87M	./ldb

Test code

TokyoCabinet

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
// BenchmarkTokyoCabinetPut measures writing the contents of kvLst into the
// Tokyo Cabinet hash database file "kc.hdb".
//
// kvLst is read as alternating keys and values (key at index i, value at
// i+1), so one benchmark iteration issues len(kvLst)/2 Put calls.
func BenchmarkTokyoCabinetPut(b *testing.B) {
	// Keep the pointer returned by NewHDB instead of copying the struct it
	// points to, so every call operates on the handle the library created.
	db := tokyocabinet.NewHDB()
	// Stop right away if the database cannot be opened; otherwise the failure
	// would only show up later as Put errors inside the timed loop.
	if err := db.Open("kc.hdb", tokyocabinet.HDBOREADER|tokyocabinet.HDBOWRITER|tokyocabinet.HDBOCREAT); err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	b.ResetTimer()
	// Deferred after Close, so it runs first: the timer is stopped before the
	// database is closed.
	defer b.StopTimer()
	for j := 0; j < b.N; j++ {
		for i := 0; i < len(kvLst); i += 2 {
			if err := db.Put(kvLst[i], kvLst[i+1]); err != nil {
				log.Fatal(err)
			}
		}
	}
}

// BenchmarkTokyoCabinetGet measures reading every key of kvLst back from the
// Tokyo Cabinet hash database file "kc.hdb" and checks each returned value.
//
// kvLst is read as alternating keys and values (key at index i, value at
// i+1). The benchmark aborts on the first lookup error or value mismatch.
func BenchmarkTokyoCabinetGet(b *testing.B) {
	// Keep the pointer returned by NewHDB instead of copying the struct it
	// points to, so every call operates on the handle the library created.
	db := tokyocabinet.NewHDB()
	// Stop right away if the database cannot be opened; otherwise the failure
	// would only show up later as Get errors inside the timed loop.
	if err := db.Open("kc.hdb", tokyocabinet.HDBOREADER|tokyocabinet.HDBOWRITER|tokyocabinet.HDBOCREAT); err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	b.ResetTimer()
	// Deferred after Close, so it runs first: the timer is stopped before the
	// database is closed.
	defer b.StopTimer()
	for j := 0; j < b.N; j++ {
		for i := 0; i < len(kvLst); i += 2 {
			v, err := db.Get(kvLst[i])
			if err != nil {
				// i/2+1 is the 1-based position of this lookup within the pass.
				log.Fatalf("get %q (lookup #%d): %v", kvLst[i], i/2+1, err)
			}
			if !bytes.Equal(v, kvLst[i+1]) {
				log.Fatalf("unexpected value for key %q", kvLst[i])
			}
		}
	}
}

goleveldb

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
// BenchmarkGoleveldbGet measures reading every key of kvLst back from the
// goleveldb database in directory "ldb" and checks each returned value.
//
// kvLst is read as alternating keys and values (key at index i, value at
// i+1). The benchmark aborts on the first lookup error or value mismatch.
func BenchmarkGoleveldbGet(b *testing.B) {
	db, err := leveldb.OpenFile("ldb", nil)
	if err != nil {
		// Without this check a failed open would leave db nil and the first
		// Get would panic instead of reporting the real cause.
		log.Fatal(err)
	}
	defer func() {
		//_ = db.CompactRange(util.Range{})
		_ = db.Close()
	}()

	b.ResetTimer()
	// Deferred after the close function, so it runs first: the timer is
	// stopped before the database is closed.
	defer b.StopTimer()
	for j := 0; j < b.N; j++ {
		for i := 0; i < len(kvLst); i += 2 {
			v, err := db.Get(kvLst[i], nil)
			if err != nil {
				log.Fatal(err)
			}
			if !bytes.Equal(v, kvLst[i+1]) {
				log.Fatalf("unexpected value for key %q", kvLst[i])
			}
		}
	}
}

boltdb

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
// BenchmarkBoltdbPut measures writing the contents of kvLst into bucket "b"
// of the bolt database file "bdb".
//
// kvLst is read as alternating keys and values (key at index i, value at
// i+1). Each benchmark iteration writes all pairs inside a single Update
// transaction.
func BenchmarkBoltdbPut(b *testing.B) {
	db, err := bolt.Open("bdb", 0600, nil)
	if err != nil {
		// Without this check a failed open would leave db nil and the first
		// transaction would panic instead of reporting the real cause.
		log.Fatal(err)
	}
	defer db.Close()

	bucket := []byte("b")
	// Setup: make sure the bucket exists. Done before ResetTimer so that the
	// bucket-creation transaction is not part of the measurement.
	err = db.Update(func(tx *bolt.Tx) error {
		_, err := tx.CreateBucketIfNotExists(bucket)
		return err
	})
	if err != nil {
		log.Fatal(err)
	}

	b.ResetTimer()
	// Deferred after Close, so it runs first: the timer is stopped before the
	// database is closed.
	defer b.StopTimer()
	for j := 0; j < b.N; j++ {
		err := db.Update(func(tx *bolt.Tx) error {
			bc := tx.Bucket(bucket)
			for i := 0; i < len(kvLst); i += 2 {
				// Return the error instead of exiting here so the transaction
				// is ended by Update before the failure is reported below.
				if err := bc.Put(kvLst[i], kvLst[i+1]); err != nil {
					return err
				}
			}
			return nil
		})
		if err != nil {
			log.Fatal(err)
		}
	}
}

// BenchmarkBoltdbGet measures reading every key of kvLst back from bucket "b"
// of the bolt database file "bdb" and checks each returned value.
//
// kvLst is read as alternating keys and values (key at index i, value at
// i+1). Each benchmark iteration performs all lookups inside a single View
// transaction and aborts on the first value mismatch.
func BenchmarkBoltdbGet(b *testing.B) {
	db, err := bolt.Open("bdb", 0600, nil)
	if err != nil {
		// Without this check a failed open would leave db nil and the first
		// transaction would panic instead of reporting the real cause.
		log.Fatal(err)
	}
	defer db.Close()

	bucket := []byte("b")
	// Setup: make sure the bucket exists so tx.Bucket below is never nil.
	// Done before ResetTimer so this write transaction is not measured as
	// part of the read benchmark.
	err = db.Update(func(tx *bolt.Tx) error {
		_, err := tx.CreateBucketIfNotExists(bucket)
		return err
	})
	if err != nil {
		log.Fatal(err)
	}

	b.ResetTimer()
	// Deferred after Close, so it runs first: the timer is stopped before the
	// database is closed.
	defer b.StopTimer()
	for j := 0; j < b.N; j++ {
		err := db.View(func(tx *bolt.Tx) error {
			bc := tx.Bucket(bucket)
			for i := 0; i < len(kvLst); i += 2 {
				if v := bc.Get(kvLst[i]); !bytes.Equal(v, kvLst[i+1]) {
					log.Fatalf("unexpected value for key %q", kvLst[i])
				}
			}
			return nil
		})
		if err != nil {
			log.Fatal(err)
		}
	}
}

Conclusion

Tokyo Cabinet performs very well at around 20,000 entries, but its performance drops off sharply once the data reaches hundreds of thousands of entries; goleveldb is moderate, but remains stable at high data volumes; boltdb read performance is always great, but writes are a little unstable when a lot of data is written in one transaction.

In terms of database size, both Tokyo Cabinet and goleveldb use compression: the Tokyo Cabinet file is only slightly larger than the raw data (about 1%), the goleveldb files are about 60% of the raw data size, and the boltdb file is 1.5 to 4 times the raw data size (it contains more free space).

Also note that with the Hash data structure used here, Tokyo Cabinet’s keys are unordered, whereas the keys of the other two databases are ordered.