Move

author: Dimitri Sokolyuk <sokolyuk@gmail.com> 2021-11-22 13:48:19 +0100
committer: Dimitri Sokolyuk <sokolyuk@gmail.com> 2021-11-22 13:48:19 +0100
commit: 1f1942ec0a4e55b3488a1771475994253bfe9b9e (patch)
tree: 0963f3b306cf5fdb7846c88102a18c462d096387 /hash
parent: 172fe8ad99288d4a4976089955a75bd28f5da764 (diff)
9 files changed, 302 insertions, 0 deletions
diff --git a/hash/def.go b/hash/def.go
new file mode 100644
index 0000000..f84edeb
--- /dev/null
+++ b/hash/def.go
@@ -0,0 +1,78 @@
+package hash
+
+type Action int
+
+// Operations
+const (
+	HashGet Action = iota
+	HashPut
+	HashPutNew
+	HashDelete
+	HashFirst
+	HashNext
+)
+
+const (
+	BufMod = 1 << iota
+	BufDisk
+	BufBucket
+	BufPin
+)
+
+type BufHead struct {
+	Prev  *BufHead // LRU links
+	Next  *BufHead // LRU links
+	Ovrlf *BufHead // Overflow page buffer header
+	Addr  uint32   // Address of this page
+	Page  []byte   // Actual page data
+	Flags byte
+}
+
+type Segment *BufHead
+
+// HTab is memroy resident data structure
+type HTab struct {
+	Hdr      HashHdr                         // Header
+	NSegs    int                             // Number of allocated segments
+	ExSegs   int                             // Number of extra allocated
+	Hash     func(interface{}, int64) uint32 // Hash function
+	Flags    int                             // Flag values
+	FP       int                             // File pointer
+	TmpBuf   []byte                          // Temprory Buffer for BIG data
+	TmpKey   []byte                          // Temprory Buffer for BIG keys
+	CPage    BufHead                         // Currrent page
+	CBucket  int                             // Current bucket
+	Err      int                             // Error Number -- for DBM compatibility
+	NewFile  int                             // Indicates if fd is backing store or not
+	SaveFile int                             // Indicates whether we need to flush file at exit
+	Mapp     [nCached]uint32                 // Pointers to page maps
+	NMaps    int                             // Initial number of bitmaps
+	NBufs    int                             // Number of buffers left to allocate
+	BufHead  BufHead                         // Header of buffer lru list
+	Dir      Segment                         // Hash Bucket directory
+}
+
+const nCached = 32 // number of bit maps and spare points
+
+// HashHdr holds hash table invormation
+type HashHdr struct {
+	Magic     int32           // Magic NO for hash tables
+	Version   int32           // Version ID
+	LOrder    uint32          // Byte Order
+	BSize     int32           // Bucket/Page Size
+	BShift    int32           // Bucket shift
+	DSize     int32           // Directory Size
+	SSize     int32           // Segment Size
+	SShift    int32           // Segment shift
+	OvflPoint int32           // Where overflow pages are being allocated
+	LastFreed int32           // Last overflow page freed
+	MaxBucket int32           // ID of Maximum bucket in use
+	HighMask  int32           // Mask to modulo into entire table
+	LowMask   int32           // Mask to modulo into lower half of table
+	FFactor   int32           // Fill factor
+	NKeys     int32           // Number of keys in hash table
+	HdrPages  int32           // Size of table header
+	HCharkey  int32           // value of hash(CHARKEY)
+	Spares    [nCached]int32  // spare pages for overflow
+	Bitmaps   [nCached]uint16 // address of overflow page bitmaps
+}
diff --git a/hash/func.go b/hash/func.go
new file mode 100644
index 0000000..41c4fd5
--- /dev/null
+++ b/hash/func.go
@@ -0,0 +1,13 @@
+package hash
+
+// defaultHash is Chris Torek's hash function
+func defaultHash(key []byte) uint32 {
+	var h uint32
+	for _, v := range key {
+		h = hash4b(h, v)
+	}
+	return h
+}
+
+func hash4a(h uint32, v byte) uint32 { return (h << 5) - h + uint32(v) }
+func hash4b(h uint32, v byte) uint32 { return (h << 5) + h + uint32(v) }
diff --git a/hash/func_test.go b/hash/func_test.go
new file mode 100644
index 0000000..458b306
--- /dev/null
+++ b/hash/func_test.go
@@ -0,0 +1,44 @@
+package hash
+
+import "testing"
+
+func TestDefaultHash(t *testing.T) {
+	testCases := []struct {
+		key  string
+		want uint32
+	}{
+		{"", 0},
+		{"A", 65},
+		{"AA", 2210},
+		{"AAA", 72995},
+		{"AAAA", 2408900},
+		{"AAAAA", 79493765},
+		{"AAAAAA", 2623294310},
+		{"AAAAAAA", 669366375},
+		{"AAAAAAAA", 614253960},
+		{"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 3607767976},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.key, func(t *testing.T) {
+			got := defaultHash([]byte(tc.key))
+			if got != tc.want {
+				t.Errorf("got %v, want %v", got, tc.want)
+			}
+		})
+	}
+}
+
+func BenchmarkDefaultHash(b *testing.B) {
+	benchCases := []string{
+		"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
+		"THE QUICK BROWN FOX JUMPS OVER THE LAZY DOG",
+	}
+	for _, bc := range benchCases {
+		b.Run(bc, func(b *testing.B) {
+			key := []byte(bc)
+			for i := 0; i < b.N; i++ {
+				defaultHash(key)
+			}
+		})
+	}
+}
diff --git a/hash/hash.go b/hash/hash.go
new file mode 100644
index 0000000..861dd90
--- /dev/null
+++ b/hash/hash.go
@@ -0,0 +1,52 @@
+package hash
+
+import "os"
+
+const (
+	Magic   = 0x061561
+	Version = 2
+)
+
+type Hash struct {
+	file      *os.File
+	BSize     uint                // hash table bucket size
+	FFactor   uint                // desired density within the hash table
+	CacheSize uint                // suggested maximum size in bytes
+	LOrder    uint                // byte order for integers in metadata
+	Hash      func([]byte) uint32 // user defined hash function
+}
+
+func New(file *os.File) *Hash {
+	return &Hash{
+		file: file,
+		Hash: defaultHash,
+	}
+}
+
+func (h *Hash) Close() error {
+	return h.file.Close()
+}
+
+func (h *Hash) Del(key []byte, flags uint) error {
+	panic("not implemented")
+}
+
+func (h *Hash) Fd() uintptr {
+	return h.file.Fd()
+}
+
+func (h *Hash) Get(key []byte, flags uint) ([]byte, error) {
+	panic("not implemented")
+}
+
+func (h *Hash) Put(key []byte, data []byte, flags uint) error {
+	panic("not implemented")
+}
+
+func (h *Hash) Sync(flags uint) error {
+	panic("not implemented")
+}
+
+func (h *Hash) Seq(key []byte, flags uint) ([]byte, error) {
+	panic("not implemented")
+}
diff --git a/hash/hash_test.go b/hash/hash_test.go
new file mode 100644
index 0000000..da3a4a6
--- /dev/null
+++ b/hash/hash_test.go
@@ -0,0 +1,26 @@
+package hash
+
+import (
+	"encoding/binary"
+	"os"
+	"testing"
+)
+
+func TestOpen(t *testing.T) {
+	fd, err := os.Open("testdata/aliases.db")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer fd.Close()
+	var hdr HashHdr
+	if err := binary.Read(fd, binary.BigEndian, &hdr); err != nil {
+		t.Fatal(err)
+	}
+	if hdr.Magic != Magic {
+		t.Errorf("got %x, want %x", hdr.Magic, Magic)
+	}
+	if hdr.Version != Version {
+		t.Errorf("got %x, want %x", hdr.Version, Version)
+	}
+	t.Logf("%+v", hdr)
+}
diff --git a/hash/log2.go b/hash/log2.go
new file mode 100644
index 0000000..8ba79df
--- /dev/null
+++ b/hash/log2.go
@@ -0,0 +1,15 @@
+package hash
+
+var tab32 = [32]uint32{
+	0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
+	8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31,
+}
+
+func log2(value uint32) uint32 {
+	value |= value >> 1
+	value |= value >> 2
+	value |= value >> 4
+	value |= value >> 8
+	value |= value >> 16
+	return tab32[(value*0x07c4acdd)>>27]
+}
diff --git a/hash/log2_test.go b/hash/log2_test.go
new file mode 100644
index 0000000..d5ff95b
--- /dev/null
+++ b/hash/log2_test.go
@@ -0,0 +1,53 @@
+package hash
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestLog2(t *testing.T) {
+	testCases := []struct {
+		num, want uint32
+	}{
+		{0, 0},
+		{1, 0},
+		{2, 1},
+		{3, 1},
+		{4, 2},
+		{7, 2},
+		{8, 3},
+		{15, 3},
+		{16, 4},
+		{31, 4},
+		{32, 5},
+		{63, 5},
+		{64, 6},
+		{127, 6},
+		{128, 7},
+		{255, 7},
+		{256, 8},
+		{511, 8},
+		{512, 9},
+		{1023, 9},
+		{1024, 10},
+	}
+	for _, tc := range testCases {
+		t.Run(fmt.Sprintf("log2(%v)=%v", tc.num, tc.want), func(t *testing.T) {
+			got := log2(tc.num)
+			if got != tc.want {
+				t.Errorf("got %v, want %v", got, tc.want)
+			}
+		})
+	}
+}
+
+func BenchmarkLog2(b *testing.B) {
+	benchCases := []uint32{1, 1024}
+	for _, bc := range benchCases {
+		b.Run(fmt.Sprintf("log(%v)", bc), func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				log2(bc)
+			}
+		})
+	}
+}
diff --git a/hash/page.go b/hash/page.go
new file mode 100644
index 0000000..1044f95
--- /dev/null
+++ b/hash/page.go
@@ -0,0 +1,21 @@
+package hash
+
+/*
+ * routines dealing with a data page
+ *
+ * page format:
+ *	┌───┬────────┬────────┬────────┐
+ * p	│ n │ keyoff │ datoff │ keyoff │
+ * 	├───┴────┬───┴───┬────┴──┬─────┤
+ *	│ datoff │ free  │  ptr  │ ──> │
+ *	├────────┴───────┴───────┴─────┤
+ *	│	 F R E E A R E A       │
+ *	├──────────────┬───────────────┤
+ *	│  <──── ─ ─ ─ │ data	       │
+ *	├────────┬─────┴────┬──────────┤
+ *	│  key   │ data     │ key      │
+ *	└────────┴──────────┴──────────┘
+ *
+ * Pointer to the free space is always:  p[p[0] + 2]
+ * Amount of free space on the page is:  p[p[0] + 1]
+ */
diff --git a/hash/testdata/aliases.db b/hash/testdata/aliases.db
new file mode 100644
index 0000000..0fb69a5
--- /dev/null
+++ b/hash/testdata/aliases.db
author	Dimitri Sokolyuk <sokolyuk@gmail.com>	2021-11-22 13:48:19 +0100
committer	Dimitri Sokolyuk <sokolyuk@gmail.com>	2021-11-22 13:48:19 +0100
commit	1f1942ec0a4e55b3488a1771475994253bfe9b9e (patch)
tree	0963f3b306cf5fdb7846c88102a18c462d096387 /hash
parent	172fe8ad99288d4a4976089955a75bd28f5da764 (diff)