aboutsummaryrefslogtreecommitdiff
path: root/hash
diff options
context:
space:
mode:
authorDimitri Sokolyuk <sokolyuk@gmail.com>2021-11-22 13:48:19 +0100
committerDimitri Sokolyuk <sokolyuk@gmail.com>2021-11-22 13:48:19 +0100
commit1f1942ec0a4e55b3488a1771475994253bfe9b9e (patch)
tree0963f3b306cf5fdb7846c88102a18c462d096387 /hash
parent172fe8ad99288d4a4976089955a75bd28f5da764 (diff)
Move
Diffstat (limited to 'hash')
-rw-r--r--hash/def.go78
-rw-r--r--hash/func.go13
-rw-r--r--hash/func_test.go44
-rw-r--r--hash/hash.go52
-rw-r--r--hash/hash_test.go26
-rw-r--r--hash/log2.go15
-rw-r--r--hash/log2_test.go53
-rw-r--r--hash/page.go21
-rw-r--r--hash/testdata/aliases.dbbin0 -> 65536 bytes
9 files changed, 302 insertions, 0 deletions
diff --git a/hash/def.go b/hash/def.go
new file mode 100644
index 0000000..f84edeb
--- /dev/null
+++ b/hash/def.go
@@ -0,0 +1,78 @@
+package hash
+
+type Action int
+
+// Operations
+const (
+ HashGet Action = iota
+ HashPut
+ HashPutNew
+ HashDelete
+ HashFirst
+ HashNext
+)
+
+const (
+ BufMod = 1 << iota
+ BufDisk
+ BufBucket
+ BufPin
+)
+
+type BufHead struct {
+ Prev *BufHead // LRU links
+ Next *BufHead // LRU links
+ Ovrlf *BufHead // Overflow page buffer header
+ Addr uint32 // Address of this page
+ Page []byte // Actual page data
+ Flags byte
+}
+
+type Segment *BufHead
+
+// HTab is memroy resident data structure
+type HTab struct {
+ Hdr HashHdr // Header
+ NSegs int // Number of allocated segments
+ ExSegs int // Number of extra allocated
+ Hash func(interface{}, int64) uint32 // Hash function
+ Flags int // Flag values
+ FP int // File pointer
+ TmpBuf []byte // Temprory Buffer for BIG data
+ TmpKey []byte // Temprory Buffer for BIG keys
+ CPage BufHead // Currrent page
+ CBucket int // Current bucket
+ Err int // Error Number -- for DBM compatibility
+ NewFile int // Indicates if fd is backing store or not
+ SaveFile int // Indicates whether we need to flush file at exit
+ Mapp [nCached]uint32 // Pointers to page maps
+ NMaps int // Initial number of bitmaps
+ NBufs int // Number of buffers left to allocate
+ BufHead BufHead // Header of buffer lru list
+ Dir Segment // Hash Bucket directory
+}
+
+const nCached = 32 // number of bit maps and spare points
+
+// HashHdr holds hash table invormation
+type HashHdr struct {
+ Magic int32 // Magic NO for hash tables
+ Version int32 // Version ID
+ LOrder uint32 // Byte Order
+ BSize int32 // Bucket/Page Size
+ BShift int32 // Bucket shift
+ DSize int32 // Directory Size
+ SSize int32 // Segment Size
+ SShift int32 // Segment shift
+ OvflPoint int32 // Where overflow pages are being allocated
+ LastFreed int32 // Last overflow page freed
+ MaxBucket int32 // ID of Maximum bucket in use
+ HighMask int32 // Mask to modulo into entire table
+ LowMask int32 // Mask to modulo into lower half of table
+ FFactor int32 // Fill factor
+ NKeys int32 // Number of keys in hash table
+ HdrPages int32 // Size of table header
+ HCharkey int32 // value of hash(CHARKEY)
+ Spares [nCached]int32 // spare pages for overflow
+ Bitmaps [nCached]uint16 // address of overflow page bitmaps
+}
diff --git a/hash/func.go b/hash/func.go
new file mode 100644
index 0000000..41c4fd5
--- /dev/null
+++ b/hash/func.go
@@ -0,0 +1,13 @@
+package hash
+
+// defaultHash is Chris Torek's hash function
+func defaultHash(key []byte) uint32 {
+ var h uint32
+ for _, v := range key {
+ h = hash4b(h, v)
+ }
+ return h
+}
+
+func hash4a(h uint32, v byte) uint32 { return (h << 5) - h + uint32(v) }
+func hash4b(h uint32, v byte) uint32 { return (h << 5) + h + uint32(v) }
diff --git a/hash/func_test.go b/hash/func_test.go
new file mode 100644
index 0000000..458b306
--- /dev/null
+++ b/hash/func_test.go
@@ -0,0 +1,44 @@
+package hash
+
+import "testing"
+
+func TestDefaultHash(t *testing.T) {
+ testCases := []struct {
+ key string
+ want uint32
+ }{
+ {"", 0},
+ {"A", 65},
+ {"AA", 2210},
+ {"AAA", 72995},
+ {"AAAA", 2408900},
+ {"AAAAA", 79493765},
+ {"AAAAAA", 2623294310},
+ {"AAAAAAA", 669366375},
+ {"AAAAAAAA", 614253960},
+ {"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 3607767976},
+ }
+ for _, tc := range testCases {
+ t.Run(tc.key, func(t *testing.T) {
+ got := defaultHash([]byte(tc.key))
+ if got != tc.want {
+ t.Errorf("got %v, want %v", got, tc.want)
+ }
+ })
+ }
+}
+
+func BenchmarkDefaultHash(b *testing.B) {
+ benchCases := []string{
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
+ "THE QUICK BROWN FOX JUMPS OVER THE LAZY DOG",
+ }
+ for _, bc := range benchCases {
+ b.Run(bc, func(b *testing.B) {
+ key := []byte(bc)
+ for i := 0; i < b.N; i++ {
+ defaultHash(key)
+ }
+ })
+ }
+}
diff --git a/hash/hash.go b/hash/hash.go
new file mode 100644
index 0000000..861dd90
--- /dev/null
+++ b/hash/hash.go
@@ -0,0 +1,52 @@
+package hash
+
+import "os"
+
+const (
+ Magic = 0x061561
+ Version = 2
+)
+
+type Hash struct {
+ file *os.File
+ BSize uint // hash table bucket size
+ FFactor uint // desired density within the hash table
+ CacheSize uint // suggested maximum size in bytes
+ LOrder uint // byte order for integers in metadata
+ Hash func([]byte) uint32 // user defined hash function
+}
+
+func New(file *os.File) *Hash {
+ return &Hash{
+ file: file,
+ Hash: defaultHash,
+ }
+}
+
+func (h *Hash) Close() error {
+ return h.file.Close()
+}
+
+func (h *Hash) Del(key []byte, flags uint) error {
+ panic("not implemented")
+}
+
+func (h *Hash) Fd() uintptr {
+ return h.file.Fd()
+}
+
+func (h *Hash) Get(key []byte, flags uint) ([]byte, error) {
+ panic("not implemented")
+}
+
+func (h *Hash) Put(key []byte, data []byte, flags uint) error {
+ panic("not implemented")
+}
+
+func (h *Hash) Sync(flags uint) error {
+ panic("not implemented")
+}
+
+func (h *Hash) Seq(key []byte, flags uint) ([]byte, error) {
+ panic("not implemented")
+}
diff --git a/hash/hash_test.go b/hash/hash_test.go
new file mode 100644
index 0000000..da3a4a6
--- /dev/null
+++ b/hash/hash_test.go
@@ -0,0 +1,26 @@
+package hash
+
+import (
+ "encoding/binary"
+ "os"
+ "testing"
+)
+
+func TestOpen(t *testing.T) {
+ fd, err := os.Open("testdata/aliases.db")
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer fd.Close()
+ var hdr HashHdr
+ if err := binary.Read(fd, binary.BigEndian, &hdr); err != nil {
+ t.Fatal(err)
+ }
+ if hdr.Magic != Magic {
+ t.Errorf("got %x, want %x", hdr.Magic, Magic)
+ }
+ if hdr.Version != Version {
+ t.Errorf("got %x, want %x", hdr.Version, Version)
+ }
+ t.Logf("%+v", hdr)
+}
diff --git a/hash/log2.go b/hash/log2.go
new file mode 100644
index 0000000..8ba79df
--- /dev/null
+++ b/hash/log2.go
@@ -0,0 +1,15 @@
+package hash
+
+var tab32 = [32]uint32{
+ 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
+ 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31,
+}
+
+func log2(value uint32) uint32 {
+ value |= value >> 1
+ value |= value >> 2
+ value |= value >> 4
+ value |= value >> 8
+ value |= value >> 16
+ return tab32[(value*0x07c4acdd)>>27]
+}
diff --git a/hash/log2_test.go b/hash/log2_test.go
new file mode 100644
index 0000000..d5ff95b
--- /dev/null
+++ b/hash/log2_test.go
@@ -0,0 +1,53 @@
+package hash
+
+import (
+ "fmt"
+ "testing"
+)
+
+func TestLog2(t *testing.T) {
+ testCases := []struct {
+ num, want uint32
+ }{
+ {0, 0},
+ {1, 0},
+ {2, 1},
+ {3, 1},
+ {4, 2},
+ {7, 2},
+ {8, 3},
+ {15, 3},
+ {16, 4},
+ {31, 4},
+ {32, 5},
+ {63, 5},
+ {64, 6},
+ {127, 6},
+ {128, 7},
+ {255, 7},
+ {256, 8},
+ {511, 8},
+ {512, 9},
+ {1023, 9},
+ {1024, 10},
+ }
+ for _, tc := range testCases {
+ t.Run(fmt.Sprintf("log2(%v)=%v", tc.num, tc.want), func(t *testing.T) {
+ got := log2(tc.num)
+ if got != tc.want {
+ t.Errorf("got %v, want %v", got, tc.want)
+ }
+ })
+ }
+}
+
+func BenchmarkLog2(b *testing.B) {
+ benchCases := []uint32{1, 1024}
+ for _, bc := range benchCases {
+ b.Run(fmt.Sprintf("log(%v)", bc), func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ log2(bc)
+ }
+ })
+ }
+}
diff --git a/hash/page.go b/hash/page.go
new file mode 100644
index 0000000..1044f95
--- /dev/null
+++ b/hash/page.go
@@ -0,0 +1,21 @@
+package hash
+
+/*
+ * routines dealing with a data page
+ *
+ * page format:
+ * ┌───┬────────┬────────┬────────┐
+ * p │ n │ keyoff │ datoff │ keyoff │
+ * ├───┴────┬───┴───┬────┴──┬─────┤
+ * │ datoff │ free │ ptr │ ──> │
+ * ├────────┴───────┴───────┴─────┤
+ * │ F R E E A R E A │
+ * ├──────────────┬───────────────┤
+ * │ <──── ─ ─ ─ │ data │
+ * ├────────┬─────┴────┬──────────┤
+ * │ key │ data │ key │
+ * └────────┴──────────┴──────────┘
+ *
+ * Pointer to the free space is always: p[p[0] + 2]
+ * Amount of free space on the page is: p[p[0] + 1]
+ */
diff --git a/hash/testdata/aliases.db b/hash/testdata/aliases.db
new file mode 100644
index 0000000..0fb69a5
--- /dev/null
+++ b/hash/testdata/aliases.db
Binary files differ