From ec8341e9d71349057c456d047420f9435dc78cf1 Mon Sep 17 00:00:00 2001
From: Dimitri Sokolyuk
Date: Thu, 10 Nov 2016 22:56:18 +0100
Subject: Resolve OCR

---
 go/ocr-numbers/ocr_numbers.go | 103 ++++++++++++++++++++----------------------
 1 file changed, 50 insertions(+), 53 deletions(-)

diff --git a/go/ocr-numbers/ocr_numbers.go b/go/ocr-numbers/ocr_numbers.go
index 4c0227c..cbc9fa6 100644
--- a/go/ocr-numbers/ocr_numbers.go
+++ b/go/ocr-numbers/ocr_numbers.go
@@ -1,7 +1,9 @@
 package ocr
 
 import (
-	"regexp"
+	"bufio"
+	"bytes"
+	"fmt"
 	"strings"
 )
 
@@ -35,68 +37,28 @@ var rules = []map[string][]int{
 	},
 }
 
-func splitByLine(s string) [][]string {
-	l := strings.Split(s, "\n")
-	var ret [][]string
-	for _, v := range l {
-		if len(v) > 0 {
-			ret = append(ret, splitLineByGroup(v))
-		}
-	}
-	return ret
-}
-
-func splitLineByGroup(line string) []string {
-	l := len(line) / 3
-	ret := make([]string, l)
-	for i := 0; i < l; i++ {
+func splitByDigit(line string) []string {
+	ret := make([]string, len(line)/3)
+	for i := range ret {
 		ret[i] = line[i*3 : i*3+3]
 	}
 	return ret
 }
 
-func Recognize(s string) []string {
-	// bogus part: sanitize input, split by groups
-	re, _ := regexp.Compile("\n *\n")
-	s = re.ReplaceAllString(s, "\n\n")
-	groups := strings.Split(s, "\n\n")
-	var ret []string
-	for _, v := range groups {
-		ret = append(ret, recognizeDigit(v))
-	}
-	return ret
-}
-
-func recognizeDigit(s string) string {
-	if len(s) == 0 {
-		return "?"
-	}
-	l := splitByLine(s)
-	if len(l) == 0 {
-		return "?"
-	}
-	o := make([][]int, len(l[0]))
-	for i := range o {
-		o[i] = []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
-	}
-	for i, v := range l {
-		for j, z := range v {
-			o[j] = intersect(o[j], rules[i%len(rules)][z])
-		}
-	}
-	var ret []rune
-	for _, v := range o {
+func toString(r [][]int) string {
+	buf := new(bytes.Buffer)
+	for _, v := range r {
 		if len(v) == 1 {
-			ret = append(ret, rune('0'+v[0]))
+			fmt.Fprint(buf, v[0])
 		} else {
-			ret = append(ret, '?')
+			fmt.Fprint(buf, "?")
 		}
 	}
-	return string(ret)
+	return buf.String()
 }
 
-func intersect(a, b []int) []int {
-	has := func(x []int, a int) bool {
+func intersection(a, b []int) []int {
+	isInSet := func(x []int, a int) bool {
 		for _, v := range x {
 			if v == a {
 				return true
@@ -106,9 +68,44 @@ func intersect(a, b []int) []int {
 	}
 	var r []int
 	for _, v := range a {
-		if has(b, v) {
+		if isInSet(b, v) {
 			r = append(r, v)
 		}
 	}
 	return r
 }
+
+func recognizeDigit(s []string, set [][]int, k int) [][]int {
+	if set == nil {
+		set = make([][]int, len(s))
+		for i := range set {
+			set[i] = []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
+		}
+	}
+	ret := make([][]int, len(s))
+	for i, v := range s {
+		if r, ok := rules[k][v]; ok {
+			ret[i] = intersection(set[i], r)
+		}
+	}
+	return ret
+}
+
+func Recognize(s string) []string {
+	scan := bufio.NewScanner(strings.NewReader(s))
+	var set [][]int
+	var ret []string
+	for i := 0; scan.Scan(); {
+		if t := scan.Text(); len(t) > 0 {
+			dig := splitByDigit(t)
+			set = recognizeDigit(dig, set, i)
+			if i++; i == 4 {
+				str := toString(set)
+				ret = append(ret, str)
+				set = nil
+				i = 0
+			}
+		}
+	}
+	return ret
+}
-- 
cgit v1.2.3