From d334a7d31c4fdd05438b37a2ed78f98c6fb682da Mon Sep 17 00:00:00 2001 From: Dimitri Sokolyuk Date: Sun, 11 Sep 2016 13:51:45 +0200 Subject: Solve OCR --- go/ocr-numbers/README.md | 96 +++++++++++++++++++++++++++ go/ocr-numbers/ocr_numbers.go | 113 ++++++++++++++++++++++++++++++++ go/ocr-numbers/ocr_numbers_test.go | 130 +++++++++++++++++++++++++++++++++++++ 3 files changed, 339 insertions(+) create mode 100644 go/ocr-numbers/README.md create mode 100644 go/ocr-numbers/ocr_numbers.go create mode 100644 go/ocr-numbers/ocr_numbers_test.go diff --git a/go/ocr-numbers/README.md b/go/ocr-numbers/README.md new file mode 100644 index 0000000..9b212ad --- /dev/null +++ b/go/ocr-numbers/README.md @@ -0,0 +1,96 @@ +# Ocr Numbers + +Write a program that, given a 3 x 4 grid of pipes, underscores, and spaces, can determine which number is represented, or whether it is garbled. + +# Step One + +To begin with, convert a simple binary font to a string containing 0 or 1. + +The binary font uses pipes and underscores, four rows high and three columns wide. + +``` + _ # + | | # zero. + |_| # + # the fourth row is always blank +``` + +Is converted to "0" + +``` + # + | # one. + | # + # (blank fourth row) +``` + +Is converted to "1" + +If the input is the correct size, but not recognizable, your program should return '?' + +If the input is the incorrect size, your program should return an error. + +# Step Two + +Update your program to recognize multi-character binary strings, replacing garbled numbers with ? + +# Step Three + +Update your program to recognize all numbers 0 through 9, both individually and as part of a larger string. + +``` + _ + _| +|_ + +``` + +Is converted to "2" + +``` + _ _ _ _ _ _ _ _ # + | _| _||_||_ |_ ||_||_|| | # decimal numbers. + ||_ _| | _||_| ||_| _||_| # + # fourth line is always blank +``` + +Is converted to "1234567890" + +# Step Four + +Update your program to handle multiple numbers, one per line. 
// Every glyph occupies a cell that is cellWidth columns wide and cellHeight
// rows high; the last row of a cell is always blank:
//
//	 _     _  _     _  _  _  _  _
//	| |  | _| _||_||_ |_   ||_||_|
//	|_|  ||_  _|  | _||_|  ||_| _|
const (
	cellWidth  = 3
	cellHeight = 4
)

// rules holds one lookup table per cell row. Each table maps the three
// characters a cell shows on that row to the digits that are drawn with
// exactly those characters. A pattern that is missing from a table matches no
// digit at all.
var rules = []map[string][]int{
	{ // row 1
		" _ ": {0, 2, 3, 5, 6, 7, 8, 9},
		"   ": {1, 4},
	},
	{ // row 2
		"| |": {0},
		"  |": {1, 7},
		" _|": {2, 3},
		"|_ ": {5, 6},
		"|_|": {4, 8, 9},
	},
	{ // row 3
		"|_|": {0, 6, 8},
		"  |": {1, 4, 7},
		"|_ ": {2},
		" _|": {3, 5, 9},
	},
	{ // row 4: must be blank for every digit
		"   ": {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
	},
}

// splitByLine cuts s at newlines, drops zero-length lines, and splits every
// remaining line into cells. The result is indexed [row][column].
func splitByLine(s string) [][]string {
	var rows [][]string
	for _, line := range strings.Split(s, "\n") {
		if len(line) > 0 {
			rows = append(rows, splitLineByGroup(line))
		}
	}
	return rows
}

// splitLineByGroup splits line into consecutive cells of cellWidth bytes.
// Trailing bytes that do not fill a whole cell are discarded.
func splitLineByGroup(line string) []string {
	n := len(line) / cellWidth
	cells := make([]string, n)
	for i := range cells {
		cells[i] = line[i*cellWidth : (i+1)*cellWidth]
	}
	return cells
}

// Recognize converts OCR input into one string of digits per number.
//
// The input is read line by line. Zero-length lines (such as the one produced
// by a leading "\n") never belong to a number; they only end the number that
// is currently being collected. Every other line is a cell row, and a number
// is complete once cellHeight rows have been collected. Grouping by position
// rather than by "looks blank" matters because the top row of "1" and "4" is
// blank too and must not be mistaken for a separator.
//
// Cells that match no digit are reported as '?'. Input without any cell row
// yields a single "?".
func Recognize(s string) []string {
	var numbers []string
	var block []string
	flush := func() {
		numbers = append(numbers, recognizeDigit(strings.Join(block, "\n")))
		block = block[:0]
	}
	for _, line := range strings.Split(s, "\n") {
		if line == "" {
			if len(block) > 0 {
				flush()
			}
			continue
		}
		block = append(block, line)
		if len(block) == cellHeight {
			flush()
		}
	}
	if len(block) > 0 {
		flush()
	}
	if len(numbers) == 0 {
		return []string{"?"}
	}
	return numbers
}

// recognizeDigit recognizes one row of glyphs. s holds the cell rows of a
// single number separated by newlines; the result has one character per cell
// column: the digit, or '?' when no digit matches.
//
// Row i is checked against rules[i%len(rules)]. When fewer than cellHeight
// rows are supplied, several digits may remain possible for a column; the
// smallest one is reported. Columns that are absent from the first row are
// partial cells and are reported as '?' (previously such input caused an
// index-out-of-range panic). Input without any non-empty line yields "?".
func recognizeDigit(s string) string {
	rows := splitByLine(s)
	if len(rows) == 0 {
		return "?"
	}

	// The first row decides which columns are complete; wider rows below it
	// only add partial columns.
	complete := len(rows[0])
	width := complete
	for _, row := range rows {
		if len(row) > width {
			width = len(row)
		}
	}

	// intersect never modifies its arguments, so all complete columns can
	// start from the same slice. Partial columns start with no candidates.
	digits := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
	candidates := make([][]int, width)
	for j := 0; j < complete; j++ {
		candidates[j] = digits
	}

	for i, row := range rows {
		rule := rules[i%len(rules)]
		for j, cell := range row {
			candidates[j] = intersect(candidates[j], rule[cell])
		}
	}

	out := make([]byte, width)
	for j, c := range candidates {
		if len(c) == 0 {
			out[j] = '?'
			continue
		}
		out[j] = byte('0' + c[0])
	}
	return string(out)
}

// intersect returns the elements of a that also occur in b, in the order in
// which they appear in a. Neither argument is modified.
func intersect(a, b []int) []int {
	var common []int
	for _, x := range a {
		for _, y := range b {
			if x == y {
				common = append(common, x)
				break
			}
		}
	}
	return common
}
+// +// For README Step 2 define, +// +// func Recognize(string) []string +// +// and implement it using recognizeDigit. +// +// Input strings tested here have a \n at the beginning of each line and +// no trailing \n on the last line. (This makes for readable raw string +// literals.) +// +// For bonus points, gracefully handle misformatted data. What should you +// do with a partial cell? Discard it? Pad with spaces? Report it with a +// "?" character? What should you do if the first character is not \n? + +package ocr + +import ( + "reflect" + "testing" +) + +var tests = []struct { + in string + out []string +}{ + {` + _ +| | +|_| + `, []string{"0"}}, + {` + + | + | + `, []string{"1"}}, + {` + _ + _| +|_ + `, []string{"2"}}, + {` + _ + _| + _| + `, []string{"3"}}, + {` + +|_| + | + `, []string{"4"}}, + {` + _ +|_ + _| + `, []string{"5"}}, + {` + _ +|_ +|_| + `, []string{"6"}}, + {` + _ + | + | + `, []string{"7"}}, + {` + _ +|_| +|_| + `, []string{"8"}}, + {` + _ +|_| + _| + `, []string{"9"}}, + {` + _ + || | + ||_| + `, []string{"10"}}, + {` + +| | +| | + `, []string{"?"}}, + {` + _ _ _ _ + | || | || | | || || | + | ||_| ||_| | ||_||_| + `, []string{"110101100"}}, + {` + _ _ _ + | || | || | || || | + | | _| ||_| | ||_||_| + `, []string{"11?10?1?0"}}, + {` + _ _ _ _ _ _ _ _ + | _| _||_||_ |_ ||_||_|| | + ||_ _| | _||_| ||_| _||_| + `, []string{"1234567890"}}, + {` + _ _ + | _| _| + ||_ _| + + _ _ +|_||_ |_ + | _||_| + + _ _ _ + ||_||_| + ||_| _| + `, []string{"123", "456", "789"}}, +} + +var _ = recognizeDigit // step 1. + +func TestRecognize(t *testing.T) { + for _, test := range tests { + if res := Recognize(test.in); !reflect.DeepEqual(res, test.out) { + t.Fatalf("Recognize(`%s`) = %q, want %q.", test.in, res, test.out) + } + } +} -- cgit v1.2.3