From ce64ad22a11308c8fafa1bc8b40e3e21210cc3ee Mon Sep 17 00:00:00 2001
From: Dimitri Sokolyuk
Date: Sun, 22 Mar 2015 15:09:01 +0100
Subject: Initial import

---
 errors.go |  14 ++++
 lexer.go  | 229 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 main.go   |  37 ++++++++++
 tokens.go | 132 ++++++++++++++++++++++++++++++++++++
 4 files changed, 412 insertions(+)
 create mode 100644 errors.go
 create mode 100644 lexer.go
 create mode 100644 main.go
 create mode 100644 tokens.go

diff --git a/errors.go b/errors.go
new file mode 100644
index 0000000..91b0513
--- /dev/null
+++ b/errors.go
@@ -0,0 +1,14 @@
+package main
+
+import "errors"
+
+// Sentinel error values. Nothing else in this commit refers to them yet.
+var (
+	syntaxError = errors.New("SYNTAX ERROR")
+	domainError = errors.New("DOMAIN ERROR")
+	lengthError = errors.New("LENGTH ERROR")
+	rankError   = errors.New("RANK ERROR")
+	indexError  = errors.New("INDEX ERROR")
+	nonceError  = errors.New("NONCE ERROR")
+	valueError  = errors.New("VALUE ERROR")
+)
diff --git a/lexer.go b/lexer.go
new file mode 100644
index 0000000..bdbca7b
--- /dev/null
+++ b/lexer.go
@@ -0,0 +1,229 @@
+package main
+
+import (
+	"log"
+	"unicode/utf8"
+)
+
+// item is one lexeme: a token type from tokens.go and the input text it
+// covers.
+type item struct {
+	typ int
+	val string
+}
+
+// yyLex scans input and delivers the items it finds over the items channel.
+type yyLex struct {
+	input string    // text being scanned
+	start int       // byte offset where the pending item begins
+	pos   int       // byte offset of the next rune to read
+	width int       // byte width of the rune last read by next; 0 at end of input
+	items chan item // scanned items; closed when run returns
+}
+
+// Error logs s through the standard logger.
+func (y *yyLex) Error(s string) {
+	log.Println(s)
+}
+
+/*
+type yySymType struct{}
+func (y *yyLex) Lex(lval *yySymType) (ret int) {
+	item := <-y.items
+	return item.typ
+}
+*/
+
+// Lex returns the type and text of the next item. After run has closed the
+// channel the receive yields the zero item, so Lex returns (EOF, "").
+func (y *yyLex) Lex() (int, string) {
+	item := <-y.items
+	return item.typ, item.val
+}
+
+// lex starts scanning input in its own goroutine and returns the lexer to
+// read the items from. The channel is unbuffered, so the goroutine blocks in
+// emit until each item has been received.
+func lex(input string) *yyLex {
+	l := &yyLex{
+		input: input,
+		items: make(chan item),
+	}
+	go l.run()
+	return l
+}
+
+// run is the scanning loop: it classifies each rune through Tockens and
+// either skips it, emits it as a one-rune item, or hands over to one of the
+// lexXxx helpers. It stops at EOF or at a COMMENT rune and closes items.
+func (y *yyLex) run() {
+	defer close(y.items)
+	for {
+		c := y.next()
+
+		t, ok := Tockens[c]
+		if !ok {
+			// Runes without a token type are skipped.
+			y.ignore()
+			continue
+		}
+		switch t {
+		case COMMENT, EOF:
+			return
+		case BLANK:
+			y.ignore()
+		case QUOTE:
+			y.lexQuoted()
+		case DIGIT, NEG:
+			// Back up so lexNumber sees the literal from its first rune;
+			// otherwise its optional NEG would be matched after a digit
+			// and "5¯3" would lex as a single INTEGER.
+			y.backup()
+			y.lexNumber()
+		case CHAR:
+			y.lexString()
+		default:
+			y.emit(t)
+		}
+	}
+}
+
+// lexQuoted scans a quoted string whose opening quote has just been read and
+// emits the text between the quotes as a QUOTED item. A doubled quote does
+// not end the string and stays in the value as written. If the input ends
+// before a closing quote, the text read so far is emitted.
+func (y *yyLex) lexQuoted() {
+	y.ignore() // drop the opening quote
+	for {
+		r := y.next()
+		if y.width == 0 {
+			// End of input: unterminated string.
+			y.emit(QUOTED)
+			return
+		}
+		if Tockens[r] != QUOTE {
+			// Any other rune, mapped in Tockens or not, is content.
+			continue
+		}
+		// Remember the quote's width: the lookahead in acceptTocken
+		// overwrites y.width with that of the following rune.
+		quoteWidth := y.width
+		if y.acceptTocken(QUOTE) {
+			continue
+		}
+		y.pos -= quoteWidth // keep the closing quote out of the value
+		y.emit(QUOTED)
+		y.pos += quoteWidth // then step over it
+		y.ignore()
+		return
+	}
+}
+
+// lexString consumes the CHAR runes that follow the one already read and
+// emits the whole run as a STRING item.
+func (y *yyLex) lexString() {
+	for Tockens[y.next()] == CHAR {
+	}
+	y.backup()
+	y.emit(STRING)
+}
+
+// lexNumber scans a numeric literal from the current position and emits it
+// as INTEGER, FLOAT or COMPLEX. It accepts an optional NEG sign and digits,
+// then optionally a DOT fraction, an e/E exponent, and a j/J part that may
+// carry its own sign, fraction and exponent.
+func (y *yyLex) lexNumber() {
+	t := INTEGER
+	y.acceptTocken(NEG)
+	y.acceptDigits()
+	if y.acceptTocken(DOT) {
+		t = FLOAT
+		y.acceptDigits()
+	}
+	if y.acceptRune('e', 'E') {
+		t = FLOAT
+		y.acceptTocken(NEG)
+		y.acceptDigits()
+	}
+	if y.acceptRune('j', 'J') {
+		t = COMPLEX
+		y.acceptTocken(NEG)
+		y.acceptDigits()
+		if y.acceptTocken(DOT) {
+			y.acceptDigits()
+		}
+		if y.acceptRune('e', 'E') {
+			y.acceptTocken(NEG)
+			y.acceptDigits()
+		}
+	}
+	y.emit(t)
+}
+
+// acceptDigits consumes a possibly empty run of DIGIT runes.
+func (y *yyLex) acceptDigits() {
+	for y.acceptTocken(DIGIT) {
+	}
+}
+
+// emit sends the pending text input[start:pos] as an item of type t and
+// starts the next item at pos.
+func (y *yyLex) emit(t int) {
+	y.items <- item{
+		typ: t,
+		val: y.input[y.start:y.pos],
+	}
+	y.start = y.pos
+}
+
+// next returns the rune at pos and advances past it. At the end of the input
+// it returns EOF and sets width to 0, which makes a following backup a no-op.
+func (y *yyLex) next() (r rune) {
+	if y.pos >= len(y.input) {
+		y.width = 0
+		return EOF
+	}
+	r, y.width = utf8.DecodeRuneInString(y.input[y.pos:])
+	y.pos += y.width
+	return r
+}
+
+// ignore discards the pending text by moving start up to pos.
+func (y *yyLex) ignore() {
+	y.start = y.pos
+}
+
+// backup steps back over the rune last read by next. It is only correct once
+// per call to next, because only one width is remembered.
+func (y *yyLex) backup() {
+	y.pos -= y.width
+}
+
+// peek returns the next rune without consuming it.
+func (y *yyLex) peek() rune {
+	defer y.backup()
+	return y.next()
+}
+
+// acceptRune consumes the next rune if it is one of valid and reports whether
+// it did.
+func (y *yyLex) acceptRune(valid ...rune) bool {
+	next := y.next()
+	for _, r := range valid {
+		if next == r {
+			return true
+		}
+	}
+	y.backup()
+	return false
+}
+
+// acceptTocken consumes the next rune if Tockens gives it the type valid and
+// reports whether it did.
+func (y *yyLex) acceptTocken(valid int) bool {
+	if t, ok := Tockens[y.next()]; ok && t == valid {
+		return true
+	}
+	y.backup()
+	return false
+}
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..2d615b1
--- /dev/null
+++ b/main.go
@@ -0,0 +1,37 @@
+package main
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"os"
+)
+
+// main reads standard input line by line, writing a tab as prompt before
+// each read, and prints the type and text of every item lexed from the line.
+func main() {
+	in := bufio.NewReader(os.Stdin)
+
+	for {
+		os.Stdout.WriteString("\t")
+		line, err := in.ReadString('\n')
+		// ReadString may return text together with an error (a last line
+		// without newline), so lex what was read before acting on err.
+		if line != "" {
+			l := lex(line)
+			for {
+				i, s := l.Lex()
+				if i == EOF {
+					break
+				}
+				fmt.Println(i, s)
+			}
+		}
+		if err != nil {
+			if err != io.EOF {
+				fmt.Fprintln(os.Stderr, err)
+			}
+			return
+		}
+	}
+}
diff --git a/tokens.go b/tokens.go
new file mode 100644
index 0000000..e606535
--- /dev/null
+++ b/tokens.go
@@ -0,0 +1,132 @@
+package main
+
+// Token types. EOF is 0, which is also what indexing Tockens with an unmapped
+// rune yields, so membership must be tested with the two-value form. INTEGER,
+// FLOAT, COMPLEX, STRING and QUOTED are assigned by the lexer; the remaining
+// types come from Tockens.
+const (
+	EOF = iota
+	BLANK
+	INTEGER
+	FLOAT
+	COMPLEX
+	DIGIT
+	CHAR
+	STRING
+	QUOTED
+	QUOTE
+	DOT
+	COMA
+	NEG
+	COMMENT
+	ALPHA
+	OMEGA
+	IOTA
+	RHO
+	DEL
+	PLUS
+	MINUS
+	TIMES
+	DIVIDE
+	DROP
+	TAKE
+	QUAD
+	SLASH
+	SLASHBAR
+	BACKSLASH
+)
+
+// Tockens maps an input rune to its token type. Runes that are absent have no
+// token type.
+var Tockens = map[rune]int{
+	EOF: EOF,
+
+	' ':  BLANK,
+	'\t': BLANK,
+
+	'0': DIGIT,
+	'1': DIGIT,
+	'2': DIGIT,
+	'3': DIGIT,
+	'4': DIGIT,
+	'5': DIGIT,
+	'6': DIGIT,
+	'7': DIGIT,
+	'8': DIGIT,
+	'9': DIGIT,
+
+	'A': CHAR,
+	'B': CHAR,
+	'C': CHAR,
+	'D': CHAR,
+	'E': CHAR,
+	'F': CHAR,
+	'G': CHAR,
+	'H': CHAR,
+	'I': CHAR,
+	'J': CHAR,
+	'K': CHAR,
+	'L': CHAR,
+	'M': CHAR,
+	'N': CHAR,
+	'O': CHAR,
+	'P': CHAR,
+	'Q': CHAR,
+	'R': CHAR,
+	'S': CHAR,
+	'T': CHAR,
+	'U': CHAR,
+	'V': CHAR,
+	'W': CHAR,
+	'X': CHAR,
+	'Y': CHAR,
+	'Z': CHAR,
+
+	'a': CHAR,
+	'b': CHAR,
+	'c': CHAR,
+	'd': CHAR,
+	'e': CHAR,
+	'f': CHAR,
+	'g': CHAR,
+	'h': CHAR,
+	'i': CHAR,
+	'j': CHAR,
+	'k': CHAR,
+	'l': CHAR,
+	'm': CHAR,
+	'n': CHAR,
+	'o': CHAR,
+	'p': CHAR,
+	'q': CHAR,
+	'r': CHAR,
+	's': CHAR,
+	't': CHAR,
+	'u': CHAR,
+	'v': CHAR,
+	'w': CHAR,
+	'x': CHAR,
+	'y': CHAR,
+	'z': CHAR,
+
+	'\'': QUOTE,
+	'.':  DOT,
+	',':  COMA,
+	'¯':  NEG,
+	'⍝':  COMMENT,
+	'⍺':  ALPHA,
+	'⍵':  OMEGA,
+	'⍳':  IOTA,
+	'⍴':  RHO,
+	'∇':  DEL,
+	'+':  PLUS,
+	'-':  MINUS,
+	'×':  TIMES,
+	'÷':  DIVIDE,
+	'↓':  DROP,
+	'↑':  TAKE,
+	'⎕':  QUAD,
+	'/':  SLASH,
+	'⌿':  SLASHBAR,
+	'\\': BACKSLASH,
+}
-- 
cgit v1.2.3