123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450 |
- package govaluate
- import (
- "bytes"
- "errors"
- "fmt"
- "regexp"
- "strconv"
- "time"
- "unicode"
- )
- func parseTokens(expression string, functions map[string]ExpressionFunction) ([]ExpressionToken, error) {
- var ret []ExpressionToken
- var token ExpressionToken
- var stream *lexerStream
- var state lexerState
- var err error
- var found bool
- stream = newLexerStream(expression)
- state = validLexerStates[0]
- for stream.canRead() {
- token, err, found = readToken(stream, state, functions)
- if err != nil {
- return ret, err
- }
- if !found {
- break
- }
- state, err = getLexerStateForToken(token.Kind)
- if err != nil {
- return ret, err
- }
- // append this valid token
- ret = append(ret, token)
- }
- err = checkBalance(ret)
- if err != nil {
- return nil, err
- }
- return ret, nil
- }
- func readToken(stream *lexerStream, state lexerState, functions map[string]ExpressionFunction) (ExpressionToken, error, bool) {
- var function ExpressionFunction
- var ret ExpressionToken
- var tokenValue interface{}
- var tokenTime time.Time
- var tokenString string
- var kind TokenKind
- var character rune
- var found bool
- var completed bool
- var err error
- // numeric is 0-9, or .
- // string starts with '
- // variable is alphanumeric, always starts with a letter
- // bracket always means variable
- // symbols are anything non-alphanumeric
- // all others read into a buffer until they reach the end of the stream
- for stream.canRead() {
- character = stream.readCharacter()
- if unicode.IsSpace(character) {
- continue
- }
- kind = UNKNOWN
- // numeric constant
- if isNumeric(character) {
- tokenString = readTokenUntilFalse(stream, isNumeric)
- tokenValue, err = strconv.ParseFloat(tokenString, 64)
- if err != nil {
- errorMsg := fmt.Sprintf("Unable to parse numeric value '%v' to float64\n", tokenString)
- return ExpressionToken{}, errors.New(errorMsg), false
- }
- kind = NUMERIC
- break
- }
- // comma, separator
- if character == ',' {
- tokenValue = ","
- kind = SEPARATOR
- break
- }
- // escaped variable
- if character == '[' {
- tokenValue, completed = readUntilFalse(stream, true, false, true, isNotClosingBracket)
- kind = VARIABLE
- if !completed {
- return ExpressionToken{}, errors.New("Unclosed parameter bracket"), false
- }
- // above method normally rewinds us to the closing bracket, which we want to skip.
- stream.rewind(-1)
- break
- }
- // regular variable - or function?
- if unicode.IsLetter(character) {
- tokenString = readTokenUntilFalse(stream, isVariableName)
- tokenValue = tokenString
- kind = VARIABLE
- // boolean?
- if tokenValue == "true" {
- kind = BOOLEAN
- tokenValue = true
- } else {
- if tokenValue == "false" {
- kind = BOOLEAN
- tokenValue = false
- }
- }
- // textual operator?
- if tokenValue == "in" || tokenValue == "IN" {
- // force lower case for consistency
- tokenValue = "in"
- kind = COMPARATOR
- }
- // function?
- function, found = functions[tokenString]
- if found {
- kind = FUNCTION
- tokenValue = function
- }
- break
- }
- if !isNotQuote(character) {
- tokenValue, completed = readUntilFalse(stream, true, false, true, isNotQuote)
- if !completed {
- return ExpressionToken{}, errors.New("Unclosed string literal"), false
- }
- // advance the stream one position, since reading until false assumes the terminator is a real token
- stream.rewind(-1)
- // check to see if this can be parsed as a time.
- tokenTime, found = tryParseTime(tokenValue.(string))
- if found {
- kind = TIME
- tokenValue = tokenTime
- } else {
- kind = STRING
- }
- break
- }
- if character == '(' {
- tokenValue = character
- kind = CLAUSE
- break
- }
- if character == ')' {
- tokenValue = character
- kind = CLAUSE_CLOSE
- break
- }
- // must be a known symbol
- tokenString = readTokenUntilFalse(stream, isNotAlphanumeric)
- tokenValue = tokenString
- // quick hack for the case where "-" can mean "prefixed negation" or "minus", which are used
- // very differently.
- if state.canTransitionTo(PREFIX) {
- _, found = prefixSymbols[tokenString]
- if found {
- kind = PREFIX
- break
- }
- }
- _, found = modifierSymbols[tokenString]
- if found {
- kind = MODIFIER
- break
- }
- _, found = logicalSymbols[tokenString]
- if found {
- kind = LOGICALOP
- break
- }
- _, found = comparatorSymbols[tokenString]
- if found {
- kind = COMPARATOR
- break
- }
- _, found = ternarySymbols[tokenString]
- if found {
- kind = TERNARY
- break
- }
- errorMessage := fmt.Sprintf("Invalid token: '%s'", tokenString)
- return ret, errors.New(errorMessage), false
- }
- ret.Kind = kind
- ret.Value = tokenValue
- return ret, nil, (kind != UNKNOWN)
- }
- func readTokenUntilFalse(stream *lexerStream, condition func(rune) bool) string {
- var ret string
- stream.rewind(1)
- ret, _ = readUntilFalse(stream, false, true, true, condition)
- return ret
- }
- /*
- Returns the string that was read until the given [condition] was false, or whitespace was broken.
- Returns false if the stream ended before whitespace was broken or condition was met.
- */
- func readUntilFalse(stream *lexerStream, includeWhitespace bool, breakWhitespace bool, allowEscaping bool, condition func(rune) bool) (string, bool) {
- var tokenBuffer bytes.Buffer
- var character rune
- var conditioned bool
- conditioned = false
- for stream.canRead() {
- character = stream.readCharacter()
- // Use backslashes to escape anything
- if allowEscaping && character == '\\' {
- character = stream.readCharacter()
- tokenBuffer.WriteString(string(character))
- continue
- }
- if unicode.IsSpace(character) {
- if breakWhitespace && tokenBuffer.Len() > 0 {
- conditioned = true
- break
- }
- if !includeWhitespace {
- continue
- }
- }
- if condition(character) {
- tokenBuffer.WriteString(string(character))
- } else {
- conditioned = true
- stream.rewind(1)
- break
- }
- }
- return tokenBuffer.String(), conditioned
- }
- /*
- Checks to see if any optimizations can be performed on the given [tokens], which form a complete, valid expression.
- The returns slice will represent the optimized (or unmodified) list of tokens to use.
- */
- func optimizeTokens(tokens []ExpressionToken) ([]ExpressionToken, error) {
- var token ExpressionToken
- var symbol OperatorSymbol
- var err error
- var index int
- for index, token = range tokens {
- // if we find a regex operator, and the right-hand value is a constant, precompile and replace with a pattern.
- if token.Kind != COMPARATOR {
- continue
- }
- symbol = comparatorSymbols[token.Value.(string)]
- if symbol != REQ && symbol != NREQ {
- continue
- }
- index++
- token = tokens[index]
- if token.Kind == STRING {
- token.Kind = PATTERN
- token.Value, err = regexp.Compile(token.Value.(string))
- if err != nil {
- return tokens, err
- }
- tokens[index] = token
- }
- }
- return tokens, nil
- }
- /*
- Checks the balance of tokens which have multiple parts, such as parenthesis.
- */
- func checkBalance(tokens []ExpressionToken) error {
- var stream *tokenStream
- var token ExpressionToken
- var parens int
- stream = newTokenStream(tokens)
- for stream.hasNext() {
- token = stream.next()
- if token.Kind == CLAUSE {
- parens++
- continue
- }
- if token.Kind == CLAUSE_CLOSE {
- parens--
- continue
- }
- }
- if parens != 0 {
- return errors.New("Unbalanced parenthesis")
- }
- return nil
- }
// isNumeric reports whether character can be part of a numeric literal:
// a Unicode decimal digit or the decimal point.
func isNumeric(character rune) bool {
	if character == '.' {
		return true
	}
	return unicode.IsDigit(character)
}
// isNotQuote reports whether character is anything other than a single or
// double quote.
func isNotQuote(character rune) bool {
	switch character {
	case '\'', '"':
		return false
	}
	return true
}
- func isNotAlphanumeric(character rune) bool {
- return !(unicode.IsDigit(character) ||
- unicode.IsLetter(character) ||
- character == '(' ||
- character == ')' ||
- !isNotQuote(character))
- }
// isVariableName reports whether character may appear in an unescaped
// variable name: a letter, a digit, or an underscore.
func isVariableName(character rune) bool {
	if character == '_' {
		return true
	}
	if unicode.IsLetter(character) {
		return true
	}
	return unicode.IsDigit(character)
}
// isNotClosingBracket reports whether character is anything other than ']'.
func isNotClosingBracket(character rune) bool {
	if character == ']' {
		return false
	}
	return true
}
- /*
- Attempts to parse the [candidate] as a Time.
- Tries a series of standardized date formats, returns the Time if one applies,
- otherwise returns false through the second return.
- */
- func tryParseTime(candidate string) (time.Time, bool) {
- var ret time.Time
- var found bool
- timeFormats := [...]string{
- time.ANSIC,
- time.UnixDate,
- time.RubyDate,
- time.Kitchen,
- time.RFC3339,
- time.RFC3339Nano,
- "2006-01-02", // RFC 3339
- "2006-01-02 15:04", // RFC 3339 with minutes
- "2006-01-02 15:04:05", // RFC 3339 with seconds
- "2006-01-02 15:04:05-07:00", // RFC 3339 with seconds and timezone
- "2006-01-02T15Z0700", // ISO8601 with hour
- "2006-01-02T15:04Z0700", // ISO8601 with minutes
- "2006-01-02T15:04:05Z0700", // ISO8601 with seconds
- "2006-01-02T15:04:05.999999999Z0700", // ISO8601 with nanoseconds
- }
- for _, format := range timeFormats {
- ret, found = tryParseExactTime(candidate, format)
- if found {
- return ret, true
- }
- }
- return time.Now(), false
- }
// tryParseExactTime attempts to parse candidate using exactly the given
// format (a time package layout string). Values without explicit zone
// information are interpreted in time.Local.
//
// It returns the parsed time and true on success. On failure it returns the
// zero time.Time and false; the time value carries no meaning in that case.
func tryParseExactTime(candidate string, format string) (time.Time, bool) {
	parsed, err := time.ParseInLocation(format, candidate, time.Local)
	if err != nil {
		// The parse error itself is not interesting to callers; they only
		// need to know that this layout did not match.
		return time.Time{}, false
	}
	return parsed, true
}
|