本文简单介绍了一种四则运算求值的实现方法(基于语法分析)
双栈算法可以实现四则运算的求值,但是扩展性比较低,更好的方式是基于语法分析来实现,整体大概包括以下几个步骤:
- 词法分析
- 语法分析
- 语法树生成
- 运算求值
我们先来看词法分析:
首先定义我们需要用到的词法类型:
local token_type =
{add = 1, -- '+'minus = 2, -- '-'mul = 3, -- '*'div = 4, -- '/'lbrace = 5, -- '('rbrace = 6, -- ')'num = 7, -- '33.6' etc.
}return token_type
基本就是 加减乘除等 6 个符号类型,外加 1 个数字类型,其中数字类型有些特殊,我们除了需要知道他的类型(即数字类型)以外,还需要知道他的实际数值,所以我们需要对数字类型的数值进行额外的记录(后面会有说明).
接着我们来定义一些用于进行词法分析的辅助函数(和结构):
local token_map =
{['+'] = token_type.add,['-'] = token_type.minus,['*'] = token_type.mul,['/'] = token_type.div,['('] = token_type.lbrace,[')'] = token_type.rbrace,
}local function is_space(c)return c == ' ' or c == '\t'
endlocal function is_letter(c)return (c >= 'a' and c <= 'z') or(c >= 'A' and c <= 'Z')
endlocal function is_number(c)return c >= '0' and c <= '9'
endlocal function is_digit(c)return c == '.'
endlocal function skip_space(raw_exp, from_index)if raw_exp thenwhile is_space(raw_exp:sub(from_index, from_index)) dofrom_index = from_index + 1endendreturn from_index
endlocal function read_num(raw_exp, from_index)if raw_exp thenlocal start_index = from_indexlocal cur_char = raw_exp:sub(from_index, from_index)while is_number(cur_char) or is_digit(cur_char) dofrom_index = from_index + 1cur_char = raw_exp:sub(from_index, from_index)endreturn from_index, raw_exp:sub(start_index, from_index - 1)end
end
接着我们就可以进行实际的词法分析了,基本思路便是依次检查各个字符(中间会忽略空白字符),如果在 token_map 中有直接的 token 映射则直接解析成功,否则就尝试 read_num,代码中的 result_values 则是用于记录数字类型的实际数值,相关代码如下:
local function parse_token(raw_exp)local result_tokens = {}local result_values = {}if raw_exp thenlocal parse_index = 1while parse_index <= #raw_exp doparse_index = skip_space(raw_exp, parse_index)local cur_char = raw_exp:sub(parse_index, parse_index)-- handle normal tokenif token_map[cur_char] thentable.insert(result_tokens, token_map[cur_char])parse_index = parse_index + 1elselocal index, num = read_num(raw_exp, parse_index)if num and #num > 0 thenif tonumber(num) thentable.insert(result_tokens, token_type.num)-- store num valuesresult_values[#result_tokens] = tonumber(num)parse_index = indexelseprint("error token number : " .. num)parse_index = indexendelseprint("error to parse token : " .. raw_exp .. " at " .. parse_index)breakendendendendreturn result_tokens, result_values
end
最后我们对词法分析的结果再做一次封装,用以更方便的使用:
local lexer = {}
lexer.__index = lexerfunction lexer:init(tokens, values)self.tokens = tokens or {}self.values = values or {}self.index = 1
endfunction lexer:match(token)if self.tokens[self.index] == token thenreturn trueelsereturn falseend
endfunction lexer:peek()return self.tokens[self.index], self.values[self.index]
endfunction lexer:next()local token, value = self.tokens[self.index], self.values[self.index]self.index = self.index + 1return token, value
endfunction lexer.create(raw_exp)local new_lexer = setmetatable({}, lexer)new_lexer:init(parse_token(raw_exp))return new_lexer
endreturn lexer
OK, 词法分析结束,我们接着来做语法分析,其中的核心就是我们要明确四则运算表达式的 BNF 范式:
expression: term { ("+" | "-") term }
term: factor { ("*" | "/") factor }
factor: NUMBER | "(" expression ")" | - factor
上面就是经典的四则运算 BNF 范式,有了这个范式,我们便可以据此直接(按递归下降方式)写出语法分析的代码:
local parser = {}function parser.parse_factor(lexer)if lexer:match(token_type.num) thenlexer:next()elseif lexer:match(token_type.lbrace) thenlexer:next()parser.parse_expression(lexer)lexer:next()elseif lexer:match(token_type.minus) thenlexer:next()parser.parse_factor(lexer)elseprint("error to parse factor : " .. tostring(lexer:peek()))end
endfunction parser.parse_term(lexer)parser.parse_factor(lexer)while true doif lexer:match(token_type.mul) thenlexer:next()parser.parse_factor(lexer)elseif lexer:match(token_type.div) thenlexer:next()parser.parse_factor(lexer)elsebreakendend
endfunction parser.parse_expression(lexer)parser.parse_term(lexer)while true doif lexer:match(token_type.add) thenlexer:next()parser.parse_term(lexer)elseif lexer:match(token_type.minus) thenlexer:next()parser.parse_term(lexer)elsebreakendend
endfunction parser.parse(raw_exp)local lexer = lexer.create(raw_exp)parser.parse_expression(lexer)
endreturn parser
看到这里可能会产生疑问:我们的目的是实现四则运算的求值,上面的语法分析似乎只是解析了一遍语法(如果语法正确的话其实没有任何输出),怎么来进行实际的求值呢?
其实这个问题就引出了我们要介绍的第三个话题:语法树生成.其实在上面的语法分析过程中,我们不仅需要进行语法解析,还需要同时生成一颗对应的抽象语法树,而之后的四则运算求值就可以直接在这颗生成的抽象语法树上进行.
我们首先来定义语法树的各类节点:
local num_syntax_node = {}
num_syntax_node.__index = num_syntax_nodefunction num_syntax_node:init(num)self.num = numreturn self
endfunction num_syntax_node.create(num)local node = setmetatable({}, num_syntax_node)return node:init(num)
endreturn num_syntax_node
上面的就是最简单的数字节点,其余的节点(加减乘除)如下:
local add_syntax_node = {}
add_syntax_node.__index = add_syntax_nodefunction add_syntax_node:init(left_node, right_node)self.left_node = left_nodeself.right_node = right_nodereturn self
endfunction add_syntax_node.create(left_node, right_node)local node = setmetatable({}, add_syntax_node)return node:init(left_node, right_node)
endreturn add_syntax_node
local minus_syntax_node = {}
minus_syntax_node.__index = minus_syntax_nodefunction minus_syntax_node:init(left_node, right_node)self.left_node = left_nodeself.right_node = right_nodereturn self
endfunction minus_syntax_node.create(left_node, right_node)local node = setmetatable({}, minus_syntax_node)return node:init(left_node, right_node)
endreturn minus_syntax_node
local mul_syntax_node = {}
mul_syntax_node.__index = mul_syntax_nodefunction mul_syntax_node:init(left_node, right_node)self.left_node = left_nodeself.right_node = right_nodereturn self
endfunction mul_syntax_node.create(left_node, right_node)local node = setmetatable({}, mul_syntax_node)return node:init(left_node, right_node)
endreturn mul_syntax_node
local div_syntax_node = {}
div_syntax_node.__index = div_syntax_nodefunction div_syntax_node:init(left_node, right_node)self.left_node = left_nodeself.right_node = right_nodereturn self
endfunction div_syntax_node.create(left_node, right_node)local node = setmetatable({}, div_syntax_node)return node:init(left_node, right_node)
endreturn div_syntax_node
接着我们需要在上面的语法分析过程中生成对应的语法树节点:
local parser = {}function parser.parse_factor(lexer)if lexer:match(token_type.num) thenlocal _, num = lexer:next()return num_syntax_node.create(num)elseif lexer:match(token_type.lbrace) thenlexer:next()local node = parser.parse_expression(lexer)lexer:next()return nodeelseif lexer:match(token_type.minus) then-- convert to "0 - factor"lexer:next()local node = parser.parse_factor(lexer)return minus_syntax_node.create(num_syntax_node.create(0), node)elseprint("error to parse factor : " .. tostring(lexer:peek()))end
endfunction parser.parse_term(lexer)local node = parser.parse_factor(lexer)while true doif lexer:match(token_type.mul) thenlexer:next()node = mul_syntax_node.create(node, parser.parse_factor(lexer))elseif lexer:match(token_type.div) thenlexer:next()node = div_syntax_node.create(node, parser.parse_factor(lexer))elsebreakendendreturn node
endfunction parser.parse_expression(lexer)local node = parser.parse_term(lexer)while true doif lexer:match(token_type.add) thenlexer:next()node = add_syntax_node.create(node, parser.parse_term(lexer))elseif lexer:match(token_type.minus) thenlexer:next()node = minus_syntax_node.create(node, parser.parse_term(lexer))elsebreakendendreturn node
endfunction parser.parse(raw_exp)local lexer = lexer.create(raw_exp)local syntax_tree = parser.parse_expression(lexer)-- TODO use syntax_tree to evaluate
end
最后一个问题就是如何通过语法树进行求值了,过程其实很简单,直接定义各个节点的求值函数(为各个节点统一添加 evaluate 方法),然后递归求解即可:
local num_syntax_node = {}
num_syntax_node.__index = num_syntax_nodefunction num_syntax_node:init(num)self.num = numreturn self
endfunction num_syntax_node:evaluate()return self.num
endfunction num_syntax_node.create(num)local node = setmetatable({}, num_syntax_node)return node:init(num)
endreturn num_syntax_node
local add_syntax_node = {}
add_syntax_node.__index = add_syntax_nodefunction add_syntax_node:init(left_node, right_node)self.left_node = left_nodeself.right_node = right_nodereturn self
endfunction add_syntax_node:evaluate()local left_val = self.left_node:evaluate()local right_val = self.right_node:evaluate()return left_val + right_val
endfunction add_syntax_node.create(left_node, right_node)local node = setmetatable({}, add_syntax_node)return node:init(left_node, right_node)
endreturn add_syntax_node
local minus_syntax_node = {}
minus_syntax_node.__index = minus_syntax_nodefunction minus_syntax_node:init(left_node, right_node)self.left_node = left_nodeself.right_node = right_nodereturn self
endfunction minus_syntax_node:evaluate()local left_val = self.left_node:evaluate()local right_val = self.right_node:evaluate()return left_val - right_val
endfunction minus_syntax_node.create(left_node, right_node)local node = setmetatable({}, minus_syntax_node)return node:init(left_node, right_node)
endreturn minus_syntax_node
local mul_syntax_node = {}
mul_syntax_node.__index = mul_syntax_nodefunction mul_syntax_node:init(left_node, right_node)self.left_node = left_nodeself.right_node = right_nodereturn self
endfunction mul_syntax_node:evaluate()local left_val = self.left_node:evaluate()local right_val = self.right_node:evaluate()return left_val * right_val
endfunction mul_syntax_node.create(left_node, right_node)local node = setmetatable({}, mul_syntax_node)return node:init(left_node, right_node)
endreturn mul_syntax_node
local div_syntax_node = {}
div_syntax_node.__index = div_syntax_nodefunction div_syntax_node:init(left_node, right_node)self.left_node = left_nodeself.right_node = right_nodereturn self
endfunction div_syntax_node:evaluate()local left_val = self.left_node:evaluate()local right_val = self.right_node:evaluate()return left_val / right_val
endfunction div_syntax_node.create(left_node, right_node)local node = setmetatable({}, div_syntax_node)return node:init(left_node, right_node)
endreturn div_syntax_node
上述的语法分析代码修改如下:
function parser.parse(raw_exp)local lexer = lexer.create(raw_exp)local syntax_tree = parser.parse_expression(lexer)return syntax_tree:evaluate()
end
至此我们便可以顺利的对四则运算进行求值了:
local exp = "(3 + 4 * (5 + 6) - 7) * 0 + -3 * (5 + 6)"
print(parser.parse(exp))