/******************************************************************************
* File: ShuntingYard.cpp
* Author: Keith Schwarz (htiek@cs.stanford.edu)
*
* Implementation of the functions exported by ShuntingYard.hh.
*/
#include "ShuntingYard.hh"
#include "Lexer.hh" // Generated by strain from Expression.strain
#include <sstream>
#include <stdexcept>
#include <stack>
using namespace std;
using namespace strain;
/* Lexing a string works by taking the string, funneling it through the strain
* lexer, then funneling it back out to a vector.
*/
vector<string> InfixLex(const string& expr) {
vector<string> result;
/* Build up a stringstream holding the string to lex, then wrap it up in a
* strain lexer.
*/
istringstream streamWrapper(expr);
Lexer lexer(streamWrapper);
/* Continously read a token/lexeme pair from the lexer until we find the
* end of the stream. To do this, we use the comma operator to, in the same
* expression, read the lexeme and then check if it was valid.
*/
pair<Lexer::LexemeType, Lexeme> lexeme;
while (lexeme = lexer.next(), lexeme.first != Lexer::None)
result.push_back(lexeme_cast<const string>(lexeme.second));
return result;
}
/* Given a string, returns whether that string is an operator. */
static bool IsOperator(const string& token) {
return token == "+" ||
token == "-" ||
token == "*" ||
token == "/" ||
token == "%";
}
/* Given a string encoding an operator, returns the precedence of that
* operator. Currently there's only two levels of precedence, but this could
* easily be expanded upon.
*/
static int PrecedenceOf(const string& token) {
if (token == "+" || token == "-") return 0;
if (token == "*" || token == "/" || token == "%") return 1;
throw runtime_error("Unknown operator: " + token);
}
/* Implementation of the shunting-yard algorithm. The parameters are the input
* sequence, the output sequence, and the* operator stack, respectively.
*/
static void ShuntingYardConvert(const vector<string>& expr,
vector<string>& result,
stack<string>& operators) {
/* We will maintain a finite-state control determining what we should expect
* to find next in the input. Our grammar for infix expressions is
*
* E -> #
* E -> E op E
* E -> (E)
*
* This means that at any point, we can be in one of three states:
*
* 1. We're either at the start of the input or at the start of a
* parenthesized expression, in which case we need to find something that
* is a value (either another parenthesized expression or a literal).
* 2. We have just finished reading a value and expect to find either the
* end of the input, or a close parenthesis, or an operator.
* 3. We have just finished reading an operator and are expecting a value.
*
* This information can be distilled down and encoded as whether or not
* we are expecting to find an operator as the next token.
*/
bool expectingOperator = false;
/* Scan across all input tokens, updating the sequence as we go. */
for (size_t i = 0; i < expr.size(); ++i) {
/* Case 1: Token is an operator. */
if (IsOperator(expr[i])) {
/* Confirm we were expecting one. */
if (!expectingOperator)
throw runtime_error("Unexpected operator: " + expr[i]);
/* Next, move all operators off the stack until this operator has the
* highest precedence of them all.
*/
while (!operators.empty() && IsOperator(operators.top()) &&
PrecedenceOf(operators.top()) >= PrecedenceOf(expr[i])) {
result.push_back(operators.top()); operators.pop();
}
/* Add the operator to the operator stack. */
operators.push(expr[i]);
/* We're no longer expecting an operator; we just found one. */
expectingOperator = false;
}
/* Case 2: Found an open parenthesis. */
else if (expr[i] == "(") {
/* We had better not have been expecting an operator here! */
if (expectingOperator)
throw runtime_error("Expected operator, found (.");
operators.push(expr[i]);
}
/* Case 3: Found a close parenthesis. */
else if (expr[i] == ")") {
/* This should appear after parsing a number, which would be when an
* operator is expected.
*/
if (!expectingOperator)
throw runtime_error("Expected value, found ).");
/* Keep moving tokens over from the operator stack to the result until
* we find a matching open parenthesis. If we run out of tokens, then
* there must be a syntax error.
*/
while (!operators.empty() && operators.top() != "(") {
result.push_back(operators.top()); operators.pop();
}
/* We've now moved everything, so check whether we found a matching open
* parenthesis.
*/
if (operators.empty())
throw runtime_error("Imbalanced parentheses.");
/* Remove the parenthesis we matched. */
operators.pop();
/* We are now expecting an operator, since we just read a value. */
expectingOperator = true;
}
/* Case 4: Found a number. */
else {
/* If we're expecting an operator, we're very disappointed. */
if (expectingOperator)
throw runtime_error("Expecting operator, found " + expr[i]);
/* Shift the number, then say we're looking for an operator. */
result.push_back(expr[i]);
expectingOperator = true;
}
}
/* At this point we've parsed everything. We should be expecting an
* operator, since the last thing we read should have been a value.
*/
if (!expectingOperator)
throw runtime_error("Expected value, didn't find one.");
/* Keep shifting all the operators back off the operator stack onto the end
* of the input. If we find an open parenthesis, then the input is
* malformed.
*/
while (!operators.empty()) {
if (operators.top() == "(")
throw runtime_error("Imbalanced parentheses.");
result.push_back(operators.top()); operators.pop();
}
}
/* Implementation of the shunting-yard algorithm. */
vector<string> ShuntingYardToRPN(const vector<string>& expr) {
/* The resulting sequence of tokens. */
vector<string> result;
/* The operator stack. */
stack<string> operators;
/* Apply the actual shunting-yard algorithm to the input sequence. */
ShuntingYardConvert(expr, result, operators);
/* Return the resulting sequence. */
return result;
}