Writing a parser: ADL Parser – part 2

In the previous part we wrote the first Parser methods, now we’ll write the methods for parsing expressions. In the introduction to ADL, I showed the order in which expressions are evaluated. To parse expressions, we’ll reverse the order and assume that everything is an AND-expression (lowest precedence):

Expression ParseExpression()
{
  return ParseAndExpression();
}

An AND-expression consists of one or more OR-expressions, separated by and operators:

Expression ParseAndExpression()
{
  Expression node = ParseOrExpression();

  while (!AtEndOfSource && _currentToken.Equals(TokenType.Word, "and"))
  {
    ReadNextToken(); // skip 'and'
    node = new AndExpression(node, ParseOrExpression());
  }

  return node;
}

An OR-expression consists of one or more comparisons, separated by or operators:

Expression ParseOrExpression()
{
  Expression node = ParseComparison();

  while (!AtEndOfSource && _currentToken.Equals(TokenType.Word, "or"))
  {
    ReadNextToken(); // skip 'or'
    node = new OrExpression(node, ParseComparison());
  }

  return node;
}

A comparison is an additive expression, or 2 additive expressions separated by a comparison operator:

Expression ParseComparison()
{
  Expression node = ParseAdditiveExpression();

  if (!AtEndOfSource && _currentToken.Type == TokenType.Symbol)
  {
    ComparisonOperator operator;
    switch (_currentToken.Value)
    {
      case "==": operator = ComparisonOperator.Equal; break;
      case "<>": operator = ComparisonOperator.NotEquals; break;
      case "<":  operator = ComparisonOperator.LessThan; break;
      case ">":  operator = ComparisonOperator.GreaterThan; break;
      case "<=": operator = ComparisonOperator.LessThanOrEqual; break;
      case ">=": operator = ComparisonOperator.GreaterThanOrEqual; break;
      default: return node;
    }
    
    ReadNextToken(); // skip comparison operator
    return new Comparison(operator, node, ParseAdditiveExpression());
  }
  
  return node;
}

An additive expression consists of one or more multiplicative expressions, separated by + or - operators:

Expression ParseAdditiveExpression()
{
  Expression node = ParseMultiplicativeExpression();

  while (!AtEndOfSource)
  {
    if (_currentToken.Equals(TokenType.Symbol, "+"))
    {
      ReadNextToken(); // skip '+'
      node = new Addition(node, ParseMultiplicativeExpression());
    }
    else if (_currentToken.Equals(TokenType.Symbol, "-"))
    {
      ReadNextToken(); // skip '-'
      node = new Subtraction(node, ParseMultiplicativeExpression());
    }
    else break;
  }

  return node;
}

A multiplicative expression consists of one or more unary expressions, separated by * or / operators:

Expression ParseMultiplicativeExpression()
{
  Expression node = ParseUnaryExpression();

  while (!AtEndOfSource)
  {
    if (_currentToken.Equals(TokenType.Symbol, "*"))
    {
      ReadNextToken(); // skip '*'
      node = new Multiplication(node, ParseUnaryExpression());
    }
    else if (_currentToken.Equals(TokenType.Symbol, "/"))
    {
      ReadNextToken(); // skip '/'
      node = new Division(node, ParseUnaryExpression());
    }
    else break;
  }

  return node;
}

A unary expression is a base expression, optionally prefixed by a -, + or not operator:

Expression ParseUnaryExpression()
{
  CheckForUnexpectedEndOfSource();

  if (_currentToken.Equals(TokenType.Symbol, "-"))
  {
    ReadNextToken(); // skip '-'
    return new Negation(ParseBaseExpression());
  }
  else if (_currentToken.Equals(TokenType.Word, "not"))
  {
    ReadNextToken(); // skip 'not'
    return new NotExpression(ParseBaseExpression());
  }
  else if (_currentToken.Equals(TokenType.Symbol, "+"))
  {
    ReadNextToken(); // skip '+'
  }

  return ParseBaseExpression();
}

A base expression is either an integer constant, a string constant, a variable, a function call or a group expression:

Expression ParseBaseExpression()
{
  CheckForUnexpectedEndOfSource();

  switch(_currentToken.Type)
  {
    case TokenType.Integer: return ParseIntegerConstant();
    case TokenType.String: return ParseStringConstant();
    case TokenType.Word: return ParseVariableOrFunctionCall();
    default: // TokenType.Symbol
      if (_currentToken.Value == "(")
      {
        return ParseGroupExpression();
      }
      else throw new ParserException("Expected an expression.");
  }
}

A group expression is an expression between parenthesis:

Expression ParseGroupExpression()
{
  ReadNextToken(); // skip '('
  Expression expression = ParseExpression();
  SkipExpected(TokenType.Symbol, ")"); // skip ')'
  return expression;
}

A variable and a function call both start with an identifier. To disambiguate, we have to read the next token:

Expression ParseVariableOrFunctionCall()
{
  string name = _currentToken.Value;
  ReadNextToken();
  if (!AtEndOfSource && _currentToken.Equals(TokenType.Symbol, "("))
  {
    return ParseFunctionCall(name);
  }
  else
  {
    return new Variable(name);
  }
}

A string constant is just the read string token:

Expression ParseStringConstant()
{
  string value = _currentToken.Value;
  ReadNextToken(); // skip string constant
  return new StringConstant(value);
}

An integer constant has to be parsed from the string value:

Expression ParseIntegerConstant()
{
  int value;
  if (int.TryParse(_currentToken.Value, out value))
  {
    ReadNextToken(); // skip integer constant
    return new IntegerConstant(value);
  }
  else throw new ParserException("Invalid integer constant " + _currentToken.Value);
}

A function call consists of an identifier, followed by zero or more arguments between parentheses:

FunctionCall ParseFunctionCall(string name)
{
  ReadNextToken(); // skip '('
  CheckForUnexpectedEndOfSource();

  List<Expression> arguments = new List<Expression>();
  if (!_currentToken.Equals(TokenType.Symbol, ")")
  {
    arguments.Add(ParseExpression());
    CheckForUnexpectedEndOfSource();

    while (_currentToken.Equals(TokenType.Symbol, ","))
    {
      ReadNextToken(); // skip ','
      arguments.Add(ParseExpression());
      CheckForUnexpectedEndOfSource();
    }

    if (!_currentToken.Equals(TokenType.Symbol, ")")
    {
      throw new ParserException("Expected ')'.");
    }
  }

  ReadNextToken(); // skip ')'
  return new FunctionCall(name, arguments.ToArray());
}

Now our parser is ready. To test the parser, I’ve created a test application that reads code from a TextBox, parses it and shows the parse tree in a TreeView. I’m not going to show the source code of the test application in this post, but you can download the complete Visual Studio 2005 solution with both projects (TC.Adl and TC.Adl.Test) here.