Writing a parser: ADL Parser – part 1

We’ll now write the Parser class:

using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using TC.Adl.ParserNodes;

namespace TC.Adl
{
  /// <summary>
  /// Reads tokens from a <see cref="Tokenizer"/> and turns them into statements.
  /// </summary>
  public class Parser
  {
    /// <summary>The tokenizer that tokens are read from.</summary>
    private Tokenizer _tokenizer;

    /// <summary>The current (most recently read) token.</summary>
    private Token _currentToken;
  }
}

Our Parser class has only 2 fields:

  • _tokenizer: the Tokenizer to read tokens from.
  • _currentToken: the current token (most recently read).

The constructor of the Parser class will accept a TextReader argument, create a Tokenizer that uses that TextReader, store it in _tokenizer and read the first token:

/// <summary>
/// Creates a parser on top of the given reader and reads the first token.
/// </summary>
/// <param name="source">The reader that the tokenizer will read from.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="source"/> is null.
/// </exception>
public Parser(TextReader source)
{
  if (source == null)
  {
    throw new ArgumentNullException("source");
  }

  Tokenizer tokenizer = new Tokenizer(source);
  _tokenizer = tokenizer;

  // prime the parser: make the first token the current one
  _currentToken = _tokenizer.ReadNextToken();
}

Now we’ll add some private helper methods.

Reading a token is simple: just call Tokenizer.ReadNextToken(), which returns a Token or null at the end of the source code.

/// <summary>
/// Asks the tokenizer for its next token and makes it the current token.
/// </summary>
private void ReadNextToken()
{
  Token next = _tokenizer.ReadNextToken();
  _currentToken = next;
}

To determine if we’re at the end of the source, we just have to check the current token for null:

/// <summary>
/// Gets whether there is no current token, i.e. whether the current token is null.
/// </summary>
private bool AtEndOfSource
{
  get
  {
    if (_currentToken == null)
    {
      return true;
    }

    return false;
  }
}

We’ll need a method that throws an exception when the end of the source has been reached unexpectedly:

/// <summary>
/// Throws when the end of the source has been reached; does nothing otherwise.
/// </summary>
/// <exception cref="ParserException">There is no current token.</exception>
private void CheckForUnexpectedEndOfSource()
{
  if (!AtEndOfSource)
  {
    return;
  }

  throw new ParserException("Unexpected end of source.");
}

We’ll also need a method that verifies the current token and skips it:

/// <summary>
/// Verifies that the current token has the given type and value and then
/// moves on to the next token.
/// </summary>
/// <param name="type">The token type that is required.</param>
/// <param name="value">The token text that is required.</param>
/// <exception cref="ParserException">
/// The end of the source has been reached, or the current token does not
/// match <paramref name="type"/> and <paramref name="value"/>.
/// </exception>
private void SkipExpected(TokenType type, string value)
{
  CheckForUnexpectedEndOfSource();

  bool isExpected = _currentToken.Equals(type, value);
  if (isExpected)
  {
    ReadNextToken();
    return;
  }

  throw new ParserException("Expected '" + value + "'.");
}

Now that we’ve written the private helper methods, we can write the only public method: the ReadNextStatement method. This method reads a statement and returns it. If we reach the end of the source, we return null, else we check the first token to determine the type of statement:

  • If the current token is the word if, it’s an if-statement.
  • If the current token is the word while, it’s a while-statement.
  • If the current token is the word for, it’s a for-statement.
  • If it’s any other word, we assume it’s an assignment or a function call.
/// <summary>
/// Reads the next statement from the source.
/// </summary>
/// <returns>
/// The statement that was read, or null when the end of the source has
/// already been reached.
/// </returns>
/// <exception cref="ParserException">
/// The current token is not a word, so it cannot start a statement.
/// </exception>
public Statement ReadNextStatement()
{
  if (AtEndOfSource)
  {
    return null;
  }

  // every kind of statement begins with a word
  if (_currentToken.Type != TokenType.Word)
  {
    throw new ParserException("Expected a statement.");
  }

  // the leading word decides which kind of statement follows
  switch (_currentToken.Value)
  {
    case "if":
      return ParseIfStatement();

    case "while":
      return ParseWhileStatement();

    case "for":
      return ParseForStatement();

    default:
      // any other word starts an assignment or a function call
      return ParseAssignmentOrFunctionCallStatement();
  }
}

An if-statement starts with the word if, followed by a condition, the word then, a block of statements, an optional block of statements prefixed with the word else and the words end if:

/// <summary>
/// Parses an if-statement:
/// 'if' condition 'then' statements [ 'else' statements ] 'end' 'if'.
/// Called when the current token is the word 'if'; that token is skipped
/// without being verified again.
/// </summary>
/// <returns>
/// The parsed if-statement. The collection of false statements is empty
/// when the source has no 'else' part.
/// </returns>
/// <exception cref="ParserException">
/// The source ends before the statement is complete, an expected keyword
/// is missing, or a second 'else' is encountered.
/// </exception>
IfStatement ParseIfStatement()
{
  ReadNextToken(); // skip 'if'

  Expression condition = ParseExpression();

  SkipExpected(TokenType.Word, "then"); // skip 'then'

  List<Statement> trueStatements = new List<Statement>();
  List<Statement> falseStatements = new List<Statement>();
  List<Statement> statements = trueStatements;
  bool elseSeen = false;

  while (true)
  {
    // Checked on every iteration: parsing a statement may consume the last
    // token, and the checks below dereference the current token.
    CheckForUnexpectedEndOfSource();

    if (_currentToken.Equals(TokenType.Word, "end"))
    {
      break;
    }

    if (_currentToken.Equals(TokenType.Word, "else"))
    {
      if (elseSeen)
      {
        throw new ParserException("Unexpected 'else'.");
      }

      elseSeen = true;
      ReadNextToken(); // skip 'else'
      statements = falseStatements;

      // Go back to the top so that an 'else' directly followed by 'end'
      // (an empty else block) is accepted.
      continue;
    }

    // Not at the end of the source here, so ReadNextStatement() either
    // returns a statement or throws.
    statements.Add(ReadNextStatement());
  }

  ReadNextToken(); // skip 'end'
  SkipExpected(TokenType.Word, "if"); // skip 'if'

  return new IfStatement(
    condition,
    new StatementCollection(trueStatements),
    new StatementCollection(falseStatements));
}

A while-statement starts with the word while, followed by a condition, the word do, a block of statements and the words end while:

/// <summary>
/// Parses a while-statement: 'while' condition 'do' statements 'end' 'while'.
/// Called when the current token is the word 'while'; that token is skipped
/// without being verified again.
/// </summary>
/// <returns>The parsed while-statement.</returns>
/// <exception cref="ParserException">
/// The source ends before the statement is complete, or an expected keyword
/// is missing.
/// </exception>
WhileStatement ParseWhileStatement()
{
  ReadNextToken(); // skip 'while'

  Expression condition = ParseExpression();

  SkipExpected(TokenType.Word, "do"); // skip 'do'

  List<Statement> statements = new List<Statement>();

  while (true)
  {
    // Checked on every iteration: parsing a statement may consume the last
    // token, and the check below dereferences the current token.
    CheckForUnexpectedEndOfSource();

    if (_currentToken.Equals(TokenType.Word, "end"))
    {
      break;
    }

    // Not at the end of the source here, so ReadNextStatement() either
    // returns a statement or throws.
    statements.Add(ReadNextStatement());
  }

  ReadNextToken(); // skip 'end'
  SkipExpected(TokenType.Word, "while"); // skip 'while'

  return new WhileStatement(condition, new StatementCollection(statements));
}

A for-statement starts with the word for, followed by a variable, the symbol :=, a start-value, the word to, an end-value, optionally the word by with a step-size, the word do, a block of statements and the words end for:

/// <summary>
/// Parses a for-statement:
/// 'for' variable ':=' start 'to' end [ 'by' step ] 'do' statements 'end' 'for'.
/// Called when the current token is the word 'for'; that token is skipped
/// without being verified again.
/// </summary>
/// <returns>
/// The parsed for-statement. When the source has no 'by' part, the step
/// size is the integer constant 1.
/// </returns>
/// <exception cref="ParserException">
/// The source ends before the statement is complete, the loop variable is
/// not a word, or an expected keyword or symbol is missing.
/// </exception>
ForStatement ParseForStatement()
{
  ReadNextToken(); // skip 'for'
  CheckForUnexpectedEndOfSource();

  if (_currentToken.Type != TokenType.Word)
  {
    throw new ParserException("Expected a variable.");
  }

  Variable variable = new Variable(_currentToken.Value);
  ReadNextToken();

  SkipExpected(TokenType.Symbol, ":="); // skip ':='
  Expression startValue = ParseExpression();

  SkipExpected(TokenType.Word, "to"); // skip 'to'
  Expression endValue = ParseExpression();
  CheckForUnexpectedEndOfSource();

  Expression stepSize;
  if (_currentToken.Equals(TokenType.Word, "by"))
  {
    ReadNextToken(); // skip 'by'
    stepSize = ParseExpression();
  }
  else
  {
    stepSize = new IntegerConstant(1);
  }

  SkipExpected(TokenType.Word, "do"); // skip 'do'

  List<Statement> statements = new List<Statement>();

  while (true)
  {
    // Checked on every iteration: parsing a statement may consume the last
    // token, and the check below dereferences the current token.
    CheckForUnexpectedEndOfSource();

    if (_currentToken.Equals(TokenType.Word, "end"))
    {
      break;
    }

    // Not at the end of the source here, so ReadNextStatement() either
    // returns a statement or throws.
    statements.Add(ReadNextStatement());
  }

  ReadNextToken(); // skip 'end'
  SkipExpected(TokenType.Word, "for"); // skip 'for'

  return new ForStatement(
    variable, startValue, endValue, stepSize,
    new StatementCollection(statements));
}

An assignment and a function call statement both start with an identifier, so we’ll have to read the next token to determine if it’s an assignment or a function call statement:

/// <summary>
/// Parses a statement that starts with an identifier. The token after the
/// identifier decides the kind: ':=' means an assignment, '(' means a
/// function call.
/// </summary>
/// <returns>The parsed assignment or function call statement.</returns>
/// <exception cref="ParserException">
/// The source ends after the identifier, or the identifier is followed by
/// something other than ':=' or '('.
/// </exception>
Statement ParseAssignmentOrFunctionCallStatement()
{
  // keep the identifier, then look at the token that follows it
  Token identifier = _currentToken;
  ReadNextToken();
  CheckForUnexpectedEndOfSource();

  bool isAssignment = _currentToken.Equals(TokenType.Symbol, ":=");
  if (isAssignment)
  {
    Variable target = new Variable(identifier.Value);
    return ParseAssignment(target);
  }
  else if (_currentToken.Equals(TokenType.Symbol, "("))
  {
    return new FunctionCallStatement(ParseFunctionCall(identifier.Value));
  }
  else
  {
    throw new ParserException("Expected a statement.");
  }
}

An assignment just has an expression after the :=:

/// <summary>
/// Parses the remainder of an assignment. Called when the current token is
/// the ':=' symbol; that token is skipped without being verified again.
/// </summary>
/// <param name="variable">The variable on the left-hand side of ':='.</param>
/// <returns>An assignment of the parsed expression to the variable.</returns>
Assignment ParseAssignment(Variable variable)
{
  ReadNextToken(); // consume ':='

  Expression value = ParseExpression();
  return new Assignment(variable, value);
}

In the next post, we’ll write the methods for parsing expressions.