We’ll now write the Parser class:
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using TC.Adl.ParserNodes;
namespace TC.Adl
{
    /// <summary>
    /// Parser that works on the tokens produced by a <see cref="Tokenizer"/>.
    /// </summary>
    public class Parser
    {
        /// <summary>The tokenizer to read tokens from.</summary>
        private Tokenizer _tokenizer;

        /// <summary>The current (most recently read) token.</summary>
        private Token _currentToken;
    }
}
Our Parser class has only 2 fields:
- _tokenizer: the Tokenizer to read tokens from.
- _currentToken: the current token (most recently read).
The constructor of the Parser class will accept a TextReader argument, create a Tokenizer that uses that TextReader, store it in _tokenizer and read the first token:
/// <summary>
/// Creates a parser on top of the given reader and reads the first token
/// so that the current token is available right after construction.
/// </summary>
/// <param name="source">Reader that supplies the source code.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="source"/> is null.
/// </exception>
public Parser(TextReader source)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }

    _tokenizer = new Tokenizer(source);

    // prime the parser with the first token
    _currentToken = _tokenizer.ReadNextToken();
}
Now we’ll add some private helper methods.
Reading a token is simple: just call Tokenizer.ReadNextToken(), which returns a Token or null at the end of the source code.
/// <summary>
/// Fetches the next token from the tokenizer and makes it the current token.
/// The tokenizer hands back null once the source is exhausted.
/// </summary>
private void ReadNextToken()
{
    Token next = _tokenizer.ReadNextToken();
    _currentToken = next;
}
To determine if we’re at the end of the source, we just have to check the current token for null:
/// <summary>
/// True when there is no current token, i.e. the source has been fully read.
/// </summary>
private bool AtEndOfSource
{
    get
    {
        if (_currentToken == null)
        {
            return true;
        }

        return false;
    }
}
We’ll need a method that throws an exception when the end of the source has been reached unexpectedly:
/// <summary>
/// Guards code that needs a current token: does nothing while tokens are
/// available and throws once the end of the source has been reached.
/// </summary>
/// <exception cref="ParserException">The end of the source has been reached.</exception>
private void CheckForUnexpectedEndOfSource()
{
    if (!AtEndOfSource)
    {
        return;
    }

    throw new ParserException("Unexpected end of source.");
}
We’ll also need a method that verifies the current token and skips it:
/// <summary>
/// Verifies that the current token has the given type and value, then
/// advances past it.
/// </summary>
/// <param name="type">The token type that is required here.</param>
/// <param name="value">The token text that is required here.</param>
/// <exception cref="ParserException">
/// The source has ended, or the current token does not match.
/// </exception>
private void SkipExpected(TokenType type, string value)
{
    CheckForUnexpectedEndOfSource();

    bool matches = _currentToken.Equals(type, value);
    if (matches)
    {
        ReadNextToken();
        return;
    }

    throw new ParserException("Expected '" + value + "'.");
}
Now that we’ve written the private helper methods, we can write the only public method: the ReadNextStatement method. This method reads a statement and returns it. If we reach the end of the source, we return null; otherwise we check the first token to determine the type of statement:
- If the current token is the word if, it’s an if-statement.
- If the current token is the word while, it’s a while-statement.
- If the current token is the word for, it’s a for-statement.
- If it’s any other word, we assume it’s an assignment or a function call.
/// <summary>
/// Reads the next statement from the source.
/// </summary>
/// <returns>
/// The parsed statement, or null when the end of the source has been reached.
/// </returns>
/// <exception cref="ParserException">
/// The current token is not a word and therefore cannot start a statement.
/// </exception>
public Statement ReadNextStatement()
{
    if (AtEndOfSource)
    {
        return null;
    }

    // every kind of statement begins with a word
    if (_currentToken.Type != TokenType.Word)
    {
        throw new ParserException("Expected a statement.");
    }

    // the leading word selects the kind of statement
    switch (_currentToken.Value)
    {
        case "if":
            return ParseIfStatement();

        case "while":
            return ParseWhileStatement();

        case "for":
            return ParseForStatement();

        default:
            // any other word: an assignment or a function call
            return ParseAssignmentOrFunctionCallStatement();
    }
}
An if-statement starts with the word if, followed by a condition, the word then, a block of statements, an optional block of statements prefixed with the word else and the words end if:
/// <summary>
/// Parses an if-statement: the word 'if', a condition, the word 'then',
/// a block of statements, optionally the word 'else' followed by another
/// block of statements, and finally the words 'end if'.
/// </summary>
/// <returns>
/// The if-statement with its condition, the statements of the 'then' block
/// and the statements of the 'else' block (empty when there is none).
/// </returns>
/// <exception cref="ParserException">
/// The source ends before 'end if', or an expected word is missing.
/// </exception>
private IfStatement ParseIfStatement()
{
    ReadNextToken(); // skip 'if'
    Expression condition = ParseExpression();
    SkipExpected(TokenType.Word, "then"); // skip 'then'

    List<Statement> trueStatements = new List<Statement>();
    List<Statement> falseStatements = new List<Statement>();
    List<Statement> statements = trueStatements; // the block currently being filled

    while (true)
    {
        // Check on every iteration: a nested statement may have consumed the
        // last token, and the current token must not be dereferenced when null.
        CheckForUnexpectedEndOfSource();
        if (_currentToken.Equals(TokenType.Word, "end"))
        {
            break;
        }

        if (_currentToken.Equals(TokenType.Word, "else"))
        {
            ReadNextToken(); // skip 'else'
            statements = falseStatements;

            // An 'else' block may be empty, just like the 'then' block.
            CheckForUnexpectedEndOfSource();
            if (_currentToken.Equals(TokenType.Word, "end"))
            {
                break;
            }
        }

        // Not at the end of the source here, so ReadNextStatement
        // either returns a statement or throws.
        statements.Add(ReadNextStatement());
    }

    ReadNextToken(); // skip 'end'
    SkipExpected(TokenType.Word, "if"); // skip 'if'

    return new IfStatement(
        condition,
        new StatementCollection(trueStatements),
        new StatementCollection(falseStatements));
}
A while-statement starts with the word while, followed by a condition, the word do, a block of statements and the words end while:
/// <summary>
/// Parses a while-statement: the word 'while', a condition, the word 'do',
/// a block of statements and the words 'end while'.
/// </summary>
/// <returns>The while-statement with its condition and body statements.</returns>
/// <exception cref="ParserException">
/// The source ends before 'end while', or an expected word is missing.
/// </exception>
private WhileStatement ParseWhileStatement()
{
    ReadNextToken(); // skip 'while'
    Expression condition = ParseExpression();
    SkipExpected(TokenType.Word, "do"); // skip 'do'

    List<Statement> statements = new List<Statement>();

    while (true)
    {
        // Check on every iteration: a nested statement may have consumed the
        // last token, and the current token must not be dereferenced when null.
        CheckForUnexpectedEndOfSource();
        if (_currentToken.Equals(TokenType.Word, "end"))
        {
            break;
        }

        // Not at the end of the source here, so ReadNextStatement
        // either returns a statement or throws.
        statements.Add(ReadNextStatement());
    }

    ReadNextToken(); // skip 'end'
    SkipExpected(TokenType.Word, "while"); // skip 'while'

    return new WhileStatement(condition, new StatementCollection(statements));
}
A for-statement starts with the word for, followed by a variable, the symbol :=, a start-value, the word to, an end-value, optionally the word by with a step-size, the word do, a block of statements and the words end for:
/// <summary>
/// Parses a for-statement: the word 'for', a variable, the symbol ':=',
/// a start value, the word 'to', an end value, optionally the word 'by'
/// with a step size, the word 'do', a block of statements and the words
/// 'end for'.
/// </summary>
/// <returns>
/// The for-statement; the step size is the integer constant 1 when no
/// 'by' part is present.
/// </returns>
/// <exception cref="ParserException">
/// The source ends before 'end for', the loop variable is not a word,
/// or an expected word or symbol is missing.
/// </exception>
private ForStatement ParseForStatement()
{
    ReadNextToken(); // skip 'for'

    CheckForUnexpectedEndOfSource();
    if (_currentToken.Type != TokenType.Word)
    {
        throw new ParserException("Expected a variable.");
    }
    Variable variable = new Variable(_currentToken.Value);
    ReadNextToken(); // skip the variable name

    SkipExpected(TokenType.Symbol, ":="); // skip ':='
    Expression startValue = ParseExpression();
    SkipExpected(TokenType.Word, "to"); // skip 'to'
    Expression endValue = ParseExpression();

    CheckForUnexpectedEndOfSource();
    Expression stepSize;
    if (_currentToken.Equals(TokenType.Word, "by"))
    {
        ReadNextToken(); // skip 'by'
        stepSize = ParseExpression();
    }
    else
    {
        // no explicit step size: count up by one
        stepSize = new IntegerConstant(1);
    }

    SkipExpected(TokenType.Word, "do"); // skip 'do'

    List<Statement> statements = new List<Statement>();

    while (true)
    {
        // Check on every iteration: a nested statement may have consumed the
        // last token, and the current token must not be dereferenced when null.
        CheckForUnexpectedEndOfSource();
        if (_currentToken.Equals(TokenType.Word, "end"))
        {
            break;
        }

        // Not at the end of the source here, so ReadNextStatement
        // either returns a statement or throws.
        statements.Add(ReadNextStatement());
    }

    ReadNextToken(); // skip 'end'
    SkipExpected(TokenType.Word, "for"); // skip 'for'

    return new ForStatement(
        variable, startValue, endValue, stepSize,
        new StatementCollection(statements));
}
An assignment and a function call statement both start with an identifier, so we’ll have to read the next token to determine if it’s an assignment or a function call statement:
/// <summary>
/// Parses a statement that starts with an identifier. The token after the
/// identifier decides the kind: ':=' means an assignment, '(' means a
/// function call.
/// </summary>
/// <exception cref="ParserException">
/// The source ends after the identifier, or the identifier is followed by
/// neither ':=' nor '('.
/// </exception>
Statement ParseAssignmentOrFunctionCallStatement()
{
    // remember the identifier, then look at the token that follows it
    Token identifier = _currentToken;
    ReadNextToken();
    CheckForUnexpectedEndOfSource();

    bool isAssignment = _currentToken.Equals(TokenType.Symbol, ":=");
    if (isAssignment)
    {
        Variable target = new Variable(identifier.Value);
        return ParseAssignment(target);
    }

    if (!_currentToken.Equals(TokenType.Symbol, "("))
    {
        throw new ParserException("Expected a statement.");
    }

    return new FunctionCallStatement(ParseFunctionCall(identifier.Value));
}
An assignment just has an expression after the := symbol:
/// <summary>
/// Parses the remainder of an assignment: skips the ':=' symbol and reads
/// the expression whose value is assigned.
/// </summary>
/// <param name="variable">The variable on the left-hand side of ':='.</param>
/// <returns>The assignment of the parsed expression to the variable.</returns>
Assignment ParseAssignment(Variable variable)
{
    ReadNextToken(); // skip ':='

    Expression value = ParseExpression();
    return new Assignment(variable, value);
}
In the next post, we’ll write the methods for parsing expressions.