Commit 9027f75c authored by Christophe Dubach

initial commit for part 1 of CT coursework

<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<wildcardResourcePatterns>
<entry name="!?*.java" />
<entry name="!?*.form" />
<entry name="!?*.class" />
<entry name="!?*.groovy" />
<entry name="!?*.scala" />
<entry name="!?*.flex" />
<entry name="!?*.kt" />
<entry name="!?*.clj" />
</wildcardResourcePatterns>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding">
<file url="PROJECT" charset="UTF-8" />
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="KotlinCommonCompilerArguments">
<option name="languageVersion" value="1.1" />
<option name="apiVersion" value="1.1" />
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="false" project-jdk-name="1.8" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/ct-19-20.iml" filepath="$PROJECT_DIR$/ct-19-20.iml" />
</modules>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>
# Description of the coursework is likely to be updated! #
Please note that the description of the coursework might be updated from time to time to clarify things or fix mistakes.
# Deadlines #
1. [Part 1 (parser)](desc/part1/), Thursday 10 October 2019 at 11am, weight = 20%
2. Part 2 (ast builder + semantic analyser), Thursday 24 October 2019 at 11am, weight = 20%
3. Part 3 (code generator), Thursday 14 November 2019 at 11am, weight = 30%
4. Part 4 (LLVM-based compiler pass), Monday 13 January 2020, 10am, weight = 30%
Note that specific instructions for each part can be found above by clicking on the part name.
# Scoreboard #
We automatically run a series of hidden test programs using your compiler about twice a day.
You can keep track of your progress and see how many tests pass/fail using the scoreboard by following this link:
[http://www.inf.ed.ac.uk/teaching/courses/ct/19-20/scoreboard/scoreboard.html](http://www.inf.ed.ac.uk/teaching/courses/ct/19-20/scoreboard/scoreboard.html)
# Marking #
The marking will be done using an automated test suite on a DICE machine using Java 8 (1.8 runtime).
Please note that you are not allowed to modify the `Main.java` file, which is the main entry point to the compiler.
A checksum of the file will be performed to ensure that it has not been tampered with.
Also make sure that the build script provided remains unchanged so that your project can be built on DICE.
Furthermore, you may not use any external libraries.
For parts 1-3 of the coursework, the marking will be a function of the number of successful tests as shown in the scoreboard and a series of hidden tests.
## Parts 1-2
66.6% of the mark will be determined by the scoreboard tests and 33.3% will be determined by the hidden tests.
You will get one point for each passing test and -1 for each failing test (only for the lexer, parser and semantic analysis components).
Then, the mark is calculated by dividing the number of points achieved by the number of tests.
The hidden tests are marked independently from the visible ones.
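For example, under this scheme, passing 15 of 20 tests and failing the remaining 5 gives (15 - 5)/20 = 50% for that set of tests.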
## Part 3
66.6% of the mark will be determined by the visible scoreboard tests, 33.3% by the hidden tests, and 10% by the test coverage.
The mark will be directly proportional to the number of passed tests (no negative points).
## Part 4
To be announced
# Tests #
Although we do not mark you on the tests you will create for testing your own compiler, we do ask you to add all the tests you used into the `tests` folder of your repository.
If a student has no tests (or very few) yet manages to pass most of our tests, this will raise suspicion that this might be a case of academic misconduct.
Also make sure that you do not share your tests; they should be written by you alone (we will run plagiarism-detection software on all the code in your repository, including the tests).
# Setup #
## Register your student id and name
First, we need you to fill in [this Google form](https://docs.google.com/forms/d/18vX_s1xaNRx8lRaIthIRmXf_QlPbishkFAE5dE4jmuE)
in order for us to register you for the automarking.
If you are not registered, we won't be able to mark you.
Also please make sure to keep `ct-19-20` as your repository name, otherwise automarking will fail.
## GitLab ##
We will rely on GitLab, and it is mandatory to use it for this coursework.
GitLab is an online repository service that can be used with the git revision control system.
The university runs a GitLab hosting service, and all students are provided with an account.
The username is your university ID number (sXXXXXXX) and your password is your EASE password.
Please make sure to use your university ID when logging in to the University GitLab; otherwise, we will be unable to automatically mark your coursework.
Important: do not share your code and repository with anyone and keep your source code secret.
If we identify that two students have identical portions of code, both will be considered to have cheated.
## Obtaining your own copy of the ct-19-20 repository
We are going to be using the Git revision control system during the course. Git is installed on DICE machines. If you use your own machine then make sure to install Git.
You will need to have your own copy of the ct-19-20 repository. In order to fork this repository, click the fork button:
![Forking the CT-19-20 repository](/figures/gl_fork1.png "Forking this repository.")
![Forking the CT-19-20 repository](/figures/gl_fork2.png "Forking this repository.")
Then, make the repository private
![Making repository private](/figures/gl_private1.png "Making repository private.")
![Making repository private](/figures/gl_private2.png "Making repository private.")
![Making repository private](/figures/gl_private3.png "Making repository private.")
Now, grant access to the teaching staff
![Granting the teaching staff read access](/figures/gl_permissions1.png "Granting the teaching staff read access.")
![Granting the teaching staff read access](/figures/gl_permissions2.png "Granting the teaching staff read access.")
You should grant the following users *Reporter* access:
* Christophe Dubach (username: cdubach)
* Hugh Leather (username: hleather)
* Aaron Smith (username: asmith47)
* Christof Schlaak (username: s1894023)
* Rodrigo Rocha (username: s1533346)
* Andrej Ivanis (username: s1210443)
Next, you will have to clone the forked repository to your local machine. In order to clone the repository you should launch a terminal and type:
```
$ git clone https://sXXXXXXX@git.ecdf.ed.ac.uk/sXXXXXXX/ct-19-20.git
```
where sXXXXXXX is your student ID.
## Development environment (editor)
You can choose to use a development environment for your project. DICE machines have the following integrated development environments (IDE) for Java installed:
* Community edition of [IntelliJ](https://www.jetbrains.com/idea/).
* [Eclipse](https://www.eclipse.org/downloads/packages/eclipse-ide-java-developers/marsr) for Java.
Alternatively, you can use Emacs, vim, or your favourite text editor. Choose whichever you are confident with.
We recommend using IntelliJ IDEA. To launch it on DICE machines, open a terminal and type:
```
idea18-ce
```
To import the project, after IDEA launches select "Import Project" and select the root directory of your project. On the following screen, ensure that the "Create project from existing sources" option is selected. You will then be presented with a series of screens. Just keep selecting "Next" without modifying any options. If you are asked whether to overwrite an existing .iml file, select the overwrite option.
To confirm that the project is set up correctly, you can try to run the Main.java file directly from the IDE. To do so, right-click the Main file in the src directory. In the context menu, select the "Run Main.main()" option. The program should now run successfully.
If you prefer to use Eclipse, then launch it via:
```
eclipse
```
Once Eclipse has started, all you have to do is go to File > Open Projects from File System, select the root directory of your project, and click Finish.
## Building the ct-19-20 project
In order to build the project you must have Ant installed. On DICE machines Ant is already installed.
Your local copy of the ct-19-20 repository contains an Ant build file (`build.xml`).
If you are using an IDE, then you can import the build file.
Otherwise, you can build the project from the commandline by typing:
```
$ ant build
```
This command outputs your compiler in a directory called `bin` within the project structure. Thereafter, you can run your compiler from the commandline by typing:
```
$ java -cp bin Main
```
The parameter `cp` instructs the Java Runtime to include the local directory `bin` when it looks for class files.
You can find a series of tests in the `tests` folder. To run the lexer on one of them, you can type:
```
$ java -cp bin Main -lexer tests/fibonacci.c dummy.out
```
You can clean the `bin` directory by typing:
```
$ ant clean
```
This command effectively deletes the `bin` directory.
## Working with git and pushing your changes
Since we are using an automated marking mechanism (based on how many programs can run successfully through your compiler), it is important to understand how git works. If you want to benefit from the nightly automatic marking feedback, please ensure that you push all your changes daily onto your GitLab centralised repository.
We suggest you follow the excellent [tutorial](https://www.atlassian.com/git/tutorials/what-is-version-control) from Atlassian on how to use git. In particular, you will need to understand the following basic mechanisms:
* [add and commit](https://www.atlassian.com/git/tutorials/saving-changes)
* [push](https://www.atlassian.com/git/tutorials/syncing/git-push)
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- SCC Ant build file
@author Daniel Hillerström (dhil)
@description This file is a (mostly) generic ant build file for your SCC project.
You might have to change the value of the attribute "name" inside
the "project"-tag below. The value should the name of your project.
You can import this build file into Eclipse or IntelliJ.
In addition, you can use it directly from the commandline:
$ ant build
The above command will build your compiler. Ant will create
and output your compiler in a directory called "bin".
After a successful build you can run your compiler by typing
$ java -cp bin Main
To clean "bin" simply type
$ ant clean
on the commandline.
-->
<!-- Replace the value of the attribute "name" with the name of YOUR project. -->
<project basedir="." default="build" name="ct-19-20">
<!-- You do not need to touch anything below this comment -->
<property name="src" location="src"/>
<property name="bin" location="bin"/>
<property environment="env"/>
<property name="debuglevel" value="source,lines,vars"/>
<property name="target" value="1.8"/>
<property name="source" value="1.8"/>
<target name="init">
<tstamp/>
<mkdir dir="${bin}"/>
<copy includeemptydirs="false" todir="${bin}">
<fileset dir="${src}">
<exclude name="**/*.launch"/>
<exclude name="**/*.java"/>
<exclude name="**/*.class"/>
</fileset>
</copy>
</target>
<target name="clean">
<delete dir="${bin}"/>
</target>
<target depends="clean" name="cleanall"/>
<target depends="build-subprojects,build-project" name="build"/>
<target name="build-subprojects"/>
<target depends="init" name="build-project">
<echo message="${ant.project.name}: ${ant.file}"/>
<javac debug="true" debuglevel="${debuglevel}" destdir="bin" includeantruntime="false" source="${source}" target="${target}">
<src path="src"/>
</javac>
</target>
<target description="Build all projects which reference this project. Useful to propagate changes." name="build-refprojects"/>
<target name="Main">
<java classname="Main" failonerror="true" fork="yes" classpath="${bin}">
<arg line="-parser tests/simple.c tests/simple.out"/>
</java>
</target>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
# Part I : Parsing
The goal of part I is to write a lexical and syntactic analyser (a parser) for mini-C, a subset of C.
As you have learnt in the course, parsing consists of three parts:
1. Scanner: the job of the scanner is to read the input file one character at a time.
2. Lexer: the lexer transforms the stream of characters into a stream of tokens. These tokens represent lexemes (i.e. the equivalent of words in natural languages).
3. Parser: the parser finally consumes the tokens and determines whether the input conforms to the rules of the grammar.
The scanner has already been implemented for you and we provide some partial implementations of the lexer and parser.
You will have to implement the rest.
We strongly encourage you to write a recursive descent parser and, as such, to make your grammar LL(k).
We have provided utility functions in the parser class to allow lookahead.
## 0. C language
We strongly encourage you to familiarise yourself with C before you start implementing the compiler.
Since our target language mini-C is a subset of C, it might be a good idea to learn how to use a C compiler such as gcc.
If in doubt about the language semantics when implementing your compiler, a good default is to use the same semantics as C.
This [web tutorial](https://www.tutorialspoint.com/cprogramming/index.htm) is a good starting point and contains most of the information you will need.
Since we are only targeting a subset of C, you do not have to read all of it but only the parts that correspond to our target language (which is described by the EBNF grammar).
## 1. Lexing
The file `Tokeniser.java` contains a partial implementation of a lexer. Your job is to complete the implementation.
In particular, you have to complete the implementation of the method `next` in the `Tokeniser`-class.
It is strongly recommended that you fill in the missing details, rather than implementing your own `Lexer` from scratch.
Furthermore, do not remove the existing public methods, e.g. `getErrorCount` and `nextToken`.
The tokens that your lexer needs to recognise, together with their regular-expression definitions, are given in the file `Token.java`.
Note that you are **not** allowed to use [the Java regular expression matcher](https://docs.oracle.com/javase/7/docs/api/java/util/regex/Matcher.html) in your implementation!
Please note that comments should be treated as whitespace. Comments can either be single line:
`// this is a comment`
or multiline
`/* this is a
multiline comment */`
The "#include" directive should be tokenised into the INCLUDE token. However, you should not do anything with the "include" preprocessor directive (it is completely ignored in the rest of the compiler).
Furthermore, for numbers we consider only integers, and therefore you need not implement support for hexadecimal or octal numbers.
The list of escape characters is the same as in Java, plus the null character `'\0'`.
Please check this [link](http://docs.oracle.com/javase/tutorial/java/data/characters.html) for the full list of Java escaped characters.
For instance, `"I am a \"string\""` should return a string token whose data field is `I am a "string"`.
If an unknown escaped character appears, this should be reported as an error.
Hint: it is recommended to use the [Character-class methods](https://docs.oracle.com/javase/7/docs/api/java/lang/Character.html) to test whether a character is a digit, whitespace, etc.
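As an illustration of these techniques, here is a minimal, self-contained sketch. It is not the provided `Tokeniser` (the class name, the printed token names and the set of tokens it handles are invented for the example), but it shows how whitespace and both comment forms can be skipped and how the `Character` methods can be used to recognise integer literals and identifiers:
```
public class MiniLexSketch {
    public static void main(String[] args) {
        String src = "int x; // a comment\n/* a multi-line\n   comment */ x = 42;";
        int i = 0;
        while (i < src.length()) {
            char c = src.charAt(i);
            if (Character.isWhitespace(c)) { i++; continue; }              // whitespace produces no token
            if (c == '/' && i + 1 < src.length() && src.charAt(i + 1) == '/') {
                while (i < src.length() && src.charAt(i) != '\n') i++;     // skip single-line comment
                continue;
            }
            if (c == '/' && i + 1 < src.length() && src.charAt(i + 1) == '*') {
                i += 2;                                                    // skip "/*"
                while (i + 1 < src.length() && !(src.charAt(i) == '*' && src.charAt(i + 1) == '/')) i++;
                i += 2;                                                    // skip "*/"
                continue;
            }
            if (Character.isDigit(c)) {                                    // integer literal
                int start = i;
                while (i < src.length() && Character.isDigit(src.charAt(i))) i++;
                System.out.println("INT_LITERAL(" + src.substring(start, i) + ")");
                continue;
            }
            if (Character.isLetter(c) || c == '_') {                       // identifier or keyword
                int start = i;
                while (i < src.length()
                        && (Character.isLetterOrDigit(src.charAt(i)) || src.charAt(i) == '_')) i++;
                System.out.println("IDENT(" + src.substring(start, i) + ")");
                continue;
            }
            System.out.println("OTHER(" + c + ")");                        // operators, delimiters, ...
            i++;
        }
    }
}
```
In the actual `Tokeniser` you would construct `Token` objects instead of printing strings.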
## 2. Grammar
Your next job consists of taking the [grammar](../../grammar/ebnf.txt), expressed in EBNF form, and transforming it into an equivalent context-free LL(k) grammar.
You should make sure that the resulting grammar is unambiguous, eliminate left recursion, and ensure that the usual C precedence rules for operators are respected, based on this table:
| Precedence |Operator | Description |
| :------------ | :------------ | :----------- |
| 1 | () | Function call |
| 1 | \[\] | Array subscripting |
| 1 | . | Structure member access |
| 2 | - | Unary minus |
| 2 | (type) | Type cast |
| 2 | * | Pointer indirection |
| 2 | sizeof(type) | Size of type|
| 3 | * / % | Multiplication, division, remainder |
| 4 | + - | Addition, subtraction |
| 5 | < <= > >= | Relational operators |
| 6 | == \!= | Equality operators |
| 7 | && | Logical AND |
| 8 | \|\| | Logical OR |
For instance, the expression 2\*3+2 should be parsed as (2\*3)+2.
Note that although we require you to parse expressions following these precedence rules, this will only be checked in part 2 of the coursework, where you will have to output the Abstract Syntax Tree.
However, we encourage you to directly implement the correct precedence rules to avoid problems in the later stages of your compiler.
This is done by modifying the grammar slightly as seen during the lectures.
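The grammar transformation itself is covered in the lectures; as a concrete illustration of the effect it has on a recursive descent parser, here is a small self-contained sketch (not the coursework `Parser` class; all names are made up for the example) with one parse method per precedence level and iteration in place of left recursion, so that `2*3+2` is grouped as `(2*3)+2`:
```
public class PrecedenceSketch {
    private final String[] toks;
    private int pos = 0;

    PrecedenceSketch(String[] toks) { this.toks = toks; }

    // additive ::= multiplicative (("+" | "-") multiplicative)*
    String parseAdditive() {
        String left = parseMultiplicative();
        while (pos < toks.length && (toks[pos].equals("+") || toks[pos].equals("-"))) {
            String op = toks[pos++];
            left = "(" + left + op + parseMultiplicative() + ")";   // left-associative grouping
        }
        return left;
    }

    // multiplicative ::= primary (("*" | "/" | "%") primary)*
    String parseMultiplicative() {
        String left = parsePrimary();
        while (pos < toks.length
                && (toks[pos].equals("*") || toks[pos].equals("/") || toks[pos].equals("%"))) {
            String op = toks[pos++];
            left = "(" + left + op + parsePrimary() + ")";
        }
        return left;
    }

    // primary ::= INT_LITERAL | "(" additive ")"
    String parsePrimary() {
        if (toks[pos].equals("(")) {
            pos++;                                                  // consume "("
            String e = parseAdditive();
            pos++;                                                  // consume ")"
            return e;
        }
        return toks[pos++];                                         // an integer literal
    }

    public static void main(String[] args) {
        // Prints ((2*3)+2): '*' binds tighter than '+'.
        System.out.println(new PrecedenceSketch(new String[]{"2", "*", "3", "+", "2"}).parseAdditive());
    }
}
```
Extending the same pattern with one level per row of the table above gives the full expression grammar.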
## 3. Parser
After having transformed the grammar into an LL(k) grammar and implemented the lexer, you will have to implement the parser.
The parser determines whether a given source program is syntactically correct or incorrect.
A partial implementation of a recursive-descent parser has already been provided.
The provided `Parser`-class has the following interface:
* `int getErrorCount()` returns the number of parsing errors.
* `void parse()` initiates the parsing of a given source program.
In addition, the `Parser`-class contains various private methods, of which some are utility methods, e.g.
* `void error(TokenClass... expected)` takes a variable number of expected tokens, and emits an error accordingly.
* `Token lookAhead(int i)` returns the `i`'th token in the token-stream.
* `void nextToken()` advances the token-stream by one, i.e. it consumes one token from the stream.
* `Token expect(TokenClass... expected)` takes a variable number of expected tokens, and consumes them from the token-stream if present, otherwise it generates an error using the `error`-method.
* `boolean accept(TokenClass... expected)` tests whether the next token(s) are identical to the `expected`. However, it *does not* consume any tokens from the token-stream.
* `void parseProgram()` parses a "Program-production" from the LL(k) grammar. Similarly, `void parseIncludes()` parses an "Includes-production". Three additional empty methods have been provided: `parseStructDecls`, `parseVarDecls` and `parseFunDecls` are to be completed by you. Furthermore, you will need to add more parse methods yourself. For each nonterminal you should have a corresponding parse method.
Your parser *should* only determine whether a given source program is syntactically correct.
The `Main`-class relies on the error count provided by the `Parser`-class.
Therefore, make sure you use the `error`-method in the `Parser`-class to report errors correctly!
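For instance, a parse method for the `stmt` alternative `"while" "(" exp ")" stmt` could look like the following fragment, meant to live inside the provided `Parser` class. The `TokenClass` constants used here (`WHILE`, `LPAR`, `RPAR`) are assumed names, since the full enum is not shown above, and `parseExp`/`parseStmt` stand for the additional per-nonterminal methods you would write yourself:
```
private void parseWhileStmt() {
    // stmt ::= "while" "(" exp ")" stmt
    expect(TokenClass.WHILE);   // "while" keyword
    expect(TokenClass.LPAR);    // "("
    parseExp();                 // loop condition
    expect(TokenClass.RPAR);    // ")"
    parseStmt();                // loop body
}
```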
## Files
* grammar/ebnf.txt : This file describes the grammar of our mini-C language in EBNF format.
* Scanner : This class implements the scanner, which returns the characters of the input file one at a time.
* Token : This class represents the different tokens of the language.
* Tokeniser: This class converts the stream of characters into tokens.
# # comment
# () grouping
# [] optional
# * zero or more
# + one or more
# | alternative
program ::= (include)* (structdecl)* (vardecl)* (fundecl)* EOF
include ::= "#include" STRING_LITERAL
structdecl ::= structtype "{" (vardecl)+ "}" ";" # structure declaration
vardecl ::= type IDENT ";" # normal declaration, e.g. int a;
| type IDENT "[" INT_LITERAL "]" ";" # array declaration, e.g. int a[2];
fundecl ::= type IDENT "(" params ")" block # function declaration
type ::= ("int" | "char" | "void" | structtype) ["*"]
structtype ::= "struct" IDENT
params ::= [ type IDENT ("," type IDENT)* ]
stmt ::= block
| "while" "(" exp ")" stmt # while loop
| "if" "(" exp ")" stmt ["else" stmt] # if then else
| "return" [exp] ";" # return
| exp "=" exp ";" # assignment
| exp ";" # expression statement, e.g. a function call
block ::= "{" (vardecl)* (stmt)* "}"
exp ::= "(" exp ")"
| (IDENT | INT_LITERAL)
| "-" exp
| CHAR_LITERAL
| STRING_LITERAL
| exp (">" | "<" | ">=" | "<=" | "!=" | "==" | "+" | "-" | "/" | "*" | "%" | "||" | "&&") exp # binary operators
| arrayaccess | fieldaccess | valueat | funcall | sizeof | typecast
funcall ::= IDENT "(" [ exp ("," exp)* ] ")"
arrayaccess ::= exp "[" exp "]" # array access
fieldaccess ::= exp "." IDENT # structure field member access
valueat ::= "*" exp # Value at operator (pointer indirection)
sizeof ::= "sizeof" "(" type ")" # size of type
typecast ::= "(" type ")" exp # type casting
import lexer.Scanner;
import lexer.Token;
import lexer.Tokeniser;
import parser.Parser;
import java.io.File;
import java.io.FileNotFoundException;
/**
* The Main file implies an interface for the subsequent components, e.g.
* * The Tokeniser must have a constructor which accepts a Scanner,
* moreover Tokeniser must provide a public method getErrorCount
* which returns the total number of lexing errors.
*/
public class Main {
private static final int FILE_NOT_FOUND = 2;
private static final int MODE_FAIL = 254;
private static final int LEXER_FAIL = 250;
private static final int PARSER_FAIL = 245;
private static final int SEM_FAIL = 240;
private static final int PASS = 0;
private enum Mode {
LEXER, PARSER, AST, SEMANTICANALYSIS, GEN
}
private static void usage() {
System.out.println("Usage: java "+Main.class.getSimpleName()+" pass inputfile outputfile");
System.out.println("where pass is either: -lexer, -parser, -ast, -sem or -gen");
System.exit(-1);
}
public static void main(String[] args) {
if (args.length != 3)
usage();
Mode mode = null;
switch (args[0]) {
case "-lexer": mode = Mode.LEXER; break; case "-parser": mode = Mode.PARSER; break;
case "-ast": mode = Mode.AST; break; case "-sem": mode = Mode.SEMANTICANALYSIS; break;
case "-gen": mode = Mode.GEN; break;
default:
usage();
break;
}
File inputFile = new File(args[1]);
File outputFile = new File(args[2]);
Scanner scanner;
try {
scanner = new Scanner(inputFile);
} catch (FileNotFoundException e) {
System.out.println("File "+inputFile.toString()+" does not exist.");
System.exit(FILE_NOT_FOUND);
return;
}
Tokeniser tokeniser = new Tokeniser(scanner);
if (mode == Mode.LEXER) {
for (Token t = tokeniser.nextToken(); t.tokenClass != Token.TokenClass.EOF; t = tokeniser.nextToken())
System.out.println(t);
if (tokeniser.getErrorCount() == 0)
System.out.println("Lexing: pass");
else
System.out.println("Lexing: failed ("+tokeniser.getErrorCount()+" errors)");
System.exit(tokeniser.getErrorCount() == 0 ? PASS : LEXER_FAIL);
} else if (mode == Mode.PARSER) {
Parser parser = new Parser(tokeniser);
parser.parse();
if (parser.getErrorCount() == 0)
System.out.println("Parsing: pass");
else
System.out.println("Parsing: failed ("+parser.getErrorCount()+" errors)");
System.exit(parser.getErrorCount() == 0 ? PASS : PARSER_FAIL);
} else if (mode == Mode.AST) {
System.exit(MODE_FAIL);
} else if (mode == Mode.GEN) {
System.exit(MODE_FAIL);
} else {
System.exit(MODE_FAIL);
}
}
}
package lexer;
import java.io.*;
/**
* @author cdubach
*/
public class Scanner {
private BufferedReader input;
private int peeked = -1;
private int line = 1;
private int column = 0;
public Scanner(File source) throws FileNotFoundException {
input = new BufferedReader(new FileReader(source));
}
public int getColumn() {
return column;
}
public int getLine() {
return line;
}
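/**
 * Returns the next character of the input without consuming it.
 * Throws an EOFException once the end of the input is reached.
 */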
public char peek() throws IOException {
if (peeked != -1)
return (char)peeked;
int r = input.read();
if (r == -1)
throw new EOFException();
peeked = r;
return (char) r;
}
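/**
 * Consumes and returns the next character of the input, updating the
 * line and column counters. Throws an EOFException at the end of the input.
 */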
public char next() throws IOException {
char r;
if (peeked != -1) {
r = (char) peeked;
peeked = -1;
} else {
int i = input.read();
if (i == -1)
throw new EOFException();
r = (char) i;
}
if (r == '\n' || r == '\r') {
line++;
column = 0;
} else {
column++;
}
return r;
}
public void close() throws IOException {
input.close();
}
}
package lexer;
import util.Position;
/**
* @author cdubach
*/
public class Token {
public enum TokenClass {