tiny compiler

by 夏泽民 Jul 21, 2019

https://github.com/xiazemin/the-super-tiny-compiler-1/blob/master/compiler.go
https://github.com/xiazemin/the-super-tiny-compiler
https://adampresley.github.io/2015/06/01/writing-a-lexer-and-parser-in-go-part-3.html
https://adampresley.github.io/2015/06/01/writing-a-lexer-and-parser-in-go-part-2.html
https://adampresley.github.io/2015/06/01/writing-a-lexer-and-parser-in-go-part-1.html

/**

TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE

T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E

T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E

T:::::TT:::::::TT:::::THH::::::H H::::::HHEE::::::EEEEEEEEE::::E

TTTTTT T:::::T TTTTTT H:::::H H:::::H E:::::E EEEEEE

T:::::T H:::::H H:::::H E:::::E

T:::::T H::::::HHHHH::::::H E::::::EEEEEEEEEE

T:::::T H:::::::::::::::::H E:::::::::::::::E

T:::::T H:::::::::::::::::H E:::::::::::::::E

T:::::T H::::::HHHHH::::::H E::::::EEEEEEEEEE

T:::::T H:::::H H:::::H E:::::E

T:::::T H:::::H H:::::H E:::::E EEEEEE

TT:::::::TT HH::::::H H::::::HHEE::::::EEEEEEEE:::::E

T:::::::::T H:::::::H H:::::::HE::::::::::::::::::::E

T:::::::::T H:::::::H H:::::::HE::::::::::::::::::::E

TTTTTTTTTTT HHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE
*

SSSSSSSSSSSSSSS UUUUUUUU UUUUUUUUPPPPPPPPPPPPPPPPP EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR

SS:::::::::::::::SU::::::U U::::::UP::::::::::::::::P E::::::::::::::::::::ER::::::::::::::::R

S:::::SSSSSS::::::SU::::::U U::::::UP::::::PPPPPP:::::P E::::::::::::::::::::ER::::::RRRRRR:::::R

S:::::S SSSSSSSUU:::::U U:::::UUPP:::::P P:::::PEE::::::EEEEEEEEE::::ERR:::::R R:::::R

S:::::S U:::::U U:::::U P::::P P:::::P E:::::E EEEEEE R::::R R:::::R

S:::::S U:::::U U:::::U P::::P P:::::P E:::::E R::::R R:::::R

S::::SSSS U:::::U U:::::U P::::PPPPPP:::::P E::::::EEEEEEEEEE R::::RRRRRR:::::R

SS::::::SSSSS U:::::U U:::::U P:::::::::::::PP E:::::::::::::::E R:::::::::::::RR

SSS::::::::SS U:::::U U:::::U P::::PPPPPPPPP E:::::::::::::::E R::::RRRRRR:::::R

SSSSSS::::S U:::::U U:::::U P::::P E::::::EEEEEEEEEE R::::R R:::::R

S:::::S U:::::U U:::::U P::::P E:::::E R::::R R:::::R

S:::::S U::::::U U::::::U P::::P E:::::E EEEEEE R::::R R:::::R

SSSSSSS S:::::S U:::::::UUU:::::::U PP::::::PP EE::::::EEEEEEEE:::::ERR:::::R R:::::R

S::::::SSSSSS:::::S UU:::::::::::::UU P::::::::P E::::::::::::::::::::ER::::::R R:::::R

S:::::::::::::::SS UU:::::::::UU P::::::::P E::::::::::::::::::::ER::::::R R:::::R

SSSSSSSSSSSSSSS UUUUUUUUU PPPPPPPPPP EEEEEEEEEEEEEEEEEEEEEERRRRRRRR RRRRRRR
*

TTTTTTTTTTTTTTTTTTTTTTTIIIIIIIIIINNNNNNNN NNNNNNNNYYYYYYY YYYYYYY

T:::::::::::::::::::::TI::::::::IN:::::::N N::::::NY:::::Y Y:::::Y

T:::::::::::::::::::::TI::::::::IN::::::::N N::::::NY:::::Y Y:::::Y

T:::::TT:::::::TT:::::TII::::::IIN:::::::::N N::::::NY::::::Y Y::::::Y

TTTTTT T:::::T TTTTTT I::::I N::::::::::N N::::::NYYY:::::Y Y:::::YYY

T:::::T I::::I N:::::::::::N N::::::N Y:::::Y Y:::::Y

T:::::T I::::I N:::::::N::::N N::::::N Y:::::Y:::::Y

T:::::T I::::I N::::::N N::::N N::::::N Y:::::::::Y

T:::::T I::::I N::::::N N::::N:::::::N Y:::::::Y

T:::::T I::::I N::::::N N:::::::::::N Y:::::Y

T:::::T I::::I N::::::N N::::::::::N Y:::::Y

T:::::T I::::I N::::::N N:::::::::N Y:::::Y

TT:::::::TT II::::::IIN::::::N N::::::::N Y:::::Y

T:::::::::T I::::::::IN::::::N N:::::::N YYYY:::::YYYY

T:::::::::T I::::::::IN::::::N N::::::N Y:::::::::::Y

TTTTTTTTTTT IIIIIIIIIINNNNNNNN NNNNNNN YYYYYYYYYYYYY
*

CCCCCCCCCCCCC OOOOOOOOO MMMMMMMM MMMMMMMMPPPPPPPPPPPPPPPPP IIIIIIIIIILLLLLLLLLLL EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR

CCC::::::::::::C OO:::::::::OO M:::::::M M:::::::MP::::::::::::::::P I::::::::IL:::::::::L E::::::::::::::::::::ER::::::::::::::::R

CC:::::::::::::::C OO:::::::::::::OO M::::::::M M::::::::MP::::::PPPPPP:::::P I::::::::IL:::::::::L E::::::::::::::::::::ER::::::RRRRRR:::::R

C:::::CCCCCCCC::::CO:::::::OOO:::::::OM:::::::::M M:::::::::MPP:::::P P:::::PII::::::IILL:::::::LL EE::::::EEEEEEEEE::::ERR:::::R R:::::R

C:::::C CCCCCCO::::::O O::::::OM::::::::::M M::::::::::M P::::P P:::::P I::::I L:::::L E:::::E EEEEEE R::::R R:::::R

C:::::C O:::::O O:::::OM:::::::::::M M:::::::::::M P::::P P:::::P I::::I L:::::L E:::::E R::::R R:::::R

C:::::C O:::::O O:::::OM:::::::M::::M M::::M:::::::M P::::PPPPPP:::::P I::::I L:::::L E::::::EEEEEEEEEE R::::RRRRRR:::::R

C:::::C O:::::O O:::::OM::::::M M::::M M::::M M::::::M P:::::::::::::PP I::::I L:::::L E:::::::::::::::E R:::::::::::::RR

C:::::C O:::::O O:::::OM::::::M M::::M::::M M::::::M P::::PPPPPPPPP I::::I L:::::L E:::::::::::::::E R::::RRRRRR:::::R

C:::::C O:::::O O:::::OM::::::M M:::::::M M::::::M P::::P I::::I L:::::L E::::::EEEEEEEEEE R::::R R:::::R

C:::::C O:::::O O:::::OM::::::M M:::::M M::::::M P::::P I::::I L:::::L E:::::E R::::R R:::::R

C:::::C CCCCCCO::::::O O::::::OM::::::M MMMMM M::::::M P::::P I::::I L:::::L LLLLLL E:::::E EEEEEE R::::R R:::::R

C:::::CCCCCCCC::::CO:::::::OOO:::::::OM::::::M M::::::MPP::::::PP II::::::IILL:::::::LLLLLLLLL:::::LEE::::::EEEEEEEE:::::ERR:::::R R:::::R

CC:::::::::::::::C OO:::::::::::::OO M::::::M M::::::MP::::::::P I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R R:::::R

CCC::::::::::::C OO:::::::::OO M::::::M M::::::MP::::::::P I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R R:::::R

CCCCCCCCCCCCC OOOOOOOOO MMMMMMMM MMMMMMMMPPPPPPPPPP IIIIIIIIIILLLLLLLLLLLLLLLLLLLLLLLLEEEEEEEEEEEEEEEEEEEEEERRRRRRRR RRRRRRR
*

=======================================================================================================================================================================

=======================================================================================================================================================================

=======================================================================================================================================================================

=======================================================================================================================================================================
*/

/**

Today we’re going to write a compiler together. But not just any compiler… A

super duper teeny tiny compiler! A compiler that is so small that if you

remove all the comments this file would only be ~200 lines of actual code.
*

We’re going to compile some lisp-like function calls into some C-like

function calls.
*

If you are not familiar with one or the other. I’ll just give you a quick intro.
*

If we had two functions add and subtract they would be written like this:
*

LISP C
*

2 + 2 (add 2 2) add(2, 2)

4 - 2 (subtract 4 2) subtract(4, 2)

2 + (4 - 2) (add 2 (subtract 4 2)) add(2, subtract(4, 2))
*

Easy peezy right?
*

Well good, because this is exactly what we are going to compile. While this

is neither a complete LISP or C syntax, it will be enough of the syntax to

demonstrate many of the major pieces of a modern compiler.
*/

/**

Most compilers break down into three primary stages: Parsing, Transformation,

and Code Generation
*

1. Parsing is taking raw code and turning it into a more abstract

representation of the code.
*

1. Transformation takes this abstract representation and manipulates to do

whatever the compiler wants it to.
*

1. Code Generation takes the transformed representation of the code and

turns it into new code.
*/

/**

Parsing

Parsing typically gets broken down into two phases: Lexical Analysis and

Syntactic Analysis.
*

1. Lexical Analysis takes the raw code and splits it apart into these things

called tokens by a thing called a tokenizer (or lexer).
*

Tokens are an array of tiny little objects that describe an isolated piece

of the syntax. They could be numbers, labels, punctuation, operators,

whatever.
*

1. Syntactic Analysis takes the tokens and reformats them into a

representation that describes each part of the syntax and their relation

to one another. This is known as an intermediate representation or

Abstract Syntax Tree.
*

An Abstract Syntax Tree, or AST for short, is a deeply nested object that

represents code in a way that is both easy to work with and tells us a lot

of information.
*

For the following syntax:
*

(add 2 (subtract 4 2))
*

Tokens might look something like this:
*

[

{ type: ‘paren’, value: ‘(‘ },

{ type: ‘name’, value: ‘add’ },

{ type: ‘number’, value: ‘2’ },

{ type: ‘paren’, value: ‘(‘ },

{ type: ‘name’, value: ‘subtract’ },

{ type: ‘number’, value: ‘4’ },

{ type: ‘number’, value: ‘2’ },

{ type: ‘paren’, value: ‘)’ },

{ type: ‘paren’, value: ‘)’ }

And an Abstract Syntax Tree (AST) might look like this:
*

{

type: ‘Program’,

body: [{

type: ‘CallExpression’,

name: ‘add’,

params: [{

type: ‘NumberLiteral’,

value: ‘2’

}, {

type: ‘CallExpression’,

name: ‘subtract’,

params: [{

type: ‘NumberLiteral’,

value: ‘4’

}, {

type: ‘NumberLiteral’,

value: ‘2’

}]

}]

}]

}
*/

/**

Transformation

The next type of stage for a compiler is transformation. Again, this just

takes the AST from the last step and makes changes to it. It can manipulate

the AST in the same language or it can translate it into an entirely new

language.
*

Let’s look at how we would transform an AST.
*

You might notice that our AST has elements within it that look very similar.

There are these objects with a type property. Each of these are known as an

AST Node. These nodes have defined properties on them that describe one

isolated part of the tree.
*

We can have a node for a “NumberLiteral”:
*

{

type: ‘NumberLiteral’,

value: ‘2’

Or maybe a node for a “CallExpression”:
*

{

type: ‘CallExpression’,

name: ‘subtract’,

params: […nested nodes go here…]

When transforming the AST we can manipulate nodes by

adding/removing/replacing properties, we can add new nodes, remove nodes, or

we could leave the existing AST alone and create an entirely new one based

on it.
*

Since we’re targeting a new language, we’re going to focus on creating an

entirely new AST that is specific to the target language.
*

Traversal

In order to navigate through all of these nodes, we need to be able to

traverse through them. This traversal process goes to each node in the AST

depth-first.
*

{

type: ‘Program’,

body: [{

type: ‘CallExpression’,

name: ‘add’,

params: [{

type: ‘NumberLiteral’,

value: ‘2’

}, {

type: ‘CallExpression’,

name: ‘subtract’,

params: [{

type: ‘NumberLiteral’,

value: ‘4’

}, {

type: ‘NumberLiteral’,

value: ‘2’

}]

}]

}]

So for the above AST we would go:
*

1. Program - Starting at the top level of the AST

1. CallExpression (add) - Moving to the first element of the Program’s body

1. NumberLiteral (2) - Moving to the first element of CallExpression’s params

1. CallExpression (subtract) - Moving to the second element of CallExpression’s params

1. NumberLiteral (4) - Moving to the first element of CallExpression’s params

1. NumberLiteral (2) - Moving to the second element of CallExpression’s params
  *

If we were manipulating this AST directly, instead of creating a separate AST,

we would likely introduce all sorts of abstractions here. But just visiting

each node in the tree is enough.
*

The reason I use the word “visiting” is because there is this pattern of how

to represent operations on elements of an object structure.
*

Visitors

The basic idea here is that we are going to create a “visitor” object that

has methods that will accept different node types.
*

var visitor = {

NumberLiteral() {},

CallExpression() {}

};
*

When we traverse our AST we will call the methods on this visitor whenever we

encounter a node of a matching type.
*

In order to make this useful we will also pass the node and a reference to

the parent node.
*

var visitor = {

NumberLiteral(node, parent) {},

CallExpression(node, parent) {}

};
*/

/**

Code Generation

The final phase of a compiler is code generation. Sometimes compilers will do

things that overlap with transformation, but for the most part code

generation just means take our AST and string-ify code back out.
*

Code generators work several different ways, some compilers will reuse the

tokens from earlier, others will have created a separate representation of

the code so that they can print node linearly, but from what I can tell most

will use the same AST we just created, which is what we’re going to focus on.
*

Effectively our code generator will know how to “print” all of the different

node types of the AST, and it will recursively call itself to print nested

nodes until everything is printed into one long string of code.
*/

/**

And that’s it! That’s all the different pieces of a compiler.
*

Now that isn’t to say every compiler looks exactly like I described here.

Compilers serve many different purposes, and they might need more steps than

I have detailed.
*

But now you should have a general high-level idea of what most compilers look

like.
*

Now that I’ve explained all of this, you’re all good to go write your own

compilers right?
*

Just kidding, that’s what I’m here to help with :P
*

So let’s begin…
*/

/**

============================================================================

(/^▽^)/

THE TOKENIZER!

============================================================================
*/

/**

We’re gonna start off with our first phase of parsing, lexical analysis, with

the tokenizer.
*

We’re just going to take our string of code and break it down into an array

of tokens.
*

(add 2 (subtract 4 2)) => [{ type: ‘paren’, value: ‘(‘ }, …]
*/
package main

import (
“fmt”
“log”
“strings”
)

type token struct {
kind string
value string
}

// We start by accepting an input string of code, and we’re gonna set up two
// things…
func tokenizer(input string) []token {
// A new line is appended to the program
input += “\n”

// A `current` variable for tracking our position in the code like a cursor.

current := 0

// And a slice of our `token` type for appending tokens to.

tokens := []token{}

// We start by creating a `for` loop where we are setting up our `current`

// variable to be incremented as much as we want `inside` the loop.

//

// We do this because we may want to increment `current` many times within a

// single loop because our tokens can be any length.

for current < len([]rune(input)) {

	// We're also going to store the `current` character in the `input`.

	char := string([]rune(input)[current])

	// The first thing we want to check for is an open parenthesis. This will

	// later be used for `CallExpressions` but for now we only care about the

	// character.

	//

	// We check to see if we have an open parenthesis:

	if char == "(" {

		// If we do, we append a new token to our slice with the kind `paren`

		// and set the value to an open parenthesis.

		tokens = append(tokens, token{

			kind:  "paren",

			value: "(",

		})

		// Then we increment `current`

		current++

		// And we `continue` onto the next cycle of the loop.

		continue

	}

	// Next we're going to check for a closing parenthesis. We do the same exact

	// thing as before: Check for a closing parenthesis, add a new token,

	// increment `current`, and `continue`.

	if char == ")" {

		tokens = append(tokens, token{

			kind:  "paren",

			value: ")",

		})

		current++

		continue

	}

	// Moving on, we're now going to check for whitespace. This is interesting

	// because we care that whitespace exists to separate characters, but it

	// isn't actually important for us to store as a token. We would only throw

	// it out later.

	//

	// So here we're just going to test for existence and if it does exist we're

	// going to just `continue` on.

	if char == " " {

		current++

		continue

	}

	// The next type of token is a number. This is different than what we have

	// seen before because a number could be any number of characters and we

	// want to capture the entire sequence of characters as one token.

	//

	//   (add 123 456)

	//        ^^^ ^^^

	//        Only two separate tokens

	//

	// So we start this off when we encounter the first number in a sequence.

	if isNumber(char) {

		// We're going to create a `value` string that we are going to append

		// characters to.

		value := ""

		// Then we're going to loop through each character in the sequence until

		// we encounter a character that is not a number, pushing each character

		// that is a number to our `value` and incrementing `current` as we go.

		for isNumber(char) {

			value += char

			current++

			char = string([]rune(input)[current])

		}

		// After that we append our `number` token to the `tokens` slice.

		tokens = append(tokens, token{

			kind:  "number",

			value: value,

		})

		// And we continue on.

		continue

	}

	// The last type of token will be a `name` token. This is a sequence of

	// letters instead of numbers, that are the names of functions in our lisp

	// syntax.

	//

	//   (add 2 4)

	//    ^^^

	//    Name token

	//

	if isLetter(char) {

		value := ""

		// Again we're just going to loop through all the letters pushing them to

		// a value.

		for isLetter(char) {

			value += char

			current++

			char = string([]rune(input)[current])

		}

		// And appending that value as a token with the type `name` and continuing.

		tokens = append(tokens, token{

			kind:  "name",

			value: value,

		})

		continue

	}

	break

}

// Then at the end of our `tokenizer` we simply return the tokens array.

return tokens }

// isNumber accepts a string and will check to see whether or not what has been
// passed through is between the range of 0 - 9.
func isNumber(char string) bool {
if char == “” {
return false
}
n := []rune(char)[0]
if n >= ‘0’ && n <= ‘9’ {
return true
}
return false
}

// isLetter works in a similar way to isNumber, but checks the range for a
// letter in the range of a - z.
func isLetter(char string) bool {
if char == “” {
return false
}
n := []rune(char)[0]
if n >= ‘a’ && n <= ‘z’ {
return true
}
return false
}

/**

============================================================================

ヽ/❀o ل͜ o\ﾉ

THE PARSER!!!

============================================================================
*/

/**

For our parser we’re going to take our array of tokens and turn it into an

AST.
*

[{ type: ‘paren’, value: ‘(‘ }, …] => { type: ‘Program’, body: […] }
*/

// We will define our type, node here. Within node are pointers types to what
// would otherwise be recursive types in Go. e.g.
//
// callee node
//
// Would cause the Go compiler to complain about a recursive type. When we want
// to use one of these types to pass through to a function, for example, we’d
// use & as it’d be a reference. But we’ll come to that a bit later on.
type node struct {
kind string
value string
name string
callee *node
expression *node
body []node
params []node
arguments *[]node
context *[]node
}

// Type ast is just another alias type. I find this makes part of the code
// more readable, as you’ll come to see that there are a ton of references to
// node.
type ast node

// This is the counter variable that we’ll use for parsing.
var pc int

// This variable will store our slice of tokens inside of it.
var pt []token

// Okay, so we define a parser function that accepts our slice of tokens.
func parser(tokens []token) ast {
// Here, we initially give both the parser counter and the parser tokens a
// value.
pc = 0
pt = tokens

// Now, we're going to create our AST which will have a root which is a

// `Program` node.

ast := ast{

	kind: "Program",

	body: []node{},

}

// And we're going to kickstart our `walk` function, which you can find just

// below this, we'll be pushing nodes to our `ast.body` slice.

//

// The reason we are doing this inside a loop is because our program can have

// `CallExpressions` after one another instead of being nested.

//

//   (add 2 2)

//   (subtract 4 2)

//

for pc < len(pt) {

	ast.body = append(ast.body, walk())

}

// At the end of our parser we'll return the AST.

return ast }

// But this time we’re going to use recursion instead of a while loop. So we
// define a walk function.
func walk() node {

// Inside the walk function we start by grabbing the `current` token.

token := pt[pc]

// We're going to split each type of token off into a different code path,

// starting off with `number` tokens.

//

// We test to see if we have a `number` token.

if token.kind == "number" {

	// If we have one, we'll increment `current`.

	pc++

	// And we'll return a new AST node called `NumberLiteral` and setting its

	// value to the value of our token.

	return node{

		kind:  "NumberLiteral",

		value: token.value,

	}

}

// Next we're going to look for CallExpressions. We start this off when we

// encounter an open parenthesis.

if token.kind == "paren" && token.value == "(" {

	// We'll increment `current` to skip the parenthesis since we don't care

	// about it in our AST.

	pc++

	token = pt[pc]

	// We create a base node with the type `CallExpression`, and we're going

	// to set the name as the current token's value since the next token after

	// the open parenthesis is the name of the function.

	n := node{

		kind:   "CallExpression",

		name:   token.value,

		params: []node{},

	}

	// We increment `current` *again* to skip the name token.

	pc++

	token = pt[pc]

	// And now we want to loop through each token that will be the `params` of

	// our `CallExpression` until we encounter a closing parenthesis.

	//

	// Now this is where recursion comes in. Instead of trying to parse a

	// potentially infinitely nested set of nodes we're going to rely on

	// recursion to resolve things.

	//

	// To explain this, let's take our Lisp code. You can see that the

	// parameters of the `add` are a number and a nested `CallExpression` that

	// includes its own numbers.

	//

	//   (add 2 (subtract 4 2))

	//

	// You'll also notice that in our tokens array we have multiple closing

	// parenthesis.

	//

	//   [

	//     { type: 'paren',  value: '('        },

	//     { type: 'name',   value: 'add'      },

	//     { type: 'number', value: '2'        },

	//     { type: 'paren',  value: '('        },

	//     { type: 'name',   value: 'subtract' },

	//     { type: 'number', value: '4'        },

	//     { type: 'number', value: '2'        },

	//     { type: 'paren',  value: ')'        }, <<< Closing parenthesis

	//     { type: 'paren',  value: ')'        }  <<< Closing parenthesis

	//   ]

	//

	// We're going to rely on the nested `walk` function to increment our

	// `current` variable past any nested `CallExpressions`.

	// So we create a `for` loop that will continue until it encounters a

	// token with a `type` of `'paren'` and a `value` of a closing

	// parenthesis.

	for token.kind != "paren" || (token.kind == "paren" && token.value != ")") {

		// we'll call the `walk` function which will return a `node` and we'll

		// push it into our `node.params`.

		n.params = append(n.params, walk())

		token = pt[pc]

	}

	// Finally we will increment `current` one last time to skip the closing

	// parenthesis.

	pc++

	// And return the node.

	return n

}

// Again, if we haven't recognized the token type by now we're going to

// throw an error.

log.Fatal(token.kind)

return node{} }

/**

============================================================================

⌒(❀>◞౪◟<❀)⌒

THE TRAVERSER!!!

============================================================================
*/

/**

So now we have our AST, and we want to be able to visit different nodes with

a visitor. We need to be able to call the methods on the visitor whenever we

encounter a node with a matching type.
*

traverse(ast, {

Program(node, parent) {

// …

},
*

CallExpression(node, parent) {

// …

},
*

NumberLiteral(node, parent) {

// …

}

});
*/

// We will define our visitor type here in such a way that strings can
// be tested against easily and then represent the function associoated with it.
//
// e.g.
// “NumberLiteral” : func(n *node, p node) {
// // do something
// }
//
type visitor map[string]func(n *node, p node)

// So we define a traverser function which accepts an AST and a
// visitor. Inside we’re going to define two functions…
func traverser(a ast, v visitor) {

// We kickstart the traverser by calling `traverseNode` with our ast

// with no `parent` because the top level of the AST doesn't have a parent.

traverseNode(node(a), node{}, v) }

// A traverseArray function that will allow us to iterate over a slice and
// call the next function that we will define: traverseNode.
func traverseArray(a []node, p node, v visitor) {
for _, child := range a {
traverseNode(child, p, v)
}
}

// traverseNode will accept a node and its parent node. So that it can
// pass both to our visitor methods.
func traverseNode(n, p node, v visitor) {

// We start by testing for the existence of a method on the visitor with a

// matching `type` and then calling it with the `node` and its `parent`.

for k, va := range v {

	if k == n.kind {

		va(&n, p)

	}

}

// Next we are going to split things up by the current node type.

switch n.kind {

// We'll start with our top level `Program`. Since Program nodes have a

// property named body that has an array of nodes, we will call

// `traverseArray` to traverse down into them.

//

// (Remember that `traverseArray` will in turn call `traverseNode` so  we

// are causing the tree to be traversed recursively)

case "Program":

	traverseArray(n.body, n, v)

	break

// Next we do the same with `CallExpressions` and traverse their `params`.

case "CallExpression":

	traverseArray(n.params, n, v)

	break

// In the case of `NumberLiterals` we don't have any child nodes to visit,

// so we'll just break.

case "NumberLiteral":

	break

// And again, if we haven't recognized the node type then we'll throw an

// error.

default:

	log.Fatal(n.kind)

} }

/**

============================================================================

⁽(◍˃̵͈̑ᴗ˂̵͈̑)⁽

THE TRANSFORMER!!!

============================================================================
*/

/**

Next up, the transformer. Our transformer is going to take the AST that we

have built and pass it to our traverser function with a visitor and will

create a new ast.
*

Original AST Transformed AST

type: ‘Program’, type: ‘Program’,

body: [{ body: [{

type: ‘CallExpression’, type: ‘ExpressionStatement’,

name: ‘add’, expression: {

params: [{ type: ‘CallExpression’,

type: ‘NumberLiteral’, callee: {

value: ‘2’ type: ‘Identifier’,

}, { name: ‘add’

type: ‘CallExpression’, },

name: ‘subtract’, arguments: [{

params: [{ type: ‘NumberLiteral’,

type: ‘NumberLiteral’, value: ‘2’

value: ‘4’ }, {

}, { type: ‘CallExpression’,

type: ‘NumberLiteral’, callee: {

value: ‘2’ type: ‘Identifier’,

}] name: ‘subtract’

}] },

}] arguments: [{

} type: ‘NumberLiteral’,

value: ‘4’

———————————- }, {

type: ‘NumberLiteral’,

value: ‘2’

}]

(sorry the other one is longer.) }]

}

}]

}

// So we have our transformer function which will accept the lisp ast.
func transformer(a ast) ast {

// We'll create a new ast which like our previous AST will have a program

// node.

nast := ast{

	kind: "Program",

	body: []node{},

}

// Next I'm going to cheat a little and create a bit of a hack. We're going to

// use a property named `context` on our parent nodes that we're going to push

// nodes to their parent's `context`. Normally you would have a better

// abstraction than this, but for our purposes this keeps things simple.

//

// Just take note that the context is a reference *from* the old ast *to* the

// new ast.

a.context = &nast.body

// We'll start by calling the traverser function with our ast and a visitor.

traverser(a, map[string]func(n *node, p node){

	// The first visitor method accepts `NumberLiterals`

	"NumberLiteral": func(n *node, p node) {

		// We'll create a new node also named `NumberLiteral` that we will push to

		// the parent context.

		*p.context = append(*p.context, node{

			kind:  "NumberLiteral",

			value: n.value,

		})

	},

	// Next up, `CallExpressions`.

	"CallExpression": func(n *node, p node) {

		// We start creating a new node `CallExpression` with a nested

		// `Identifier`.

		e := node{

			kind: "CallExpression",

			callee: &node{

				kind: "Identifier",

				name: n.name,

			},

			arguments: new([]node),

		}

		// Next we're going to define a new context on the original

		// `CallExpression` node that will reference the `expression`'s arguments

		// so that we can push arguments.

		n.context = e.arguments

		// Then we're going to check if the parent node is a `CallExpression`.

		// If it is not...

		if p.kind != "CallExpression" {

			// We're going to wrap our `CallExpression` node with an

			// `ExpressionStatement`.

			es := node{

				kind:       "ExpressionStatement",

				expression: &e,

			}

			// Last, we push our (possibly wrapped) `CallExpression` to the `parent`'s

			// `context`.

			*p.context = append(*p.context, es)

		} else {

			*p.context = append(*p.context, e)

		}

	},

})

// At the end of our transformer function we'll return the new ast that we

// just created.

return nast }

/**

============================================================================

ヾ（〃＾∇＾）ﾉ♪

THE CODE GENERATOR!!!!

============================================================================
*/

/**

Now let’s move onto our last phase: The Code Generator.
*

Our code generator is going to recursively call itself to print each node in

the tree into one giant string.
*/

func codeGenerator(n node) string {

// We'll break things down by the `type` of the `node`.

switch n.kind {

// If we have a `Program` node. We will map through each node in the `body`

// and run them through the code generator and join them with a newline.

case "Program":

	var r []string

	for _, no := range n.body {

		r = append(r, codeGenerator(no))

	}

	return strings.Join(r, "\n")

	// For `ExpressionStatements` we'll call the code generator on the nested

	// expression and we'll add a semicolon...

case "ExpressionStatement":

	return codeGenerator(*n.expression) + ";"

// For `CallExpressions` we will print the `callee`, add an open

// parenthesis, we'll map through each node in the `arguments` array and run

// them through the code generator, joining them with a comma, and then

// we'll add a closing parenthesis.

case "CallExpression":

	var ra []string

	c := codeGenerator(*n.callee)

	for _, no := range *n.arguments {

		ra = append(ra, codeGenerator(no))

	}

	r := strings.Join(ra, ", ")

	return c + "(" + r + ")"

// For `Identifiers` we'll just return the `node`'s name.

case "Identifier":

	return n.name

// For `NumberLiterals` we'll just return the `node`'s value.

case "NumberLiteral":

	return n.value

// And if we haven't recognized the node, we'll throw an error.

default:

	log.Fatal("err")

	return ""

} }

/**

============================================================================

(۶* ‘ヮ’)۶”

!!!!!!!!THE COMPILER!!!!!!!!

============================================================================
*/

/**

FINALLY! We’ll create our compiler function. Here we will link together

every part of the pipeline.
*

1. input => tokenizer => tokens

1. tokens => parser => ast

1. ast => transformer => newAst

1. newAst => generator => output
  */

func compiler(input string) string {
tokens := tokenizer(input)
ast := parser(tokens)
nast := transformer(ast)
out := codeGenerator(node(nast))

// and simply return the output!

return out }

func main() {
program := “(add 10 (subtract 10 6))”
out := compiler(program)
fmt.Println(out)
}

Category golang