tiny compiler

https://github.com/xiazemin/the-super-tiny-compiler-1/blob/master/compiler.go
https://github.com/xiazemin/the-super-tiny-compiler
https://adampresley.github.io/2015/06/01/writing-a-lexer-and-parser-in-go-part-3.html
https://adampresley.github.io/2015/06/01/writing-a-lexer-and-parser-in-go-part-2.html
https://adampresley.github.io/2015/06/01/writing-a-lexer-and-parser-in-go-part-1.html

/**



  • TTTTTTTTTTTTTTTTTTTTTTTHHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE

  • T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E

  • T:::::::::::::::::::::TH:::::::H H:::::::HE::::::::::::::::::::E

  • T:::::TT:::::::TT:::::THH::::::H H::::::HHEE::::::EEEEEEEEE::::E

  • TTTTTT T:::::T TTTTTT H:::::H H:::::H E:::::E EEEEEE

  • T:::::T H:::::H H:::::H E:::::E

  • T:::::T H::::::HHHHH::::::H E::::::EEEEEEEEEE

  • T:::::T H:::::::::::::::::H E:::::::::::::::E

  • T:::::T H:::::::::::::::::H E:::::::::::::::E

  • T:::::T H::::::HHHHH::::::H E::::::EEEEEEEEEE

  • T:::::T H:::::H H:::::H E:::::E

  • T:::::T H:::::H H:::::H E:::::E EEEEEE

  • TT:::::::TT HH::::::H H::::::HHEE::::::EEEEEEEE:::::E

  • T:::::::::T H:::::::H H:::::::HE::::::::::::::::::::E

  • T:::::::::T H:::::::H H:::::::HE::::::::::::::::::::E

  • TTTTTTTTTTT HHHHHHHHH HHHHHHHHHEEEEEEEEEEEEEEEEEEEEEE
    *

  • SSSSSSSSSSSSSSS UUUUUUUU UUUUUUUUPPPPPPPPPPPPPPPPP EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR

  • SS:::::::::::::::SU::::::U U::::::UP::::::::::::::::P E::::::::::::::::::::ER::::::::::::::::R

  • S:::::SSSSSS::::::SU::::::U U::::::UP::::::PPPPPP:::::P E::::::::::::::::::::ER::::::RRRRRR:::::R

  • S:::::S SSSSSSSUU:::::U U:::::UUPP:::::P P:::::PEE::::::EEEEEEEEE::::ERR:::::R R:::::R

  • S:::::S U:::::U U:::::U P::::P P:::::P E:::::E EEEEEE R::::R R:::::R

  • S:::::S U:::::U U:::::U P::::P P:::::P E:::::E R::::R R:::::R

  • S::::SSSS U:::::U U:::::U P::::PPPPPP:::::P E::::::EEEEEEEEEE R::::RRRRRR:::::R

  • SS::::::SSSSS U:::::U U:::::U P:::::::::::::PP E:::::::::::::::E R:::::::::::::RR

  • SSS::::::::SS U:::::U U:::::U P::::PPPPPPPPP E:::::::::::::::E R::::RRRRRR:::::R

  • SSSSSS::::S U:::::U U:::::U P::::P E::::::EEEEEEEEEE R::::R R:::::R

  • S:::::S U:::::U U:::::U P::::P E:::::E R::::R R:::::R

  • S:::::S U::::::U U::::::U P::::P E:::::E EEEEEE R::::R R:::::R

  • SSSSSSS S:::::S U:::::::UUU:::::::U PP::::::PP EE::::::EEEEEEEE:::::ERR:::::R R:::::R

  • S::::::SSSSSS:::::S UU:::::::::::::UU P::::::::P E::::::::::::::::::::ER::::::R R:::::R

  • S:::::::::::::::SS UU:::::::::UU P::::::::P E::::::::::::::::::::ER::::::R R:::::R

  • SSSSSSSSSSSSSSS UUUUUUUUU PPPPPPPPPP EEEEEEEEEEEEEEEEEEEEEERRRRRRRR RRRRRRR
    *

  • TTTTTTTTTTTTTTTTTTTTTTTIIIIIIIIIINNNNNNNN NNNNNNNNYYYYYYY YYYYYYY

  • T:::::::::::::::::::::TI::::::::IN:::::::N N::::::NY:::::Y Y:::::Y

  • T:::::::::::::::::::::TI::::::::IN::::::::N N::::::NY:::::Y Y:::::Y

  • T:::::TT:::::::TT:::::TII::::::IIN:::::::::N N::::::NY::::::Y Y::::::Y

  • TTTTTT T:::::T TTTTTT I::::I N::::::::::N N::::::NYYY:::::Y Y:::::YYY

  • T:::::T I::::I N:::::::::::N N::::::N Y:::::Y Y:::::Y

  • T:::::T I::::I N:::::::N::::N N::::::N Y:::::Y:::::Y

  • T:::::T I::::I N::::::N N::::N N::::::N Y:::::::::Y

  • T:::::T I::::I N::::::N N::::N:::::::N Y:::::::Y

  • T:::::T I::::I N::::::N N:::::::::::N Y:::::Y

  • T:::::T I::::I N::::::N N::::::::::N Y:::::Y

  • T:::::T I::::I N::::::N N:::::::::N Y:::::Y

  • TT:::::::TT II::::::IIN::::::N N::::::::N Y:::::Y

  • T:::::::::T I::::::::IN::::::N N:::::::N YYYY:::::YYYY

  • T:::::::::T I::::::::IN::::::N N::::::N Y:::::::::::Y

  • TTTTTTTTTTT IIIIIIIIIINNNNNNNN NNNNNNN YYYYYYYYYYYYY
    *

  • CCCCCCCCCCCCC OOOOOOOOO MMMMMMMM MMMMMMMMPPPPPPPPPPPPPPPPP IIIIIIIIIILLLLLLLLLLL EEEEEEEEEEEEEEEEEEEEEERRRRRRRRRRRRRRRRR

  • CCC::::::::::::C OO:::::::::OO M:::::::M M:::::::MP::::::::::::::::P I::::::::IL:::::::::L E::::::::::::::::::::ER::::::::::::::::R

  • CC:::::::::::::::C OO:::::::::::::OO M::::::::M M::::::::MP::::::PPPPPP:::::P I::::::::IL:::::::::L E::::::::::::::::::::ER::::::RRRRRR:::::R

  • C:::::CCCCCCCC::::CO:::::::OOO:::::::OM:::::::::M M:::::::::MPP:::::P P:::::PII::::::IILL:::::::LL EE::::::EEEEEEEEE::::ERR:::::R R:::::R

  • C:::::C CCCCCCO::::::O O::::::OM::::::::::M M::::::::::M P::::P P:::::P I::::I L:::::L E:::::E EEEEEE R::::R R:::::R

  • C:::::C O:::::O O:::::OM:::::::::::M M:::::::::::M P::::P P:::::P I::::I L:::::L E:::::E R::::R R:::::R

  • C:::::C O:::::O O:::::OM:::::::M::::M M::::M:::::::M P::::PPPPPP:::::P I::::I L:::::L E::::::EEEEEEEEEE R::::RRRRRR:::::R

  • C:::::C O:::::O O:::::OM::::::M M::::M M::::M M::::::M P:::::::::::::PP I::::I L:::::L E:::::::::::::::E R:::::::::::::RR

  • C:::::C O:::::O O:::::OM::::::M M::::M::::M M::::::M P::::PPPPPPPPP I::::I L:::::L E:::::::::::::::E R::::RRRRRR:::::R

  • C:::::C O:::::O O:::::OM::::::M M:::::::M M::::::M P::::P I::::I L:::::L E::::::EEEEEEEEEE R::::R R:::::R

  • C:::::C O:::::O O:::::OM::::::M M:::::M M::::::M P::::P I::::I L:::::L E:::::E R::::R R:::::R

  • C:::::C CCCCCCO::::::O O::::::OM::::::M MMMMM M::::::M P::::P I::::I L:::::L LLLLLL E:::::E EEEEEE R::::R R:::::R

  • C:::::CCCCCCCC::::CO:::::::OOO:::::::OM::::::M M::::::MPP::::::PP II::::::IILL:::::::LLLLLLLLL:::::LEE::::::EEEEEEEE:::::ERR:::::R R:::::R

  • CC:::::::::::::::C OO:::::::::::::OO M::::::M M::::::MP::::::::P I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R R:::::R

  • CCC::::::::::::C OO:::::::::OO M::::::M M::::::MP::::::::P I::::::::IL::::::::::::::::::::::LE::::::::::::::::::::ER::::::R R:::::R

  • CCCCCCCCCCCCC OOOOOOOOO MMMMMMMM MMMMMMMMPPPPPPPPPP IIIIIIIIIILLLLLLLLLLLLLLLLLLLLLLLLEEEEEEEEEEEEEEEEEEEEEERRRRRRRR RRRRRRR
    *

  • =======================================================================================================================================================================

  • =======================================================================================================================================================================

  • =======================================================================================================================================================================

  • =======================================================================================================================================================================
    */



/**



  • Today we’re going to write a compiler together. But not just any compiler… A

  • super duper teeny tiny compiler! A compiler that is so small that if you

  • remove all the comments this file would only be ~200 lines of actual code.
    *

  • We’re going to compile some lisp-like function calls into some C-like

  • function calls.
    *

  • If you are not familiar with one or the other. I’ll just give you a quick intro.
    *

  • If we had two functions add and subtract they would be written like this:
    *

  • LISP C
    *

  • 2 + 2 (add 2 2) add(2, 2)

  • 4 - 2 (subtract 4 2) subtract(4, 2)

  • 2 + (4 - 2) (add 2 (subtract 4 2)) add(2, subtract(4, 2))
    *

  • Easy peezy right?
    *

  • Well good, because this is exactly what we are going to compile. While this

  • is neither a complete LISP or C syntax, it will be enough of the syntax to

  • demonstrate many of the major pieces of a modern compiler.
    */



/**



  • Most compilers break down into three primary stages: Parsing, Transformation,

  • and Code Generation
    *



    1. Parsing is taking raw code and turning it into a more abstract



  • representation of the code.
    *



    1. Transformation takes this abstract representation and manipulates to do



  • whatever the compiler wants it to.
    *



    1. Code Generation takes the transformed representation of the code and



  • turns it into new code.
    */



/**



  • Parsing




  • *



  • Parsing typically gets broken down into two phases: Lexical Analysis and

  • Syntactic Analysis.
    *



    1. Lexical Analysis takes the raw code and splits it apart into these things



  • called tokens by a thing called a tokenizer (or lexer).
    *

  • Tokens are an array of tiny little objects that describe an isolated piece

  • of the syntax. They could be numbers, labels, punctuation, operators,

  • whatever.
    *



    1. Syntactic Analysis takes the tokens and reformats them into a



  • representation that describes each part of the syntax and their relation

  • to one another. This is known as an intermediate representation or

  • Abstract Syntax Tree.
    *

  • An Abstract Syntax Tree, or AST for short, is a deeply nested object that

  • represents code in a way that is both easy to work with and tells us a lot

  • of information.
    *

  • For the following syntax:
    *

  • (add 2 (subtract 4 2))
    *

  • Tokens might look something like this:
    *

  • [

  • { type: ‘paren’, value: ‘(‘ },

  • { type: ‘name’, value: ‘add’ },

  • { type: ‘number’, value: ‘2’ },

  • { type: ‘paren’, value: ‘(‘ },

  • { type: ‘name’, value: ‘subtract’ },

  • { type: ‘number’, value: ‘4’ },

  • { type: ‘number’, value: ‘2’ },

  • { type: ‘paren’, value: ‘)’ },

  • { type: ‘paren’, value: ‘)’ }

  • ]
    *

  • And an Abstract Syntax Tree (AST) might look like this:
    *

  • {

  • type: ‘Program’,

  • body: [{

  • type: ‘CallExpression’,

  • name: ‘add’,

  • params: [{

  • type: ‘NumberLiteral’,

  • value: ‘2’

  • }, {

  • type: ‘CallExpression’,

  • name: ‘subtract’,

  • params: [{

  • type: ‘NumberLiteral’,

  • value: ‘4’

  • }, {

  • type: ‘NumberLiteral’,

  • value: ‘2’

  • }]

  • }]

  • }]

  • }
    */



/**



  • Transformation




  • *



  • The next type of stage for a compiler is transformation. Again, this just

  • takes the AST from the last step and makes changes to it. It can manipulate

  • the AST in the same language or it can translate it into an entirely new

  • language.
    *

  • Let’s look at how we would transform an AST.
    *

  • You might notice that our AST has elements within it that look very similar.

  • There are these objects with a type property. Each of these are known as an

  • AST Node. These nodes have defined properties on them that describe one

  • isolated part of the tree.
    *

  • We can have a node for a “NumberLiteral”:
    *

  • {

  • type: ‘NumberLiteral’,

  • value: ‘2’

  • }
    *

  • Or maybe a node for a “CallExpression”:
    *

  • {

  • type: ‘CallExpression’,

  • name: ‘subtract’,

  • params: […nested nodes go here…]

  • }
    *

  • When transforming the AST we can manipulate nodes by

  • adding/removing/replacing properties, we can add new nodes, remove nodes, or

  • we could leave the existing AST alone and create an entirely new one based

  • on it.
    *

  • Since we’re targeting a new language, we’re going to focus on creating an

  • entirely new AST that is specific to the target language.
    *

  • Traversal




  • *



  • In order to navigate through all of these nodes, we need to be able to

  • traverse through them. This traversal process goes to each node in the AST

  • depth-first.
    *

  • {

  • type: ‘Program’,

  • body: [{

  • type: ‘CallExpression’,

  • name: ‘add’,

  • params: [{

  • type: ‘NumberLiteral’,

  • value: ‘2’

  • }, {

  • type: ‘CallExpression’,

  • name: ‘subtract’,

  • params: [{

  • type: ‘NumberLiteral’,

  • value: ‘4’

  • }, {

  • type: ‘NumberLiteral’,

  • value: ‘2’

  • }]

  • }]

  • }]

  • }
    *

  • So for the above AST we would go:
    *



    1. Program - Starting at the top level of the AST





    1. CallExpression (add) - Moving to the first element of the Program’s body





    1. NumberLiteral (2) - Moving to the first element of CallExpression’s params





    1. CallExpression (subtract) - Moving to the second element of CallExpression’s params





    1. NumberLiteral (4) - Moving to the first element of CallExpression’s params





    1. NumberLiteral (2) - Moving to the second element of CallExpression’s params
      *



  • If we were manipulating this AST directly, instead of creating a separate AST,

  • we would likely introduce all sorts of abstractions here. But just visiting

  • each node in the tree is enough.
    *

  • The reason I use the word “visiting” is because there is this pattern of how

  • to represent operations on elements of an object structure.
    *

  • Visitors




  • *



  • The basic idea here is that we are going to create a “visitor” object that

  • has methods that will accept different node types.
    *

  • var visitor = {

  • NumberLiteral() {},

  • CallExpression() {}

  • };
    *

  • When we traverse our AST we will call the methods on this visitor whenever we

  • encounter a node of a matching type.
    *

  • In order to make this useful we will also pass the node and a reference to

  • the parent node.
    *

  • var visitor = {

  • NumberLiteral(node, parent) {},

  • CallExpression(node, parent) {}

  • };
    */



/**



  • Code Generation




  • *



  • The final phase of a compiler is code generation. Sometimes compilers will do

  • things that overlap with transformation, but for the most part code

  • generation just means take our AST and string-ify code back out.
    *

  • Code generators work several different ways, some compilers will reuse the

  • tokens from earlier, others will have created a separate representation of

  • the code so that they can print node linearly, but from what I can tell most

  • will use the same AST we just created, which is what we’re going to focus on.
    *

  • Effectively our code generator will know how to “print” all of the different

  • node types of the AST, and it will recursively call itself to print nested

  • nodes until everything is printed into one long string of code.
    */



/**



  • And that’s it! That’s all the different pieces of a compiler.
    *

  • Now that isn’t to say every compiler looks exactly like I described here.

  • Compilers serve many different purposes, and they might need more steps than

  • I have detailed.
    *

  • But now you should have a general high-level idea of what most compilers look

  • like.
    *

  • Now that I’ve explained all of this, you’re all good to go write your own

  • compilers right?
    *

  • Just kidding, that’s what I’m here to help with :P
    *

  • So let’s begin…
    */



/**



  • ============================================================================

  • (/^▽^)/

  • THE TOKENIZER!

  • ============================================================================
    */



/**



  • We’re gonna start off with our first phase of parsing, lexical analysis, with

  • the tokenizer.
    *

  • We’re just going to take our string of code and break it down into an array

  • of tokens.
    *

  • (add 2 (subtract 4 2)) => [{ type: ‘paren’, value: ‘(‘ }, …]
    */
    package main



import (
“fmt”
“log”
“strings”
)



type token struct {
kind string
value string
}



// We start by accepting an input string of code, and we’re gonna set up two
// things…
func tokenizer(input string) []token {
// A new line is appended to the program
input += “\n”



// A `current` variable for tracking our position in the code like a cursor.
current := 0

// And a slice of our `token` type for appending tokens to.
tokens := []token{}

// We start by creating a `for` loop where we are setting up our `current`
// variable to be incremented as much as we want `inside` the loop.
//
// We do this because we may want to increment `current` many times within a
// single loop because our tokens can be any length.
for current < len([]rune(input)) {

// We're also going to store the `current` character in the `input`.
char := string([]rune(input)[current])

// The first thing we want to check for is an open parenthesis. This will
// later be used for `CallExpressions` but for now we only care about the
// character.
//
// We check to see if we have an open parenthesis:
if char == "(" {

// If we do, we append a new token to our slice with the kind `paren`
// and set the value to an open parenthesis.
tokens = append(tokens, token{
kind: "paren",
value: "(",
})

// Then we increment `current`
current++

// And we `continue` onto the next cycle of the loop.
continue
}

// Next we're going to check for a closing parenthesis. We do the same exact
// thing as before: Check for a closing parenthesis, add a new token,
// increment `current`, and `continue`.
if char == ")" {
tokens = append(tokens, token{
kind: "paren",
value: ")",
})
current++
continue
}

// Moving on, we're now going to check for whitespace. This is interesting
// because we care that whitespace exists to separate characters, but it
// isn't actually important for us to store as a token. We would only throw
// it out later.
//
// So here we're just going to test for existence and if it does exist we're
// going to just `continue` on.
if char == " " {
current++
continue
}

// The next type of token is a number. This is different than what we have
// seen before because a number could be any number of characters and we
// want to capture the entire sequence of characters as one token.
//
// (add 123 456)
// ^^^ ^^^
// Only two separate tokens
//
// So we start this off when we encounter the first number in a sequence.
if isNumber(char) {

// We're going to create a `value` string that we are going to append
// characters to.
value := ""

// Then we're going to loop through each character in the sequence until
// we encounter a character that is not a number, pushing each character
// that is a number to our `value` and incrementing `current` as we go.
for isNumber(char) {
value += char
current++
char = string([]rune(input)[current])
}

// After that we append our `number` token to the `tokens` slice.
tokens = append(tokens, token{
kind: "number",
value: value,
})

// And we continue on.
continue
}

// The last type of token will be a `name` token. This is a sequence of
// letters instead of numbers, that are the names of functions in our lisp
// syntax.
//
// (add 2 4)
// ^^^
// Name token
//
if isLetter(char) {
value := ""

// Again we're just going to loop through all the letters pushing them to
// a value.
for isLetter(char) {
value += char
current++
char = string([]rune(input)[current])
}

// And appending that value as a token with the type `name` and continuing.
tokens = append(tokens, token{
kind: "name",
value: value,
})
continue
}
break
}

// Then at the end of our `tokenizer` we simply return the tokens array.
return tokens }


// isNumber accepts a string and will check to see whether or not what has been
// passed through is between the range of 0 - 9.
func isNumber(char string) bool {
if char == “” {
return false
}
n := []rune(char)[0]
if n >= ‘0’ && n <= ‘9’ {
return true
}
return false
}



// isLetter works in a similar way to isNumber, but checks the range for a
// letter in the range of a - z.
func isLetter(char string) bool {
if char == “” {
return false
}
n := []rune(char)[0]
if n >= ‘a’ && n <= ‘z’ {
return true
}
return false
}



/**



  • ============================================================================

  • ヽ/❀o ل͜ o\ノ

  • THE PARSER!!!

  • ============================================================================
    */



/**



  • For our parser we’re going to take our array of tokens and turn it into an

  • AST.
    *

  • [{ type: ‘paren’, value: ‘(‘ }, …] => { type: ‘Program’, body: […] }
    */



// We will define our type, node here. Within node are pointers types to what
// would otherwise be recursive types in Go. e.g.
//
// callee node
//
// Would cause the Go compiler to complain about a recursive type. When we want
// to use one of these types to pass through to a function, for example, we’d
// use & as it’d be a reference. But we’ll come to that a bit later on.
type node struct {
kind string
value string
name string
callee *node
expression *node
body []node
params []node
arguments *[]node
context *[]node
}



// Type ast is just another alias type. I find this makes part of the code
// more readable, as you’ll come to see that there are a ton of references to
// node.
type ast node



// This is the counter variable that we’ll use for parsing.
var pc int



// This variable will store our slice of tokens inside of it.
var pt []token



// Okay, so we define a parser function that accepts our slice of tokens.
func parser(tokens []token) ast {
// Here, we initially give both the parser counter and the parser tokens a
// value.
pc = 0
pt = tokens



// Now, we're going to create our AST which will have a root which is a
// `Program` node.
ast := ast{
kind: "Program",
body: []node{},
}

// And we're going to kickstart our `walk` function, which you can find just
// below this, we'll be pushing nodes to our `ast.body` slice.
//
// The reason we are doing this inside a loop is because our program can have
// `CallExpressions` after one another instead of being nested.
//
// (add 2 2)
// (subtract 4 2)
//
for pc < len(pt) {
ast.body = append(ast.body, walk())
}

// At the end of our parser we'll return the AST.
return ast }


// But this time we’re going to use recursion instead of a while loop. So we
// define a walk function.
func walk() node {



// Inside the walk function we start by grabbing the `current` token.
token := pt[pc]

// We're going to split each type of token off into a different code path,
// starting off with `number` tokens.
//
// We test to see if we have a `number` token.
if token.kind == "number" {

// If we have one, we'll increment `current`.
pc++

// And we'll return a new AST node called `NumberLiteral` and setting its
// value to the value of our token.
return node{
kind: "NumberLiteral",
value: token.value,
}
}

// Next we're going to look for CallExpressions. We start this off when we
// encounter an open parenthesis.
if token.kind == "paren" && token.value == "(" {

// We'll increment `current` to skip the parenthesis since we don't care
// about it in our AST.
pc++
token = pt[pc]

// We create a base node with the type `CallExpression`, and we're going
// to set the name as the current token's value since the next token after
// the open parenthesis is the name of the function.
n := node{
kind: "CallExpression",
name: token.value,
params: []node{},
}

// We increment `current` *again* to skip the name token.
pc++
token = pt[pc]

// And now we want to loop through each token that will be the `params` of
// our `CallExpression` until we encounter a closing parenthesis.
//
// Now this is where recursion comes in. Instead of trying to parse a
// potentially infinitely nested set of nodes we're going to rely on
// recursion to resolve things.
//
// To explain this, let's take our Lisp code. You can see that the
// parameters of the `add` are a number and a nested `CallExpression` that
// includes its own numbers.
//
// (add 2 (subtract 4 2))
//
// You'll also notice that in our tokens array we have multiple closing
// parenthesis.
//
// [
// { type: 'paren', value: '(' },
// { type: 'name', value: 'add' },
// { type: 'number', value: '2' },
// { type: 'paren', value: '(' },
// { type: 'name', value: 'subtract' },
// { type: 'number', value: '4' },
// { type: 'number', value: '2' },
// { type: 'paren', value: ')' }, <<< Closing parenthesis
// { type: 'paren', value: ')' } <<< Closing parenthesis
// ]
//
// We're going to rely on the nested `walk` function to increment our
// `current` variable past any nested `CallExpressions`.

// So we create a `for` loop that will continue until it encounters a
// token with a `type` of `'paren'` and a `value` of a closing
// parenthesis.
for token.kind != "paren" || (token.kind == "paren" && token.value != ")") {
// we'll call the `walk` function which will return a `node` and we'll
// push it into our `node.params`.
n.params = append(n.params, walk())
token = pt[pc]
}

// Finally we will increment `current` one last time to skip the closing
// parenthesis.
pc++

// And return the node.
return n
}

// Again, if we haven't recognized the token type by now we're going to
// throw an error.
log.Fatal(token.kind)
return node{} }


/**



  • ============================================================================

  • ⌒(❀>◞౪◟<❀)⌒

  • THE TRAVERSER!!!

  • ============================================================================
    */



/**



  • So now we have our AST, and we want to be able to visit different nodes with

  • a visitor. We need to be able to call the methods on the visitor whenever we

  • encounter a node with a matching type.
    *

  • traverse(ast, {

  • Program(node, parent) {

  • // …

  • },
    *

  • CallExpression(node, parent) {

  • // …

  • },
    *

  • NumberLiteral(node, parent) {

  • // …

  • }

  • });
    */



// We will define our visitor type here in such a way that strings can
// be tested against easily and then represent the function associoated with it.
//
// e.g.
// “NumberLiteral” : func(n *node, p node) {
// // do something
// }
//
type visitor map[string]func(n *node, p node)



// So we define a traverser function which accepts an AST and a
// visitor. Inside we’re going to define two functions…
func traverser(a ast, v visitor) {



// We kickstart the traverser by calling `traverseNode` with our ast
// with no `parent` because the top level of the AST doesn't have a parent.
traverseNode(node(a), node{}, v) }


// A traverseArray function that will allow us to iterate over a slice and
// call the next function that we will define: traverseNode.
func traverseArray(a []node, p node, v visitor) {
for _, child := range a {
traverseNode(child, p, v)
}
}



// traverseNode will accept a node and its parent node. So that it can
// pass both to our visitor methods.
func traverseNode(n, p node, v visitor) {



// We start by testing for the existence of a method on the visitor with a
// matching `type` and then calling it with the `node` and its `parent`.
for k, va := range v {
if k == n.kind {
va(&n, p)
}
}

// Next we are going to split things up by the current node type.
switch n.kind {

// We'll start with our top level `Program`. Since Program nodes have a
// property named body that has an array of nodes, we will call
// `traverseArray` to traverse down into them.
//
// (Remember that `traverseArray` will in turn call `traverseNode` so we
// are causing the tree to be traversed recursively)
case "Program":
traverseArray(n.body, n, v)
break

// Next we do the same with `CallExpressions` and traverse their `params`.
case "CallExpression":
traverseArray(n.params, n, v)
break

// In the case of `NumberLiterals` we don't have any child nodes to visit,
// so we'll just break.
case "NumberLiteral":
break

// And again, if we haven't recognized the node type then we'll throw an
// error.
default:
log.Fatal(n.kind)
} }


/**



  • ============================================================================

  • ⁽(◍˃̵͈̑ᴗ˂̵͈̑)⁽

  • THE TRANSFORMER!!!

  • ============================================================================
    */



/**



  • Next up, the transformer. Our transformer is going to take the AST that we

  • have built and pass it to our traverser function with a visitor and will

  • create a new ast.
    *













  • Original AST Transformed AST














  • { {










  • type: ‘Program’, type: ‘Program’,










  • body: [{ body: [{










  • type: ‘CallExpression’, type: ‘ExpressionStatement’,










  • name: ‘add’, expression: {










  • params: [{ type: ‘CallExpression’,










  • type: ‘NumberLiteral’, callee: {










  • value: ‘2’ type: ‘Identifier’,










  • }, { name: ‘add’










  • type: ‘CallExpression’, },










  • name: ‘subtract’, arguments: [{










  • params: [{ type: ‘NumberLiteral’,










  • type: ‘NumberLiteral’, value: ‘2’










  • value: ‘4’ }, {










  • }, { type: ‘CallExpression’,










  • type: ‘NumberLiteral’, callee: {










  • value: ‘2’ type: ‘Identifier’,










  • }] name: ‘subtract’










  • }] },










  • }] arguments: [{










  • } type: ‘NumberLiteral’,









  • value: ‘4’










  • ———————————- }, {









  • type: ‘NumberLiteral’,









  • value: ‘2’









  • }]










  • (sorry the other one is longer.) }]









  • }









  • }]









  • }





  • */





// So we have our transformer function which will accept the lisp ast.
func transformer(a ast) ast {



// We'll create a new ast which like our previous AST will have a program
// node.
nast := ast{
kind: "Program",
body: []node{},
}

// Next I'm going to cheat a little and create a bit of a hack. We're going to
// use a property named `context` on our parent nodes that we're going to push
// nodes to their parent's `context`. Normally you would have a better
// abstraction than this, but for our purposes this keeps things simple.
//
// Just take note that the context is a reference *from* the old ast *to* the
// new ast.
a.context = &nast.body

// We'll start by calling the traverser function with our ast and a visitor.
traverser(a, map[string]func(n *node, p node){

// The first visitor method accepts `NumberLiterals`
"NumberLiteral": func(n *node, p node) {
// We'll create a new node also named `NumberLiteral` that we will push to
// the parent context.
*p.context = append(*p.context, node{
kind: "NumberLiteral",
value: n.value,
})
},

// Next up, `CallExpressions`.
"CallExpression": func(n *node, p node) {

// We start creating a new node `CallExpression` with a nested
// `Identifier`.
e := node{
kind: "CallExpression",
callee: &node{
kind: "Identifier",
name: n.name,
},
arguments: new([]node),
}

// Next we're going to define a new context on the original
// `CallExpression` node that will reference the `expression`'s arguments
// so that we can push arguments.
n.context = e.arguments

// Then we're going to check if the parent node is a `CallExpression`.
// If it is not...
if p.kind != "CallExpression" {

// We're going to wrap our `CallExpression` node with an
// `ExpressionStatement`.
es := node{
kind: "ExpressionStatement",
expression: &e,
}

// Last, we push our (possibly wrapped) `CallExpression` to the `parent`'s
// `context`.
*p.context = append(*p.context, es)
} else {
*p.context = append(*p.context, e)
}

},
})

// At the end of our transformer function we'll return the new ast that we
// just created.
return nast }


/**



  • ============================================================================

  • ヾ(〃^∇^)ノ♪

  • THE CODE GENERATOR!!!!

  • ============================================================================
    */



/**



  • Now let’s move onto our last phase: The Code Generator.
    *

  • Our code generator is going to recursively call itself to print each node in

  • the tree into one giant string.
    */



func codeGenerator(n node) string {



// We'll break things down by the `type` of the `node`.
switch n.kind {

// If we have a `Program` node. We will map through each node in the `body`
// and run them through the code generator and join them with a newline.
case "Program":
var r []string
for _, no := range n.body {
r = append(r, codeGenerator(no))
}
return strings.Join(r, "\n")

// For `ExpressionStatements` we'll call the code generator on the nested
// expression and we'll add a semicolon...
case "ExpressionStatement":
return codeGenerator(*n.expression) + ";"

// For `CallExpressions` we will print the `callee`, add an open
// parenthesis, we'll map through each node in the `arguments` array and run
// them through the code generator, joining them with a comma, and then
// we'll add a closing parenthesis.
case "CallExpression":
var ra []string
c := codeGenerator(*n.callee)

for _, no := range *n.arguments {
ra = append(ra, codeGenerator(no))
}

r := strings.Join(ra, ", ")
return c + "(" + r + ")"

// For `Identifiers` we'll just return the `node`'s name.
case "Identifier":
return n.name

// For `NumberLiterals` we'll just return the `node`'s value.
case "NumberLiteral":
return n.value

// And if we haven't recognized the node, we'll throw an error.
default:
log.Fatal("err")
return ""
} }


/**



  • ============================================================================

  • (۶* ‘ヮ’)۶”

  • !!!!!!!!THE COMPILER!!!!!!!!

  • ============================================================================
    */



/**



  • FINALLY! We’ll create our compiler function. Here we will link together

  • every part of the pipeline.
    *



    1. input => tokenizer => tokens





    1. tokens => parser => ast





    1. ast => transformer => newAst





    1. newAst => generator => output
      */





func compiler(input string) string {
tokens := tokenizer(input)
ast := parser(tokens)
nast := transformer(ast)
out := codeGenerator(node(nast))



// and simply return the output!
return out }


func main() {
program := “(add 10 (subtract 10 6))”
out := compiler(program)
fmt.Println(out)
}


Category golang