chalk/src/chalk/lexer.cr

99 lines
2.9 KiB
Crystal

require "lex"
module Chalk
module Compiler
# Every kind of token the lexer can produce.
#
# Declaration order is significant: the lexer documentation in this file
# states that when several patterns match the same longest string, the one
# with the highest enum value is chosen. Keywords are therefore declared
# after `Id`, and every specific token after the catch-all `Any`.
enum TokenType
  # Catch-all, string, identifier and sprite-row tokens.
  Any
  Str
  Id
  SpriteRow

  # Numeric literals: decimal, binary and hexadecimal.
  LitDec
  LitBin
  LitHex

  # Operators.
  OpAdd
  OpSub
  OpMul
  OpDiv
  OpOr
  OpAnd
  OpXor

  # Keywords.
  KwSprite
  KwInline
  KwFun
  KwU0
  KwU4
  KwU8
  KwU12
  KwVar
  KwIf
  KwElse
  KwWhile
  KwReturn
end
# A lexed token: the text that was matched paired with the kind of
# token it was matched as.
class Token
  # Builds a token from the matched *string* and its *type*.
  def initialize(@string : String, @type : TokenType)
  end

  # Returns the string this token represents.
  def string : String
    @string
  end

  # Returns the type of this token.
  def type : TokenType
    @type
  end
end
# A lexer preloaded with the default token patterns.
# The lexer is backed by liblex. When a string is
# matched by several tokens, the longest match is chosen
# first, followed by the match with the highest enum value.
class Lexer
  # Registers one liblex pattern per token type. Each pattern is tagged
  # with the integer value of its `TokenType` so the id reported for a
  # match can be converted back into the enum in `#lex`.
  def initialize
    @lexer = Lex::Lexer.new

    # Catch-all, strings, identifiers and sprite rows.
    @lexer.add_pattern(".", TokenType::Any.value)
    @lexer.add_pattern("\"(\\\\\"|[^\"])*\"",
      TokenType::Str.value)
    @lexer.add_pattern("[a-zA-Z_][a-zA-Z_0-9]*",
      TokenType::Id.value)
    @lexer.add_pattern("`[ x]*`",
      TokenType::SpriteRow.value)

    # Numeric literals.
    @lexer.add_pattern("[0-9]+",
      TokenType::LitDec.value)
    @lexer.add_pattern("0b[0-1]+",
      TokenType::LitBin.value)
    @lexer.add_pattern("0x[0-9a-fA-F]+",
      TokenType::LitHex.value)

    # Operators.
    @lexer.add_pattern("\\+", TokenType::OpAdd.value)
    @lexer.add_pattern("-", TokenType::OpSub.value)
    @lexer.add_pattern("\\*", TokenType::OpMul.value)
    @lexer.add_pattern("/", TokenType::OpDiv.value)
    # `&` is the "and" operator; it was previously registered as OpAdd,
    # which made it lex as addition and left OpAnd unreachable.
    @lexer.add_pattern("&", TokenType::OpAnd.value)
    @lexer.add_pattern("\\|", TokenType::OpOr.value)
    # NOTE(review): `^` is left unescaped, unlike `+`, `*` and `|`;
    # confirm liblex treats it as a literal outside a bracket expression.
    @lexer.add_pattern("^", TokenType::OpXor.value)

    # Keywords. These also match the Id pattern; they win the tie
    # because their enum values are higher than Id's.
    @lexer.add_pattern("sprite", TokenType::KwSprite.value)
    @lexer.add_pattern("inline", TokenType::KwInline.value)
    @lexer.add_pattern("fun", TokenType::KwFun.value)
    @lexer.add_pattern("u0", TokenType::KwU0.value)
    @lexer.add_pattern("u4", TokenType::KwU4.value)
    @lexer.add_pattern("u8", TokenType::KwU8.value)
    @lexer.add_pattern("u12", TokenType::KwU12.value)
    @lexer.add_pattern("var", TokenType::KwVar.value)
    @lexer.add_pattern("if", TokenType::KwIf.value)
    @lexer.add_pattern("else", TokenType::KwElse.value)
    @lexer.add_pattern("while", TokenType::KwWhile.value)
    @lexer.add_pattern("return", TokenType::KwReturn.value)
  end

  # Converts a string into tokens.
  #
  # Matches whose text begins with a whitespace character are dropped;
  # every remaining match is wrapped in a `Token`.
  # Assumes `Lex::Lexer#lex` returns a collection of (text, id) pairs with
  # non-empty text -- TODO confirm against liblex.
  def lex(string)
    @lexer.lex(string)
      .reject { |match| match[0][0].whitespace? }
      .map do |match|
        # Distinct local names so the `string` parameter is not reassigned.
        text, id = match
        Token.new(text, TokenType.new(id))
      end
  end
end
end
end