Compare commits
116 Commits
sidenotes
...
5d53678e83
| Author | SHA1 | Date | |
|---|---|---|---|
| 5d53678e83 | |||
| 3cb66a606d | |||
| 074db07275 | |||
| e3834ed6ea | |||
| 1bdb4a650e | |||
| 6966973497 | |||
| 8ee016e189 | |||
| fa0a96f057 | |||
| a2c84f5c40 | |||
| 768c43df2d | |||
| 579d988f4a | |||
| 45bc113e3f | |||
| 1abc13b20f | |||
| cdc9e28c90 | |||
| 8a48a110ff | |||
| 0eb1abd26d | |||
| a7a6d7ff13 | |||
| fb544e0545 | |||
| 84029fbc5b | |||
| 8039e459fa | |||
| f202c8ea44 | |||
| d8d1aa66e6 | |||
| 79ef221820 | |||
| 67ecc741d0 | |||
| 80d722568e | |||
| b9fcac974d | |||
| 31e9e58304 | |||
| 8f09b518ba | |||
| 2d6aab6b71 | |||
| 6712c0064a | |||
| 5e6d97ab36 | |||
| ea753fdfe7 | |||
| 1db8a24b4d | |||
| f8adac8b76 | |||
| 4bae586e36 | |||
| 3522c34adf | |||
| 96fc519b3c | |||
| 28f686eb80 | |||
| 6f0c95e49c | |||
| 130086db00 | |||
| 2a1ad171c0 | |||
| db8a050bdf | |||
| 3ff5ce4dec | |||
| 9f8855a4d3 | |||
| d3515d3fa5 | |||
| 277427af57 | |||
| f6c1079bda | |||
| 252d82469c | |||
| 1879ba2c2b | |||
| fc444c1986 | |||
| ae9805e4f2 | |||
| 33b1457e91 | |||
| 9e399ebe3c | |||
| eac1151616 | |||
| f7a7100fea | |||
| c207d1dfcf | |||
| df051fd643 | |||
| 419ab937b6 | |||
| 7ff919c31b | |||
| ee90351c17 | |||
| fbdbf67ce3 | |||
| a7e32d300a | |||
| 56387cb936 | |||
| df965816ac | |||
| c7341c9b15 | |||
| 00322d7e9f | |||
| ef93632130 | |||
| 0f744888ef | |||
| a5b84bab69 | |||
| 12725500a8 | |||
| 1917c08e51 | |||
| b304057560 | |||
| e5a39d8dfb | |||
| 54ccef9c72 | |||
| c103c6acbf | |||
| d6f53076c0 | |||
| b07ea8fe9c | |||
| 9a7441779f | |||
| a6f27e446d | |||
| e7f0ccfa16 | |||
| e5d01a4e19 | |||
| b7d72f2fbf | |||
| 281dbbd174 | |||
| 153349f3d5 | |||
| 8d22acfe78 | |||
| c1b030ee97 | |||
| 803f52b2d0 | |||
| 2f96abeef6 | |||
| 163fcd2b2e | |||
| 9ddcb1b3f2 | |||
| 133979218a | |||
| ef545be03c | |||
| c534dc7508 | |||
| 263ffe2b8c | |||
| 67181fb033 | |||
| a026e67a3b | |||
| d9544398b9 | |||
| 1c4bb29fdd | |||
| 765d497724 | |||
| 80410c9200 | |||
| 4e918db5cb | |||
| 382102f071 | |||
| 6e88780f8b | |||
| e3035b9d66 | |||
| 8765626898 | |||
| c38247df9e | |||
| baf44f8627 | |||
| 19aa126025 | |||
| a406fb0846 | |||
| 75664e90bb | |||
| f74209c970 | |||
| c7ce8a3107 | |||
| b3b906dd90 | |||
| b8e0e0b4ce | |||
| eb02e1e6b0 | |||
| b2fc6ea5a8 |
38
.drone.yml
Normal file
38
.drone.yml
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
kind: pipeline
|
||||||
|
type: docker
|
||||||
|
name: default
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
- name: live-output
|
||||||
|
temp: {}
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: test-compiler
|
||||||
|
image: archlinux
|
||||||
|
commands:
|
||||||
|
- pacman -Sy cmake gcc make llvm bison flex gettext --noconfirm
|
||||||
|
- cd code/compiler
|
||||||
|
- ./test.sh
|
||||||
|
- name: build-live
|
||||||
|
image: klakegg/hugo:ext-alpine
|
||||||
|
commands:
|
||||||
|
- hugo -D --baseUrl "http://danilafe.com:8080"
|
||||||
|
volumes:
|
||||||
|
- name: live-output
|
||||||
|
path: /live-output
|
||||||
|
environment:
|
||||||
|
HUGO_DESTINATION: /live-output
|
||||||
|
- name: upload-live
|
||||||
|
image: eeacms/rsync
|
||||||
|
commands:
|
||||||
|
- eval `ssh-agent -s`
|
||||||
|
- echo "$CUSTOM_KEY" | ssh-add -
|
||||||
|
- mkdir -p ~/.ssh
|
||||||
|
- echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config
|
||||||
|
- rsync -rv -e "ssh -p 22" /live-output/ blog-live@danilafe.com:/var/www/blog-live/ --checksum
|
||||||
|
environment:
|
||||||
|
CUSTOM_KEY:
|
||||||
|
from_secret: live_ssh_key
|
||||||
|
volumes:
|
||||||
|
- name: live-output
|
||||||
|
path: /live-output
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
@import "style.scss";
|
@import "variables.scss";
|
||||||
|
@import "mixins.scss";
|
||||||
|
|
||||||
.gmachine-instruction {
|
.gmachine-instruction {
|
||||||
display: flex;
|
display: flex;
|
||||||
@@ -6,7 +7,7 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
.gmachine-instruction-name {
|
.gmachine-instruction-name {
|
||||||
padding: 10px;
|
padding: .8rem;
|
||||||
border-right: $standard-border;
|
border-right: $standard-border;
|
||||||
flex-grow: 1;
|
flex-grow: 1;
|
||||||
flex-basis: 20%;
|
flex-basis: 20%;
|
||||||
@@ -28,12 +29,12 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
.gmachine-inner-label {
|
.gmachine-inner-label {
|
||||||
padding: 10px;
|
padding: .8rem;
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
}
|
}
|
||||||
|
|
||||||
.gmachine-inner-text {
|
.gmachine-inner-text {
|
||||||
padding: 10px;
|
padding: .8rem;
|
||||||
text-align: right;
|
text-align: right;
|
||||||
flex-grow: 1;
|
flex-grow: 1;
|
||||||
}
|
}
|
||||||
|
|||||||
19
assets/scss/stack.scss
Normal file
19
assets/scss/stack.scss
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
@import "variables.scss";
|
||||||
|
@import "mixins.scss";
|
||||||
|
|
||||||
|
.stack {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
max-width: 10rem;
|
||||||
|
margin: auto;
|
||||||
|
@include bordered-block;
|
||||||
|
}
|
||||||
|
|
||||||
|
.stack-element {
|
||||||
|
text-align: center;
|
||||||
|
min-height: 1.5rem;
|
||||||
|
|
||||||
|
&:not(:last-child) {
|
||||||
|
border-bottom: $standard-border;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,3 +1,2 @@
|
|||||||
defn main = { sum 320 6 }
|
defn main = { sum 320 6 }
|
||||||
defn sum x y = { x + y }
|
defn sum x y = { x + y }
|
||||||
|
|
||||||
|
|||||||
42
code/compiler/09/CMakeLists.txt
Normal file
42
code/compiler/09/CMakeLists.txt
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.1)
|
||||||
|
project(compiler)
|
||||||
|
|
||||||
|
# Find all the required packages
|
||||||
|
find_package(BISON)
|
||||||
|
find_package(FLEX)
|
||||||
|
find_package(LLVM REQUIRED CONFIG)
|
||||||
|
|
||||||
|
# Set up the flex and bison targets
|
||||||
|
bison_target(parser
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/parser.y
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/parser.cpp
|
||||||
|
COMPILE_FLAGS "-d")
|
||||||
|
flex_target(scanner
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/scanner.l
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp)
|
||||||
|
add_flex_bison_dependency(scanner parser)
|
||||||
|
|
||||||
|
# Find all the relevant LLVM components
|
||||||
|
llvm_map_components_to_libnames(LLVM_LIBS core x86asmparser x86codegen)
|
||||||
|
|
||||||
|
# Create compiler executable
|
||||||
|
add_executable(compiler
|
||||||
|
ast.cpp ast.hpp definition.cpp
|
||||||
|
llvm_context.cpp llvm_context.hpp
|
||||||
|
type_env.cpp type_env.hpp
|
||||||
|
env.cpp env.hpp
|
||||||
|
type.cpp type.hpp
|
||||||
|
error.cpp error.hpp
|
||||||
|
binop.cpp binop.hpp
|
||||||
|
instruction.cpp instruction.hpp
|
||||||
|
${BISON_parser_OUTPUTS}
|
||||||
|
${FLEX_scanner_OUTPUTS}
|
||||||
|
main.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
# Configure compiler executable
|
||||||
|
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
|
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
|
||||||
|
target_include_directories(compiler PUBLIC ${LLVM_INCLUDE_DIRS})
|
||||||
|
target_compile_definitions(compiler PUBLIC ${LLVM_DEFINITIONS})
|
||||||
|
target_link_libraries(compiler ${LLVM_LIBS})
|
||||||
264
code/compiler/09/ast.cpp
Normal file
264
code/compiler/09/ast.cpp
Normal file
@@ -0,0 +1,264 @@
|
|||||||
|
#include "ast.hpp"
|
||||||
|
#include <ostream>
|
||||||
|
#include "binop.hpp"
|
||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
|
static void print_indent(int n, std::ostream& to) {
|
||||||
|
while(n--) to << " ";
|
||||||
|
}
|
||||||
|
|
||||||
|
type_ptr ast::typecheck_common(type_mgr& mgr, const type_env& env) {
|
||||||
|
node_type = typecheck(mgr, env);
|
||||||
|
return node_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast::resolve_common(const type_mgr& mgr) {
|
||||||
|
type_var* var;
|
||||||
|
type_ptr resolved_type = mgr.resolve(node_type, var);
|
||||||
|
if(var) throw type_error("ambiguously typed program");
|
||||||
|
|
||||||
|
resolve(mgr);
|
||||||
|
node_type = std::move(resolved_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_int::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "INT: " << value << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||||
|
return type_ptr(new type_base("Int"));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_int::resolve(const type_mgr& mgr) const {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
into.push_back(instruction_ptr(new instruction_pushint(value)));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_lid::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "LID: " << id << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||||
|
return env.lookup(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_lid::resolve(const type_mgr& mgr) const {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
into.push_back(instruction_ptr(
|
||||||
|
env->has_variable(id) ?
|
||||||
|
(instruction*) new instruction_push(env->get_offset(id)) :
|
||||||
|
(instruction*) new instruction_pushglobal(id)));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_uid::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "UID: " << id << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||||
|
return env.lookup(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_uid::resolve(const type_mgr& mgr) const {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
into.push_back(instruction_ptr(new instruction_pushglobal(id)));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_binop::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "BINOP: " << op_name(op) << std::endl;
|
||||||
|
left->print(indent + 1, to);
|
||||||
|
right->print(indent + 1, to);
|
||||||
|
}
|
||||||
|
|
||||||
|
type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||||
|
type_ptr ltype = left->typecheck_common(mgr, env);
|
||||||
|
type_ptr rtype = right->typecheck_common(mgr, env);
|
||||||
|
type_ptr ftype = env.lookup(op_name(op));
|
||||||
|
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op));
|
||||||
|
|
||||||
|
type_ptr return_type = mgr.new_type();
|
||||||
|
type_ptr arrow_one = type_ptr(new type_arr(rtype, return_type));
|
||||||
|
type_ptr arrow_two = type_ptr(new type_arr(ltype, arrow_one));
|
||||||
|
|
||||||
|
mgr.unify(arrow_two, ftype);
|
||||||
|
return return_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_binop::resolve(const type_mgr& mgr) const {
|
||||||
|
left->resolve_common(mgr);
|
||||||
|
right->resolve_common(mgr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
right->compile(env, into);
|
||||||
|
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||||
|
|
||||||
|
into.push_back(instruction_ptr(new instruction_pushglobal(op_action(op))));
|
||||||
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_app::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "APP:" << std::endl;
|
||||||
|
left->print(indent + 1, to);
|
||||||
|
right->print(indent + 1, to);
|
||||||
|
}
|
||||||
|
|
||||||
|
type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||||
|
type_ptr ltype = left->typecheck_common(mgr, env);
|
||||||
|
type_ptr rtype = right->typecheck_common(mgr, env);
|
||||||
|
|
||||||
|
type_ptr return_type = mgr.new_type();
|
||||||
|
type_ptr arrow = type_ptr(new type_arr(rtype, return_type));
|
||||||
|
mgr.unify(arrow, ltype);
|
||||||
|
return return_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_app::resolve(const type_mgr& mgr) const {
|
||||||
|
left->resolve_common(mgr);
|
||||||
|
right->resolve_common(mgr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
right->compile(env, into);
|
||||||
|
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||||
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_case::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "CASE: " << std::endl;
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
print_indent(indent + 1, to);
|
||||||
|
branch->pat->print(to);
|
||||||
|
to << std::endl;
|
||||||
|
branch->expr->print(indent + 2, to);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
|
||||||
|
type_var* var;
|
||||||
|
type_ptr case_type = mgr.resolve(of->typecheck_common(mgr, env), var);
|
||||||
|
type_ptr branch_type = mgr.new_type();
|
||||||
|
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
type_env new_env = env.scope();
|
||||||
|
branch->pat->match(case_type, mgr, new_env);
|
||||||
|
type_ptr curr_branch_type = branch->expr->typecheck_common(mgr, new_env);
|
||||||
|
mgr.unify(branch_type, curr_branch_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
case_type = mgr.resolve(case_type, var);
|
||||||
|
if(!dynamic_cast<type_data*>(case_type.get())) {
|
||||||
|
throw type_error("attempting case analysis of non-data type");
|
||||||
|
}
|
||||||
|
|
||||||
|
return branch_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_case::resolve(const type_mgr& mgr) const {
|
||||||
|
of->resolve_common(mgr);
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
branch->expr->resolve_common(mgr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
type_data* type = dynamic_cast<type_data*>(of->node_type.get());
|
||||||
|
|
||||||
|
of->compile(env, into);
|
||||||
|
into.push_back(instruction_ptr(new instruction_eval()));
|
||||||
|
|
||||||
|
instruction_jump* jump_instruction = new instruction_jump();
|
||||||
|
into.push_back(instruction_ptr(jump_instruction));
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
std::vector<instruction_ptr> branch_instructions;
|
||||||
|
pattern_var* vpat;
|
||||||
|
pattern_constr* cpat;
|
||||||
|
|
||||||
|
if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
|
||||||
|
branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions);
|
||||||
|
|
||||||
|
for(auto& constr_pair : type->constructors) {
|
||||||
|
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) !=
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
break;
|
||||||
|
|
||||||
|
jump_instruction->tag_mappings[constr_pair.second.tag] =
|
||||||
|
jump_instruction->branches.size();
|
||||||
|
}
|
||||||
|
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||||
|
} else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
|
||||||
|
env_ptr new_env = env;
|
||||||
|
for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) {
|
||||||
|
new_env = env_ptr(new env_var(*it, new_env));
|
||||||
|
}
|
||||||
|
|
||||||
|
branch_instructions.push_back(instruction_ptr(new instruction_split(
|
||||||
|
cpat->params.size())));
|
||||||
|
branch->expr->compile(new_env, branch_instructions);
|
||||||
|
branch_instructions.push_back(instruction_ptr(new instruction_slide(
|
||||||
|
cpat->params.size())));
|
||||||
|
|
||||||
|
int new_tag = type->constructors[cpat->constr].tag;
|
||||||
|
if(jump_instruction->tag_mappings.find(new_tag) !=
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
throw type_error("technically not a type error: duplicate pattern");
|
||||||
|
|
||||||
|
jump_instruction->tag_mappings[new_tag] =
|
||||||
|
jump_instruction->branches.size();
|
||||||
|
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& constr_pair : type->constructors) {
|
||||||
|
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) ==
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
throw type_error("non-total pattern");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void pattern_var::print(std::ostream& to) const {
|
||||||
|
to << var;
|
||||||
|
}
|
||||||
|
|
||||||
|
void pattern_var::match(type_ptr t, type_mgr& mgr, type_env& env) const {
|
||||||
|
env.bind(var, t);
|
||||||
|
}
|
||||||
|
|
||||||
|
void pattern_constr::print(std::ostream& to) const {
|
||||||
|
to << constr;
|
||||||
|
for(auto& param : params) {
|
||||||
|
to << " " << param;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const {
|
||||||
|
type_ptr constructor_type = env.lookup(constr);
|
||||||
|
if(!constructor_type) {
|
||||||
|
throw type_error(std::string("pattern using unknown constructor ") + constr);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int i = 0; i < params.size(); i++) {
|
||||||
|
type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
|
||||||
|
if(!arr) throw type_error("too many parameters in constructor pattern");
|
||||||
|
|
||||||
|
env.bind(params[i], arr->left);
|
||||||
|
constructor_type = arr->right;
|
||||||
|
}
|
||||||
|
|
||||||
|
mgr.unify(t, constructor_type);
|
||||||
|
}
|
||||||
141
code/compiler/09/ast.hpp
Normal file
141
code/compiler/09/ast.hpp
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include "type.hpp"
|
||||||
|
#include "type_env.hpp"
|
||||||
|
#include "binop.hpp"
|
||||||
|
#include "instruction.hpp"
|
||||||
|
#include "env.hpp"
|
||||||
|
|
||||||
|
struct ast {
|
||||||
|
type_ptr node_type;
|
||||||
|
|
||||||
|
virtual ~ast() = default;
|
||||||
|
|
||||||
|
virtual void print(int indent, std::ostream& to) const = 0;
|
||||||
|
virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const = 0;
|
||||||
|
virtual void resolve(const type_mgr& mgr) const = 0;
|
||||||
|
virtual void compile(const env_ptr& env,
|
||||||
|
std::vector<instruction_ptr>& into) const = 0;
|
||||||
|
|
||||||
|
type_ptr typecheck_common(type_mgr& mgr, const type_env& env);
|
||||||
|
void resolve_common(const type_mgr& mgr);
|
||||||
|
};
|
||||||
|
|
||||||
|
using ast_ptr = std::unique_ptr<ast>;
|
||||||
|
|
||||||
|
struct pattern {
|
||||||
|
virtual ~pattern() = default;
|
||||||
|
|
||||||
|
virtual void print(std::ostream& to) const = 0;
|
||||||
|
virtual void match(type_ptr t, type_mgr& mgr, type_env& env) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using pattern_ptr = std::unique_ptr<pattern>;
|
||||||
|
|
||||||
|
struct branch {
|
||||||
|
pattern_ptr pat;
|
||||||
|
ast_ptr expr;
|
||||||
|
|
||||||
|
branch(pattern_ptr p, ast_ptr a)
|
||||||
|
: pat(std::move(p)), expr(std::move(a)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
using branch_ptr = std::unique_ptr<branch>;
|
||||||
|
|
||||||
|
struct ast_int : public ast {
|
||||||
|
int value;
|
||||||
|
|
||||||
|
explicit ast_int(int v)
|
||||||
|
: value(v) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_lid : public ast {
|
||||||
|
std::string id;
|
||||||
|
|
||||||
|
explicit ast_lid(std::string i)
|
||||||
|
: id(std::move(i)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_uid : public ast {
|
||||||
|
std::string id;
|
||||||
|
|
||||||
|
explicit ast_uid(std::string i)
|
||||||
|
: id(std::move(i)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_binop : public ast {
|
||||||
|
binop op;
|
||||||
|
ast_ptr left;
|
||||||
|
ast_ptr right;
|
||||||
|
|
||||||
|
ast_binop(binop o, ast_ptr l, ast_ptr r)
|
||||||
|
: op(o), left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_app : public ast {
|
||||||
|
ast_ptr left;
|
||||||
|
ast_ptr right;
|
||||||
|
|
||||||
|
ast_app(ast_ptr l, ast_ptr r)
|
||||||
|
: left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_case : public ast {
|
||||||
|
ast_ptr of;
|
||||||
|
std::vector<branch_ptr> branches;
|
||||||
|
|
||||||
|
ast_case(ast_ptr o, std::vector<branch_ptr> b)
|
||||||
|
: of(std::move(o)), branches(std::move(b)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr) const;
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct pattern_var : public pattern {
|
||||||
|
std::string var;
|
||||||
|
|
||||||
|
pattern_var(std::string v)
|
||||||
|
: var(std::move(v)) {}
|
||||||
|
|
||||||
|
void print(std::ostream &to) const;
|
||||||
|
void match(type_ptr t, type_mgr& mgr, type_env& env) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct pattern_constr : public pattern {
|
||||||
|
std::string constr;
|
||||||
|
std::vector<std::string> params;
|
||||||
|
|
||||||
|
pattern_constr(std::string c, std::vector<std::string> p)
|
||||||
|
: constr(std::move(c)), params(std::move(p)) {}
|
||||||
|
|
||||||
|
void print(std::ostream &to) const;
|
||||||
|
void match(type_ptr t, type_mgr&, type_env& env) const;
|
||||||
|
};
|
||||||
21
code/compiler/09/binop.cpp
Normal file
21
code/compiler/09/binop.cpp
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
#include "binop.hpp"
|
||||||
|
|
||||||
|
std::string op_name(binop op) {
|
||||||
|
switch(op) {
|
||||||
|
case PLUS: return "+";
|
||||||
|
case MINUS: return "-";
|
||||||
|
case TIMES: return "*";
|
||||||
|
case DIVIDE: return "/";
|
||||||
|
}
|
||||||
|
return "??";
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string op_action(binop op) {
|
||||||
|
switch(op) {
|
||||||
|
case PLUS: return "plus";
|
||||||
|
case MINUS: return "minus";
|
||||||
|
case TIMES: return "times";
|
||||||
|
case DIVIDE: return "divide";
|
||||||
|
}
|
||||||
|
return "??";
|
||||||
|
}
|
||||||
12
code/compiler/09/binop.hpp
Normal file
12
code/compiler/09/binop.hpp
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
enum binop {
|
||||||
|
PLUS,
|
||||||
|
MINUS,
|
||||||
|
TIMES,
|
||||||
|
DIVIDE
|
||||||
|
};
|
||||||
|
|
||||||
|
std::string op_name(binop op);
|
||||||
|
std::string op_action(binop op);
|
||||||
121
code/compiler/09/definition.cpp
Normal file
121
code/compiler/09/definition.cpp
Normal file
@@ -0,0 +1,121 @@
|
|||||||
|
#include "definition.hpp"
|
||||||
|
#include "error.hpp"
|
||||||
|
#include "ast.hpp"
|
||||||
|
#include "instruction.hpp"
|
||||||
|
#include "llvm_context.hpp"
|
||||||
|
#include <llvm/IR/DerivedTypes.h>
|
||||||
|
#include <llvm/IR/Function.h>
|
||||||
|
#include <llvm/IR/Type.h>
|
||||||
|
|
||||||
|
void definition_defn::typecheck_first(type_mgr& mgr, type_env& env) {
|
||||||
|
return_type = mgr.new_type();
|
||||||
|
type_ptr full_type = return_type;
|
||||||
|
|
||||||
|
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
||||||
|
type_ptr param_type = mgr.new_type();
|
||||||
|
full_type = type_ptr(new type_arr(param_type, full_type));
|
||||||
|
param_types.push_back(param_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
env.bind(name, full_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const {
|
||||||
|
type_env new_env = env.scope();
|
||||||
|
auto param_it = params.begin();
|
||||||
|
auto type_it = param_types.rbegin();
|
||||||
|
|
||||||
|
while(param_it != params.end() && type_it != param_types.rend()) {
|
||||||
|
new_env.bind(*param_it, *type_it);
|
||||||
|
param_it++;
|
||||||
|
type_it++;
|
||||||
|
}
|
||||||
|
|
||||||
|
type_ptr body_type = body->typecheck_common(mgr, new_env);
|
||||||
|
mgr.unify(return_type, body_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_defn::resolve(const type_mgr& mgr) {
|
||||||
|
type_var* var;
|
||||||
|
body->resolve_common(mgr);
|
||||||
|
|
||||||
|
return_type = mgr.resolve(return_type, var);
|
||||||
|
if(var) throw type_error("ambiguously typed program");
|
||||||
|
for(auto& param_type : param_types) {
|
||||||
|
param_type = mgr.resolve(param_type, var);
|
||||||
|
if(var) throw type_error("ambiguously typed program");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_defn::compile() {
|
||||||
|
env_ptr new_env = env_ptr(new env_offset(0, nullptr));
|
||||||
|
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
||||||
|
new_env = env_ptr(new env_var(*it, new_env));
|
||||||
|
}
|
||||||
|
body->compile(new_env, instructions);
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_update(params.size())));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
|
||||||
|
}
|
||||||
|
void definition_defn::gen_llvm_first(llvm_context& ctx) {
|
||||||
|
generated_function = ctx.create_custom_function(name, params.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_defn::gen_llvm_second(llvm_context& ctx) {
|
||||||
|
ctx.builder.SetInsertPoint(&generated_function->getEntryBlock());
|
||||||
|
for(auto& instruction : instructions) {
|
||||||
|
instruction->gen_llvm(ctx, generated_function);
|
||||||
|
}
|
||||||
|
ctx.builder.CreateRetVoid();
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
|
||||||
|
type_data* this_type = new type_data(name);
|
||||||
|
type_ptr return_type = type_ptr(this_type);
|
||||||
|
int next_tag = 0;
|
||||||
|
|
||||||
|
for(auto& constructor : constructors) {
|
||||||
|
constructor->tag = next_tag;
|
||||||
|
this_type->constructors[constructor->name] = { next_tag++ };
|
||||||
|
|
||||||
|
type_ptr full_type = return_type;
|
||||||
|
for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) {
|
||||||
|
type_ptr type = type_ptr(new type_base(*it));
|
||||||
|
full_type = type_ptr(new type_arr(type, full_type));
|
||||||
|
}
|
||||||
|
|
||||||
|
env.bind(constructor->name, full_type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const {
|
||||||
|
// Nothing
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_data::resolve(const type_mgr& mgr) {
|
||||||
|
// Nothing
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_data::compile() {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_data::gen_llvm_first(llvm_context& ctx) {
|
||||||
|
for(auto& constructor : constructors) {
|
||||||
|
auto new_function =
|
||||||
|
ctx.create_custom_function(constructor->name, constructor->types.size());
|
||||||
|
std::vector<instruction_ptr> instructions;
|
||||||
|
instructions.push_back(instruction_ptr(
|
||||||
|
new instruction_pack(constructor->tag, constructor->types.size())
|
||||||
|
));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_update(0)));
|
||||||
|
ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
|
||||||
|
for (auto& instruction : instructions) {
|
||||||
|
instruction->gen_llvm(ctx, new_function);
|
||||||
|
}
|
||||||
|
ctx.builder.CreateRetVoid();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void definition_data::gen_llvm_second(llvm_context& ctx) {
|
||||||
|
// Nothing
|
||||||
|
}
|
||||||
73
code/compiler/09/definition.hpp
Normal file
73
code/compiler/09/definition.hpp
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include "instruction.hpp"
|
||||||
|
#include "llvm_context.hpp"
|
||||||
|
#include "type_env.hpp"
|
||||||
|
|
||||||
|
struct ast;
|
||||||
|
using ast_ptr = std::unique_ptr<ast>;
|
||||||
|
|
||||||
|
struct definition {
|
||||||
|
virtual ~definition() = default;
|
||||||
|
|
||||||
|
virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0;
|
||||||
|
virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0;
|
||||||
|
virtual void resolve(const type_mgr& mgr) = 0;
|
||||||
|
virtual void compile() = 0;
|
||||||
|
virtual void gen_llvm_first(llvm_context& ctx) = 0;
|
||||||
|
virtual void gen_llvm_second(llvm_context& ctx) = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using definition_ptr = std::unique_ptr<definition>;
|
||||||
|
|
||||||
|
struct constructor {
|
||||||
|
std::string name;
|
||||||
|
std::vector<std::string> types;
|
||||||
|
int8_t tag;
|
||||||
|
|
||||||
|
constructor(std::string n, std::vector<std::string> ts)
|
||||||
|
: name(std::move(n)), types(std::move(ts)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
using constructor_ptr = std::unique_ptr<constructor>;
|
||||||
|
|
||||||
|
struct definition_defn : public definition {
|
||||||
|
std::string name;
|
||||||
|
std::vector<std::string> params;
|
||||||
|
ast_ptr body;
|
||||||
|
|
||||||
|
type_ptr return_type;
|
||||||
|
std::vector<type_ptr> param_types;
|
||||||
|
|
||||||
|
std::vector<instruction_ptr> instructions;
|
||||||
|
|
||||||
|
llvm::Function* generated_function;
|
||||||
|
|
||||||
|
definition_defn(std::string n, std::vector<std::string> p, ast_ptr b)
|
||||||
|
: name(std::move(n)), params(std::move(p)), body(std::move(b)) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void typecheck_first(type_mgr& mgr, type_env& env);
|
||||||
|
void typecheck_second(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr);
|
||||||
|
void compile();
|
||||||
|
void gen_llvm_first(llvm_context& ctx);
|
||||||
|
void gen_llvm_second(llvm_context& ctx);
|
||||||
|
};
|
||||||
|
|
||||||
|
struct definition_data : public definition {
|
||||||
|
std::string name;
|
||||||
|
std::vector<constructor_ptr> constructors;
|
||||||
|
|
||||||
|
definition_data(std::string n, std::vector<constructor_ptr> cs)
|
||||||
|
: name(std::move(n)), constructors(std::move(cs)) {}
|
||||||
|
|
||||||
|
void typecheck_first(type_mgr& mgr, type_env& env);
|
||||||
|
void typecheck_second(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void resolve(const type_mgr& mgr);
|
||||||
|
void compile();
|
||||||
|
void gen_llvm_first(llvm_context& ctx);
|
||||||
|
void gen_llvm_second(llvm_context& ctx);
|
||||||
|
};
|
||||||
23
code/compiler/09/env.cpp
Normal file
23
code/compiler/09/env.cpp
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
#include "env.hpp"
|
||||||
|
|
||||||
|
int env_var::get_offset(const std::string& name) const {
|
||||||
|
if(name == this->name) return 0;
|
||||||
|
if(parent) return parent->get_offset(name) + 1;
|
||||||
|
throw 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool env_var::has_variable(const std::string& name) const {
|
||||||
|
if(name == this->name) return true;
|
||||||
|
if(parent) return parent->has_variable(name);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
int env_offset::get_offset(const std::string& name) const {
|
||||||
|
if(parent) return parent->get_offset(name) + offset;
|
||||||
|
throw 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool env_offset::has_variable(const std::string& name) const {
|
||||||
|
if(parent) return parent->has_variable(name);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
34
code/compiler/09/env.hpp
Normal file
34
code/compiler/09/env.hpp
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
// Abstract interface for a chain of environment nodes that can be asked
// whether a name is bound and at which offset.
struct env {
    // Virtual so that nodes can be destroyed through a base pointer.
    virtual ~env() = default;

    // Offset associated with `name`.
    virtual int get_offset(const std::string& name) const = 0;

    // Whether `name` is bound anywhere in the chain.
    virtual bool has_variable(const std::string& name) const = 0;
};
|
||||||
|
|
||||||
|
using env_ptr = std::shared_ptr<env>;
|
||||||
|
|
||||||
|
struct env_var : public env {
|
||||||
|
std::string name;
|
||||||
|
env_ptr parent;
|
||||||
|
|
||||||
|
env_var(std::string& n, env_ptr p)
|
||||||
|
: name(std::move(n)), parent(std::move(p)) {}
|
||||||
|
|
||||||
|
int get_offset(const std::string& name) const;
|
||||||
|
bool has_variable(const std::string& name) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct env_offset : public env {
|
||||||
|
int offset;
|
||||||
|
env_ptr parent;
|
||||||
|
|
||||||
|
env_offset(int o, env_ptr p)
|
||||||
|
: offset(o), parent(std::move(p)) {}
|
||||||
|
|
||||||
|
int get_offset(const std::string& name) const;
|
||||||
|
bool has_variable(const std::string& name) const;
|
||||||
|
};
|
||||||
5
code/compiler/09/error.cpp
Normal file
5
code/compiler/09/error.cpp
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
|
// Generic message for any type error; the specific reason is kept in the
// `description` member and is not included here.
const char* type_error::what() const noexcept {
    return "an error occurred while checking the types of the program";
}
|
||||||
21
code/compiler/09/error.hpp
Normal file
21
code/compiler/09/error.hpp
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <exception>
|
||||||
|
#include "type.hpp"
|
||||||
|
|
||||||
|
// Exception type for failures found while type checking.
struct type_error : public std::exception {
    // Text describing the specific failure.
    std::string description;

    // Stores the given description.
    type_error(std::string d)
        : description(std::move(d)) {}

    // Defined out of line.
    const char* what() const noexcept override;
};
|
||||||
|
|
||||||
|
struct unification_error : public type_error {
|
||||||
|
type_ptr left;
|
||||||
|
type_ptr right;
|
||||||
|
|
||||||
|
unification_error(type_ptr l, type_ptr r)
|
||||||
|
: left(std::move(l)), right(std::move(r)),
|
||||||
|
type_error("failed to unify types") {}
|
||||||
|
};
|
||||||
2
code/compiler/09/examples/bad1.txt
Normal file
2
code/compiler/09/examples/bad1.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
data Bool = { True, False }
|
||||||
|
defn main = { 3 + True }
|
||||||
1
code/compiler/09/examples/bad2.txt
Normal file
1
code/compiler/09/examples/bad2.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
defn main = { 1 2 3 4 5 }
|
||||||
8
code/compiler/09/examples/bad3.txt
Normal file
8
code/compiler/09/examples/bad3.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
data List = { Nil, Cons Int List }
|
||||||
|
|
||||||
|
defn head l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { 0 }
|
||||||
|
Cons x y z -> { x }
|
||||||
|
}
|
||||||
|
}
|
||||||
129
code/compiler/09/examples/primes.txt
Normal file
129
code/compiler/09/examples/primes.txt
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
data List = { Nil, Cons Nat List }
|
||||||
|
data Bool = { True, False }
|
||||||
|
data Nat = { O, S Nat }
|
||||||
|
|
||||||
|
defn ifN c t e = {
|
||||||
|
case c of {
|
||||||
|
True -> { t }
|
||||||
|
False -> { e }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn ifL c t e = {
|
||||||
|
case c of {
|
||||||
|
True -> { t }
|
||||||
|
False -> { e }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn toInt n = {
|
||||||
|
case n of {
|
||||||
|
O -> { 0 }
|
||||||
|
S np -> { 1 + toInt np }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn lte n m = {
|
||||||
|
case m of {
|
||||||
|
O -> {
|
||||||
|
case n of {
|
||||||
|
O -> { True }
|
||||||
|
S np -> { False }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
S mp -> {
|
||||||
|
case n of {
|
||||||
|
O -> { True }
|
||||||
|
S np -> { lte np mp }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn minus n m = {
|
||||||
|
case m of {
|
||||||
|
O -> { n }
|
||||||
|
S mp -> {
|
||||||
|
case n of {
|
||||||
|
O -> { O }
|
||||||
|
S np -> {
|
||||||
|
minus np mp
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn mod n m = {
|
||||||
|
ifN (lte m n) (mod (minus n m) m) n
|
||||||
|
}
|
||||||
|
|
||||||
|
defn notDivisibleBy n m = {
|
||||||
|
case (mod m n) of {
|
||||||
|
O -> { False }
|
||||||
|
S mp -> { True }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn filter f l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { Nil }
|
||||||
|
Cons x xs -> { ifL (f x) (Cons x (filter f xs)) (filter f xs) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn map f l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { Nil }
|
||||||
|
Cons x xs -> { Cons (f x) (map f xs) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn nats = {
|
||||||
|
Cons (S (S O)) (map S nats)
|
||||||
|
}
|
||||||
|
|
||||||
|
defn primesRec l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { Nil }
|
||||||
|
Cons p xs -> { Cons p (primesRec (filter (notDivisibleBy p) xs)) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn primes = {
|
||||||
|
primesRec nats
|
||||||
|
}
|
||||||
|
|
||||||
|
defn take n l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { Nil }
|
||||||
|
Cons x xs -> {
|
||||||
|
case n of {
|
||||||
|
O -> { Nil }
|
||||||
|
S np -> { Cons x (take np xs) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn head l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { O }
|
||||||
|
Cons x xs -> { x }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn reverseAcc a l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { a }
|
||||||
|
Cons x xs -> { reverseAcc (Cons x a) xs }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn reverse l = {
|
||||||
|
reverseAcc Nil l
|
||||||
|
}
|
||||||
|
|
||||||
|
defn main = {
|
||||||
|
toInt (head (reverse (take ((S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S O))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) primes)))
|
||||||
|
}
|
||||||
31
code/compiler/09/examples/runtime1.c
Normal file
31
code/compiler/09/examples/runtime1.c
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
#include "../runtime.h"
|
||||||
|
|
||||||
|
void f_add(struct stack* s) {
|
||||||
|
struct node_num* left = (struct node_num*) eval(stack_peek(s, 0));
|
||||||
|
struct node_num* right = (struct node_num*) eval(stack_peek(s, 1));
|
||||||
|
stack_push(s, (struct node_base*) alloc_num(left->value + right->value));
|
||||||
|
}
|
||||||
|
|
||||||
|
void f_main(struct stack* s) {
|
||||||
|
// PushInt 320
|
||||||
|
stack_push(s, (struct node_base*) alloc_num(320));
|
||||||
|
|
||||||
|
// PushInt 6
|
||||||
|
stack_push(s, (struct node_base*) alloc_num(6));
|
||||||
|
|
||||||
|
// PushGlobal f_add (the function for +)
|
||||||
|
stack_push(s, (struct node_base*) alloc_global(f_add, 2));
|
||||||
|
|
||||||
|
struct node_base* left;
|
||||||
|
struct node_base* right;
|
||||||
|
|
||||||
|
// MkApp
|
||||||
|
left = stack_pop(s);
|
||||||
|
right = stack_pop(s);
|
||||||
|
stack_push(s, (struct node_base*) alloc_app(left, right));
|
||||||
|
|
||||||
|
// MkApp
|
||||||
|
left = stack_pop(s);
|
||||||
|
right = stack_pop(s);
|
||||||
|
stack_push(s, (struct node_base*) alloc_app(left, right));
|
||||||
|
}
|
||||||
2
code/compiler/09/examples/works1.txt
Normal file
2
code/compiler/09/examples/works1.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
defn main = { sum 320 6 }
|
||||||
|
defn sum x y = { x + y }
|
||||||
3
code/compiler/09/examples/works2.txt
Normal file
3
code/compiler/09/examples/works2.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
defn add x y = { x + y }
|
||||||
|
defn double x = { add x x }
|
||||||
|
defn main = { double 163 }
|
||||||
8
code/compiler/09/examples/works3.txt
Normal file
8
code/compiler/09/examples/works3.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
data List = { Nil, Cons Int List }
|
||||||
|
defn length l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { 0 }
|
||||||
|
Cons x xs -> { 1 + length xs }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
defn main = { length (Cons 1 (Cons 2 (Cons 3 Nil))) }
|
||||||
16
code/compiler/09/examples/works4.txt
Normal file
16
code/compiler/09/examples/works4.txt
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
data List = { Nil, Cons Int List }
|
||||||
|
|
||||||
|
defn add x y = { x + y }
|
||||||
|
defn mul x y = { x * y }
|
||||||
|
|
||||||
|
defn foldr f b l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { b }
|
||||||
|
Cons x xs -> { f x (foldr f b xs) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn main = {
|
||||||
|
foldr add 0 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil)))) +
|
||||||
|
foldr mul 1 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil))))
|
||||||
|
}
|
||||||
17
code/compiler/09/examples/works5.txt
Normal file
17
code/compiler/09/examples/works5.txt
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
data List = { Nil, Cons Int List }
|
||||||
|
|
||||||
|
defn sumZip l m = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { 0 }
|
||||||
|
Cons x xs -> {
|
||||||
|
case m of {
|
||||||
|
Nil -> { 0 }
|
||||||
|
Cons y ys -> { x + y + sumZip xs ys }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn ones = { Cons 1 ones }
|
||||||
|
|
||||||
|
defn main = { sumZip ones (Cons 1 (Cons 2 (Cons 3 Nil))) }
|
||||||
177
code/compiler/09/instruction.cpp
Normal file
177
code/compiler/09/instruction.cpp
Normal file
@@ -0,0 +1,177 @@
|
|||||||
|
#include "instruction.hpp"
|
||||||
|
#include "llvm_context.hpp"
|
||||||
|
#include <llvm/IR/BasicBlock.h>
|
||||||
|
#include <llvm/IR/Function.h>
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
// Writes `n` indentation units to `to`. A non-positive `n` writes nothing;
// the previous `while(n--)` loop never terminated sensibly for negative n.
static void print_indent(int n, std::ostream& to) {
    for(int written = 0; written < n; ++written) {
        to << " ";
    }
}
|
||||||
|
|
||||||
|
// Prints this instruction as "PushInt(<value>)" on its own indented line.
void instruction_pushint::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushInt(";
    to << value;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Emits code that creates a number node holding `value` and pushes it.
void instruction_pushint::gen_llvm(llvm_context& ctx, Function* f) const {
    auto constant = ctx.create_i32(value);
    auto number_node = ctx.create_num(f, constant);
    ctx.create_push(f, number_node);
}
|
||||||
|
|
||||||
|
// Prints this instruction as "PushGlobal(<name>)" on its own indented line.
void instruction_pushglobal::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushGlobal(";
    to << name;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Looks up the custom function registered as "f_<name>" (throws via at()
// if it is missing) and emits code pushing a global node for it.
void instruction_pushglobal::gen_llvm(llvm_context& ctx, Function* f) const {
    auto& entry = ctx.custom_functions.at("f_" + name);
    auto arity_constant = ctx.create_i32(entry->arity);
    auto global_node = ctx.create_global(f, entry->function, arity_constant);
    ctx.create_push(f, global_node);
}
|
||||||
|
|
||||||
|
// Prints this instruction as "Push(<offset>)" on its own indented line.
void instruction_push::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Push(";
    to << offset;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Emits code that peeks the node at `offset` and pushes it on top.
void instruction_push::gen_llvm(llvm_context& ctx, Function* f) const {
    auto depth = ctx.create_size(offset);
    auto peeked = ctx.create_peek(f, depth);
    ctx.create_push(f, peeked);
}
|
||||||
|
|
||||||
|
// Prints this instruction as "Pop(<count>)" on its own indented line.
void instruction_pop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Pop(";
    to << count;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Emits a call that pops `count` entries from the stack.
void instruction_pop::gen_llvm(llvm_context& ctx, Function* f) const {
    auto how_many = ctx.create_size(count);
    ctx.create_popn(f, how_many);
}
|
||||||
|
|
||||||
|
// Prints this instruction as "MkApp()" on its own indented line.
void instruction_mkapp::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "MkApp()";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Emits code that pops two nodes (first pop is the left side, second the
// right) and pushes an application node built from them.
void instruction_mkapp::gen_llvm(llvm_context& ctx, Function* f) const {
    auto first_popped = ctx.create_pop(f);
    auto second_popped = ctx.create_pop(f);
    auto application = ctx.create_app(f, first_popped, second_popped);
    ctx.create_push(f, application);
}
|
||||||
|
|
||||||
|
// Prints this instruction as "Update(<offset>)" on its own indented line.
void instruction_update::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Update(";
    to << offset;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Emits a call to the update helper with this instruction's offset.
void instruction_update::gen_llvm(llvm_context& ctx, Function* f) const {
    auto target = ctx.create_size(offset);
    ctx.create_update(f, target);
}
|
||||||
|
|
||||||
|
// Prints this instruction as "Pack(<tag>, <size>)" on its own indented line.
void instruction_pack::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Pack(" << tag;
    to << ", " << size;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Emits a call to the pack helper with the size first and the tag second.
void instruction_pack::gen_llvm(llvm_context& ctx, Function* f) const {
    auto size_constant = ctx.create_size(size);
    auto tag_constant = ctx.create_i8(tag);
    ctx.create_pack(f, size_constant, tag_constant);
}
|
||||||
|
|
||||||
|
// Prints this instruction as "Split()" on its own indented line; the size
// is not part of the printed form.
void instruction_split::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Split()";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Emits a call to the split helper with this instruction's size.
void instruction_split::gen_llvm(llvm_context& ctx, Function* f) const {
    auto size_constant = ctx.create_size(size);
    ctx.create_split(f, size_constant);
}
|
||||||
|
|
||||||
|
void instruction_jump::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "Jump(" << std::endl;
|
||||||
|
for(auto& instruction_set : branches) {
|
||||||
|
for(auto& instruction : instruction_set) {
|
||||||
|
instruction->print(indent + 2, to);
|
||||||
|
}
|
||||||
|
to << std::endl;
|
||||||
|
}
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits a switch on the data tag of the node on top of the stack. Each
// branch gets its own basic block that ends by jumping to a shared "safety"
// block, which is also the switch default and where code generation
// continues afterwards.
void instruction_jump::gen_llvm(llvm_context& ctx, Function* f) const {
    auto scrutinee = ctx.create_peek(f, ctx.create_size(0));
    auto scrutinee_tag = ctx.unwrap_data_tag(scrutinee);
    auto continuation = BasicBlock::Create(ctx.ctx, "safety", f);
    auto dispatch = ctx.builder.CreateSwitch(scrutinee_tag, continuation, tag_mappings.size());

    // One block per branch, stored in branch order so that tag_mappings can
    // index into it below.
    std::vector<BasicBlock*> branch_blocks;
    for(auto& branch : branches) {
        auto block = BasicBlock::Create(ctx.ctx, "branch", f);
        branch_blocks.push_back(block);

        ctx.builder.SetInsertPoint(block);
        for(auto& step : branch) {
            step->gen_llvm(ctx, f);
        }
        ctx.builder.CreateBr(continuation);
    }

    // Wire every tag to the block of the branch it maps to.
    for(auto& tag_to_branch : tag_mappings) {
        auto case_value = ctx.create_i8(tag_to_branch.first);
        dispatch->addCase(case_value, branch_blocks[tag_to_branch.second]);
    }

    ctx.builder.SetInsertPoint(continuation);
}
|
||||||
|
|
||||||
|
// Prints this instruction as "Slide(<offset>)" on its own indented line.
void instruction_slide::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Slide(";
    to << offset;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Emits a call to the slide helper with this instruction's offset.
void instruction_slide::gen_llvm(llvm_context& ctx, Function* f) const {
    auto amount_constant = ctx.create_size(offset);
    ctx.create_slide(f, amount_constant);
}
|
||||||
|
|
||||||
|
// Prints this instruction as "BinOp(<op_action(op)>)" on its own indented line.
void instruction_binop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "BinOp(";
    to << op_action(op);
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Emits code that pops two number nodes (left first, then right), applies
// the arithmetic operation selected by `op`, and pushes a new number node
// holding the result.
void instruction_binop::gen_llvm(llvm_context& ctx, Function* f) const {
    auto left_int = ctx.unwrap_num(ctx.create_pop(f));
    auto right_int = ctx.unwrap_num(ctx.create_pop(f));
    // Start from nullptr so that an `op` outside the four handled cases does
    // not lead to reading an indeterminate pointer.
    llvm::Value* result = nullptr;
    switch(op) {
        case PLUS: result = ctx.builder.CreateAdd(left_int, right_int); break;
        case MINUS: result = ctx.builder.CreateSub(left_int, right_int); break;
        case TIMES: result = ctx.builder.CreateMul(left_int, right_int); break;
        case DIVIDE: result = ctx.builder.CreateSDiv(left_int, right_int); break;
    }
    ctx.create_push(f, ctx.create_num(f, result));
}
|
||||||
|
|
||||||
|
// Prints this instruction as "Eval()" on its own indented line.
void instruction_eval::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Eval()";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Evaluation is emitted as a single call to the context's unwind helper.
void instruction_eval::gen_llvm(llvm_context& ctx, Function* f) const {
    Function* current = f;
    ctx.create_unwind(current);
}
|
||||||
|
|
||||||
|
// Prints this instruction as "Alloc(<amount>)" on its own indented line.
void instruction_alloc::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Alloc(";
    to << amount;
    to << ")" << std::endl;
}
|
||||||
|
|
||||||
|
// Emits a call to the alloc helper with this instruction's amount.
void instruction_alloc::gen_llvm(llvm_context& ctx, Function* f) const {
    auto amount_constant = ctx.create_size(amount);
    ctx.create_alloc(f, amount_constant);
}
|
||||||
|
|
||||||
|
// Prints this instruction as "Unwind()" on its own indented line.
void instruction_unwind::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Unwind()";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Intentionally emits no code for this instruction.
void instruction_unwind::gen_llvm(llvm_context& ctx, Function* f) const {
    (void) ctx;
    (void) f;
}
|
||||||
142
code/compiler/09/instruction.hpp
Normal file
142
code/compiler/09/instruction.hpp
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <llvm/IR/Function.h>
|
||||||
|
#include <string>
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include <map>
|
||||||
|
#include <ostream>
|
||||||
|
#include "binop.hpp"
|
||||||
|
#include "llvm_context.hpp"
|
||||||
|
|
||||||
|
struct instruction {
|
||||||
|
virtual ~instruction() = default;
|
||||||
|
|
||||||
|
virtual void print(int indent, std::ostream& to) const = 0;
|
||||||
|
virtual void gen_llvm(llvm_context& ctx, llvm::Function* f) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using instruction_ptr = std::unique_ptr<instruction>;
|
||||||
|
|
||||||
|
struct instruction_pushint : public instruction {
|
||||||
|
int value;
|
||||||
|
|
||||||
|
instruction_pushint(int v)
|
||||||
|
: value(v) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_pushglobal : public instruction {
|
||||||
|
std::string name;
|
||||||
|
|
||||||
|
instruction_pushglobal(std::string n)
|
||||||
|
: name(std::move(n)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_push : public instruction {
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
instruction_push(int o)
|
||||||
|
: offset(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_pop : public instruction {
|
||||||
|
int count;
|
||||||
|
|
||||||
|
instruction_pop(int c)
|
||||||
|
: count(c) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_mkapp : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_update : public instruction {
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
instruction_update(int o)
|
||||||
|
: offset(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_pack : public instruction {
|
||||||
|
int tag;
|
||||||
|
int size;
|
||||||
|
|
||||||
|
instruction_pack(int t, int s)
|
||||||
|
: tag(t), size(s) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_split : public instruction {
|
||||||
|
int size;
|
||||||
|
|
||||||
|
instruction_split(int s)
|
||||||
|
: size(s) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_jump : public instruction {
|
||||||
|
std::vector<std::vector<instruction_ptr>> branches;
|
||||||
|
std::map<int, int> tag_mappings;
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_slide : public instruction {
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
instruction_slide(int o)
|
||||||
|
: offset(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_binop : public instruction {
|
||||||
|
binop op;
|
||||||
|
|
||||||
|
instruction_binop(binop o)
|
||||||
|
: op(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_eval : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_alloc : public instruction {
|
||||||
|
int amount;
|
||||||
|
|
||||||
|
instruction_alloc(int a)
|
||||||
|
: amount(a) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_unwind : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
278
code/compiler/09/llvm_context.cpp
Normal file
278
code/compiler/09/llvm_context.cpp
Normal file
@@ -0,0 +1,278 @@
|
|||||||
|
#include "llvm_context.hpp"
|
||||||
|
#include <llvm/IR/DerivedTypes.h>
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
void llvm_context::create_types() {
|
||||||
|
stack_type = StructType::create(ctx, "stack");
|
||||||
|
gmachine_type = StructType::create(ctx, "gmachine");
|
||||||
|
stack_ptr_type = PointerType::getUnqual(stack_type);
|
||||||
|
gmachine_ptr_type = PointerType::getUnqual(gmachine_type);
|
||||||
|
tag_type = IntegerType::getInt8Ty(ctx);
|
||||||
|
struct_types["node_base"] = StructType::create(ctx, "node_base");
|
||||||
|
struct_types["node_app"] = StructType::create(ctx, "node_app");
|
||||||
|
struct_types["node_num"] = StructType::create(ctx, "node_num");
|
||||||
|
struct_types["node_global"] = StructType::create(ctx, "node_global");
|
||||||
|
struct_types["node_ind"] = StructType::create(ctx, "node_ind");
|
||||||
|
struct_types["node_data"] = StructType::create(ctx, "node_data");
|
||||||
|
node_ptr_type = PointerType::getUnqual(struct_types.at("node_base"));
|
||||||
|
function_type = FunctionType::get(Type::getVoidTy(ctx), { gmachine_ptr_type }, false);
|
||||||
|
|
||||||
|
gmachine_type->setBody(
|
||||||
|
stack_ptr_type,
|
||||||
|
node_ptr_type,
|
||||||
|
IntegerType::getInt64Ty(ctx),
|
||||||
|
IntegerType::getInt64Ty(ctx)
|
||||||
|
);
|
||||||
|
struct_types.at("node_base")->setBody(
|
||||||
|
IntegerType::getInt32Ty(ctx),
|
||||||
|
IntegerType::getInt8Ty(ctx),
|
||||||
|
node_ptr_type
|
||||||
|
);
|
||||||
|
struct_types.at("node_app")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
node_ptr_type,
|
||||||
|
node_ptr_type
|
||||||
|
);
|
||||||
|
struct_types.at("node_num")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
IntegerType::getInt32Ty(ctx)
|
||||||
|
);
|
||||||
|
struct_types.at("node_global")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
FunctionType::get(Type::getVoidTy(ctx), { stack_ptr_type }, false)
|
||||||
|
);
|
||||||
|
struct_types.at("node_ind")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
node_ptr_type
|
||||||
|
);
|
||||||
|
struct_types.at("node_data")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
IntegerType::getInt8Ty(ctx),
|
||||||
|
PointerType::getUnqual(node_ptr_type)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
void llvm_context::create_functions() {
|
||||||
|
auto void_type = Type::getVoidTy(ctx);
|
||||||
|
auto sizet_type = IntegerType::get(ctx, sizeof(size_t) * 8);
|
||||||
|
functions["stack_init"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_init",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_free"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_free",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_push"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type, node_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_push",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_pop"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { stack_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_pop",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_peek"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_peek",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_popn"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_popn",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["gmachine_slide"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"gmachine_slide",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["gmachine_update"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"gmachine_update",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["gmachine_alloc"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"gmachine_alloc",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["gmachine_pack"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type, tag_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"gmachine_pack",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["gmachine_split"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"gmachine_split",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["gmachine_track"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { gmachine_ptr_type, node_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"gmachine_track",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
|
||||||
|
auto int32_type = IntegerType::getInt32Ty(ctx);
|
||||||
|
functions["alloc_app"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { node_ptr_type, node_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"alloc_app",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["alloc_num"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { int32_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"alloc_num",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["alloc_global"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { function_type, int32_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"alloc_global",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["alloc_ind"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { node_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"alloc_ind",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
|
||||||
|
functions["unwind"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { gmachine_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"unwind",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds an 8-bit integer constant. The input is signed, so the APInt is
// constructed as signed; otherwise a negative value would be seen as a
// 64-bit quantity that does not fit in 8 bits.
ConstantInt* llvm_context::create_i8(int8_t i) {
    return ConstantInt::get(ctx, APInt(8, i, true));
}
|
||||||
|
// Builds a 32-bit integer constant. The input is signed, so the APInt is
// constructed as signed; otherwise a negative value would be seen as a
// 64-bit quantity that does not fit in 32 bits.
ConstantInt* llvm_context::create_i32(int32_t i) {
    return ConstantInt::get(ctx, APInt(32, i, true));
}
|
||||||
|
// Builds an integer constant as wide as the host's size_t.
ConstantInt* llvm_context::create_size(size_t i) {
    APInt as_bits(sizeof(size_t) * 8, i);
    return ConstantInt::get(ctx, as_bits);
}
|
||||||
|
|
||||||
|
// Emits a call to stack_pop on the stack of f's first argument and returns
// the call's result.
Value* llvm_context::create_pop(Function* f) {
    auto callee = functions.at("stack_pop");
    Value* stack = unwrap_gmachine_stack_ptr(f->arg_begin());
    return builder.CreateCall(callee, { stack });
}
|
||||||
|
// Emits a call to stack_peek with offset `off` on the stack of f's first
// argument and returns the call's result.
Value* llvm_context::create_peek(Function* f, Value* off) {
    auto callee = functions.at("stack_peek");
    Value* stack = unwrap_gmachine_stack_ptr(f->arg_begin());
    return builder.CreateCall(callee, { stack, off });
}
|
||||||
|
// Emits a call to stack_push that pushes `v` on the stack of f's first argument.
void llvm_context::create_push(Function* f, Value* v) {
    auto callee = functions.at("stack_push");
    Value* stack = unwrap_gmachine_stack_ptr(f->arg_begin());
    builder.CreateCall(callee, { stack, v });
}
|
||||||
|
// Emits a call to stack_popn with `off` on the stack of f's first argument.
void llvm_context::create_popn(Function* f, Value* off) {
    auto callee = functions.at("stack_popn");
    Value* stack = unwrap_gmachine_stack_ptr(f->arg_begin());
    builder.CreateCall(callee, { stack, off });
}
|
||||||
|
// Emits a call to gmachine_update, passing f's first argument and `off`.
void llvm_context::create_update(Function* f, Value* off) {
    auto callee = functions.at("gmachine_update");
    Value* machine = f->arg_begin();
    builder.CreateCall(callee, { machine, off });
}
|
||||||
|
// Emits a call to gmachine_pack, passing f's first argument, `c` and `t`.
void llvm_context::create_pack(Function* f, Value* c, Value* t) {
    auto callee = functions.at("gmachine_pack");
    Value* machine = f->arg_begin();
    builder.CreateCall(callee, { machine, c, t });
}
|
||||||
|
// Emits a call to gmachine_split, passing f's first argument and `c`.
void llvm_context::create_split(Function* f, Value* c) {
    auto callee = functions.at("gmachine_split");
    Value* machine = f->arg_begin();
    builder.CreateCall(callee, { machine, c });
}
|
||||||
|
// Emits a call to gmachine_slide, passing f's first argument and `off`.
void llvm_context::create_slide(Function* f, Value* off) {
    auto callee = functions.at("gmachine_slide");
    Value* machine = f->arg_begin();
    builder.CreateCall(callee, { machine, off });
}
|
||||||
|
// Emits a call to "gmachine_alloc", passing f's first argument (the machine)
// and the count `n`.
void llvm_context::create_alloc(Function* f, Value* n) {
    Function* callee = functions.at("gmachine_alloc");
    Value* machine = f->arg_begin();
    builder.CreateCall(callee, { machine, n });
}
|
||||||
|
// Emits a call to "gmachine_track", passing f's first argument (the machine)
// and the node `v`. Returns the call's value.
Value* llvm_context::create_track(Function* f, Value* v) {
    Function* callee = functions.at("gmachine_track");
    Value* machine = f->arg_begin();
    return builder.CreateCall(callee, { machine, v });
}
|
||||||
|
|
||||||
|
// Emits a call to "unwind", passing f's first argument (the machine).
void llvm_context::create_unwind(Function* f) {
    Function* callee = functions.at("unwind");
    Value* machine = f->arg_begin();
    builder.CreateCall(callee, { machine });
}
|
||||||
|
|
||||||
|
// Emits a GEP on `g` with indices (0, 0) and returns the resulting pointer.
// NOTE(review): presumably this addresses the stack, which is the first field
// of the runtime's struct gmachine - confirm against create_types().
Value* llvm_context::unwrap_gmachine_stack_ptr(Value* g) {
    ConstantInt* zero = create_i32(0);
    return builder.CreateGEP(g, { zero, zero });
}
|
||||||
|
|
||||||
|
// Casts `v` to a pointer to the "node_num" struct type, addresses the member
// at GEP indices (0, 1) and returns a load of it.
Value* llvm_context::unwrap_num(Value* v) {
    PointerType* node_num_ptr = PointerType::getUnqual(struct_types.at("node_num"));
    Value* as_num = builder.CreatePointerCast(v, node_num_ptr);
    ConstantInt* index_0 = create_i32(0);
    ConstantInt* index_1 = create_i32(1);
    Value* member_ptr = builder.CreateGEP(as_num, { index_0, index_1 });
    return builder.CreateLoad(member_ptr);
}
|
||||||
|
// Emits a call to "alloc_num" with `v`, then registers the result through
// create_track() and returns the tracked value.
Value* llvm_context::create_num(Function* f, Value* v) {
    Function* allocator = functions.at("alloc_num");
    Value* fresh_node = builder.CreateCall(allocator, { v });
    return create_track(f, fresh_node);
}
|
||||||
|
|
||||||
|
// Casts `v` to a pointer to the "node_data" struct type, addresses the member
// at GEP indices (0, 1) and returns a load of it.
Value* llvm_context::unwrap_data_tag(Value* v) {
    PointerType* node_data_ptr = PointerType::getUnqual(struct_types.at("node_data"));
    Value* as_data = builder.CreatePointerCast(v, node_data_ptr);
    ConstantInt* index_0 = create_i32(0);
    ConstantInt* index_1 = create_i32(1);
    Value* member_ptr = builder.CreateGEP(as_data, { index_0, index_1 });
    return builder.CreateLoad(member_ptr);
}
|
||||||
|
|
||||||
|
// Emits a call to "alloc_global" with the function `gf` and arity `a`, then
// registers the result through create_track() and returns the tracked value.
Value* llvm_context::create_global(Function* f, Value* gf, Value* a) {
    Function* allocator = functions.at("alloc_global");
    Value* fresh_node = builder.CreateCall(allocator, { gf, a });
    return create_track(f, fresh_node);
}
|
||||||
|
|
||||||
|
// Emits a call to "alloc_app" with `l` and `r`, then registers the result
// through create_track() and returns the tracked value.
Value* llvm_context::create_app(Function* f, Value* l, Value* r) {
    Function* allocator = functions.at("alloc_app");
    Value* fresh_node = builder.CreateCall(allocator, { l, r });
    return create_track(f, fresh_node);
}
|
||||||
|
|
||||||
|
// Declares a new externally linked function named "f_" + name in the module,
// gives it an "entry" basic block, and records it (with its arity) in
// custom_functions under that same "f_"-prefixed key.
//
// name:  source-level name; the emitted symbol and map key are "f_" + name.
// arity: stored alongside the function in the custom_function record.
// Returns the newly created llvm::Function.
llvm::Function* llvm_context::create_custom_function(std::string name, int32_t arity) {
    const std::string symbol = "f_" + name;

    auto new_function = llvm::Function::Create(
            function_type,
            llvm::Function::LinkageTypes::ExternalLinkage,
            symbol,
            &module
    );
    // The block is attached to new_function on creation; callers reach it
    // through getEntryBlock(), so the returned pointer is not needed here.
    llvm::BasicBlock::Create(ctx, "entry", new_function);

    auto new_custom_f = custom_function_ptr(new custom_function());
    new_custom_f->arity = arity;
    new_custom_f->function = new_function;
    custom_functions[symbol] = std::move(new_custom_f);

    return new_function;
}
|
||||||
72
code/compiler/09/llvm_context.hpp
Normal file
72
code/compiler/09/llvm_context.hpp
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <llvm/IR/DerivedTypes.h>
|
||||||
|
#include <llvm/IR/Function.h>
|
||||||
|
#include <llvm/IR/LLVMContext.h>
|
||||||
|
#include <llvm/IR/IRBuilder.h>
|
||||||
|
#include <llvm/IR/Module.h>
|
||||||
|
#include <llvm/IR/Value.h>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
struct llvm_context {
|
||||||
|
struct custom_function {
|
||||||
|
llvm::Function* function;
|
||||||
|
int32_t arity;
|
||||||
|
};
|
||||||
|
|
||||||
|
using custom_function_ptr = std::unique_ptr<custom_function>;
|
||||||
|
|
||||||
|
llvm::LLVMContext ctx;
|
||||||
|
llvm::IRBuilder<> builder;
|
||||||
|
llvm::Module module;
|
||||||
|
|
||||||
|
std::map<std::string, custom_function_ptr> custom_functions;
|
||||||
|
std::map<std::string, llvm::Function*> functions;
|
||||||
|
std::map<std::string, llvm::StructType*> struct_types;
|
||||||
|
|
||||||
|
llvm::StructType* stack_type;
|
||||||
|
llvm::StructType* gmachine_type;
|
||||||
|
llvm::PointerType* stack_ptr_type;
|
||||||
|
llvm::PointerType* gmachine_ptr_type;
|
||||||
|
llvm::PointerType* node_ptr_type;
|
||||||
|
llvm::IntegerType* tag_type;
|
||||||
|
llvm::FunctionType* function_type;
|
||||||
|
|
||||||
|
llvm_context()
|
||||||
|
: builder(ctx), module("bloglang", ctx) {
|
||||||
|
create_types();
|
||||||
|
create_functions();
|
||||||
|
}
|
||||||
|
|
||||||
|
void create_types();
|
||||||
|
void create_functions();
|
||||||
|
|
||||||
|
llvm::ConstantInt* create_i8(int8_t);
|
||||||
|
llvm::ConstantInt* create_i32(int32_t);
|
||||||
|
llvm::ConstantInt* create_size(size_t);
|
||||||
|
|
||||||
|
llvm::Value* create_pop(llvm::Function*);
|
||||||
|
llvm::Value* create_peek(llvm::Function*, llvm::Value*);
|
||||||
|
void create_push(llvm::Function*, llvm::Value*);
|
||||||
|
void create_popn(llvm::Function*, llvm::Value*);
|
||||||
|
void create_update(llvm::Function*, llvm::Value*);
|
||||||
|
void create_pack(llvm::Function*, llvm::Value*, llvm::Value*);
|
||||||
|
void create_split(llvm::Function*, llvm::Value*);
|
||||||
|
void create_slide(llvm::Function*, llvm::Value*);
|
||||||
|
void create_alloc(llvm::Function*, llvm::Value*);
|
||||||
|
llvm::Value* create_track(llvm::Function*, llvm::Value*);
|
||||||
|
|
||||||
|
void create_unwind(llvm::Function*);
|
||||||
|
|
||||||
|
llvm::Value* unwrap_gmachine_stack_ptr(llvm::Value*);
|
||||||
|
|
||||||
|
llvm::Value* unwrap_num(llvm::Value*);
|
||||||
|
llvm::Value* create_num(llvm::Function*, llvm::Value*);
|
||||||
|
|
||||||
|
llvm::Value* unwrap_data_tag(llvm::Value*);
|
||||||
|
|
||||||
|
llvm::Value* create_global(llvm::Function*, llvm::Value*, llvm::Value*);
|
||||||
|
|
||||||
|
llvm::Value* create_app(llvm::Function*, llvm::Value*, llvm::Value*);
|
||||||
|
|
||||||
|
llvm::Function* create_custom_function(std::string name, int32_t arity);
|
||||||
|
};
|
||||||
176
code/compiler/09/main.cpp
Normal file
176
code/compiler/09/main.cpp
Normal file
@@ -0,0 +1,176 @@
|
|||||||
|
#include "ast.hpp"
|
||||||
|
#include <iostream>
|
||||||
|
#include "binop.hpp"
|
||||||
|
#include "definition.hpp"
|
||||||
|
#include "instruction.hpp"
|
||||||
|
#include "llvm_context.hpp"
|
||||||
|
#include "parser.hpp"
|
||||||
|
#include "error.hpp"
|
||||||
|
#include "type.hpp"
|
||||||
|
#include "llvm/IR/LegacyPassManager.h"
|
||||||
|
#include "llvm/IR/Verifier.h"
|
||||||
|
#include "llvm/Support/TargetSelect.h"
|
||||||
|
#include "llvm/Support/TargetRegistry.h"
|
||||||
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
#include "llvm/Support/FileSystem.h"
|
||||||
|
#include "llvm/Target/TargetOptions.h"
|
||||||
|
#include "llvm/Target/TargetMachine.h"
|
||||||
|
|
||||||
|
// Parser error hook: prints the parser's message to standard output.
void yy::parser::error(const std::string& msg) {
    std::cout << "An error occured: ";
    std::cout << msg;
    std::cout << std::endl;
}
|
||||||
|
|
||||||
|
extern std::vector<definition_ptr> program;
|
||||||
|
|
||||||
|
void typecheck_program(
|
||||||
|
const std::vector<definition_ptr>& prog,
|
||||||
|
type_mgr& mgr, type_env& env) {
|
||||||
|
type_ptr int_type = type_ptr(new type_base("Int"));
|
||||||
|
type_ptr binop_type = type_ptr(new type_arr(
|
||||||
|
int_type,
|
||||||
|
type_ptr(new type_arr(int_type, int_type))));
|
||||||
|
|
||||||
|
env.bind("+", binop_type);
|
||||||
|
env.bind("-", binop_type);
|
||||||
|
env.bind("*", binop_type);
|
||||||
|
env.bind("/", binop_type);
|
||||||
|
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->typecheck_first(mgr, env);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->typecheck_second(mgr, env);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& pair : env.names) {
|
||||||
|
std::cout << pair.first << ": ";
|
||||||
|
pair.second->print(mgr, std::cout);
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->resolve(mgr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void compile_program(const std::vector<definition_ptr>& prog) {
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->compile();
|
||||||
|
|
||||||
|
definition_defn* defn = dynamic_cast<definition_defn*>(def.get());
|
||||||
|
if(!defn) continue;
|
||||||
|
for(auto& instruction : defn->instructions) {
|
||||||
|
instruction->print(0, std::cout);
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generates the two-argument function that implements the binary operator `op`.
//
// The body is a fixed instruction sequence: Push 1, Eval, Push 1, Eval,
// BinOp(op), Update 2, Pop 2, followed by a void return.
void gen_llvm_internal_op(llvm_context& ctx, binop op) {
    auto op_function = ctx.create_custom_function(op_action(op), 2);
    ctx.builder.SetInsertPoint(&op_function->getEntryBlock());

    std::vector<instruction_ptr> body;
    body.push_back(instruction_ptr(new instruction_push(1)));
    body.push_back(instruction_ptr(new instruction_eval()));
    body.push_back(instruction_ptr(new instruction_push(1)));
    body.push_back(instruction_ptr(new instruction_eval()));
    body.push_back(instruction_ptr(new instruction_binop(op)));
    body.push_back(instruction_ptr(new instruction_update(2)));
    body.push_back(instruction_ptr(new instruction_pop(2)));

    for(auto& step : body) {
        step->gen_llvm(ctx, op_function);
    }
    ctx.builder.CreateRetVoid();
}
|
||||||
|
|
||||||
|
// Compiles ctx.module to a native object file written to `filename`.
//
// The module's data layout and target triple are set for the host's default
// target, using the "generic" CPU and no extra features.
//
// If the target cannot be found or no target machine can be created, a
// message is written to std::cerr and the function returns without writing
// anything. If the output file cannot be opened, or the target cannot emit
// object files, a message is written to std::cerr and the int 0 is thrown
// (the exception type is unchanged from earlier versions).
void output_llvm(llvm_context& ctx, const std::string& filename) {
    std::string targetTriple = llvm::sys::getDefaultTargetTriple();

    llvm::InitializeNativeTarget();
    llvm::InitializeNativeTargetAsmParser();
    llvm::InitializeNativeTargetAsmPrinter();

    std::string error;
    const llvm::Target* target =
        llvm::TargetRegistry::lookupTarget(targetTriple, error);
    if (!target) {
        std::cerr << error << std::endl;
        return;
    }

    std::string cpu = "generic";
    std::string features = "";
    llvm::TargetOptions options;
    // Owned here so the machine is released on every exit path; it is
    // declared before the pass manager and therefore outlives it.
    std::unique_ptr<llvm::TargetMachine> targetMachine(
        target->createTargetMachine(targetTriple, cpu, features,
            options, llvm::Optional<llvm::Reloc::Model>()));
    if (!targetMachine) {
        std::cerr << "could not create a target machine for "
            << targetTriple << std::endl;
        return;
    }

    ctx.module.setDataLayout(targetMachine->createDataLayout());
    ctx.module.setTargetTriple(targetTriple);

    std::error_code ec;
    llvm::raw_fd_ostream file(filename, ec, llvm::sys::fs::F_None);
    if (ec) {
        std::cerr << "could not open " << filename << ": "
            << ec.message() << std::endl;
        throw 0;
    }

    llvm::TargetMachine::CodeGenFileType type = llvm::TargetMachine::CGFT_ObjectFile;
    llvm::legacy::PassManager pm;
    // addPassesToEmitFile returns true when this file type is unsupported.
    if (targetMachine->addPassesToEmitFile(pm, file, NULL, type)) {
        std::cerr << "the target machine cannot emit an object file" << std::endl;
        throw 0;
    }

    pm.run(ctx.module);
    file.close();
}
|
||||||
|
|
||||||
|
void gen_llvm(const std::vector<definition_ptr>& prog) {
|
||||||
|
llvm_context ctx;
|
||||||
|
gen_llvm_internal_op(ctx, PLUS);
|
||||||
|
gen_llvm_internal_op(ctx, MINUS);
|
||||||
|
gen_llvm_internal_op(ctx, TIMES);
|
||||||
|
gen_llvm_internal_op(ctx, DIVIDE);
|
||||||
|
|
||||||
|
for(auto& definition : prog) {
|
||||||
|
definition->gen_llvm_first(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& definition : prog) {
|
||||||
|
definition->gen_llvm_second(ctx);
|
||||||
|
}
|
||||||
|
ctx.module.print(llvm::outs(), nullptr);
|
||||||
|
output_llvm(ctx, "program.o");
|
||||||
|
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
yy::parser parser;
|
||||||
|
type_mgr mgr;
|
||||||
|
type_env env;
|
||||||
|
|
||||||
|
parser.parse();
|
||||||
|
for(auto& definition : program) {
|
||||||
|
definition_defn* def = dynamic_cast<definition_defn*>(definition.get());
|
||||||
|
if(!def) continue;
|
||||||
|
|
||||||
|
std::cout << def->name;
|
||||||
|
for(auto& param : def->params) std::cout << " " << param;
|
||||||
|
std::cout << ":" << std::endl;
|
||||||
|
|
||||||
|
def->body->print(1, std::cout);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
typecheck_program(program, mgr, env);
|
||||||
|
compile_program(program);
|
||||||
|
gen_llvm(program);
|
||||||
|
} catch(unification_error& err) {
|
||||||
|
std::cout << "failed to unify types: " << std::endl;
|
||||||
|
std::cout << " (1) \033[34m";
|
||||||
|
err.left->print(mgr, std::cout);
|
||||||
|
std::cout << "\033[0m" << std::endl;
|
||||||
|
std::cout << " (2) \033[32m";
|
||||||
|
err.right->print(mgr, std::cout);
|
||||||
|
std::cout << "\033[0m" << std::endl;
|
||||||
|
} catch(type_error& err) {
|
||||||
|
std::cout << "failed to type check program: " << err.description << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
141
code/compiler/09/parser.y
Normal file
141
code/compiler/09/parser.y
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
%{
|
||||||
|
#include <string>
|
||||||
|
#include <iostream>
|
||||||
|
#include "ast.hpp"
|
||||||
|
#include "definition.hpp"
|
||||||
|
#include "parser.hpp"
|
||||||
|
|
||||||
|
std::vector<definition_ptr> program;
|
||||||
|
extern yy::parser::symbol_type yylex();
|
||||||
|
|
||||||
|
%}
|
||||||
|
|
||||||
|
%token PLUS
|
||||||
|
%token TIMES
|
||||||
|
%token MINUS
|
||||||
|
%token DIVIDE
|
||||||
|
%token <int> INT
|
||||||
|
%token DEFN
|
||||||
|
%token DATA
|
||||||
|
%token CASE
|
||||||
|
%token OF
|
||||||
|
%token OCURLY
|
||||||
|
%token CCURLY
|
||||||
|
%token OPAREN
|
||||||
|
%token CPAREN
|
||||||
|
%token COMMA
|
||||||
|
%token ARROW
|
||||||
|
%token EQUAL
|
||||||
|
%token <std::string> LID
|
||||||
|
%token <std::string> UID
|
||||||
|
|
||||||
|
%language "c++"
|
||||||
|
%define api.value.type variant
|
||||||
|
%define api.token.constructor
|
||||||
|
|
||||||
|
%type <std::vector<std::string>> lowercaseParams uppercaseParams
|
||||||
|
%type <std::vector<definition_ptr>> program definitions
|
||||||
|
%type <std::vector<branch_ptr>> branches
|
||||||
|
%type <std::vector<constructor_ptr>> constructors
|
||||||
|
%type <ast_ptr> aAdd aMul case app appBase
|
||||||
|
%type <definition_ptr> definition defn data
|
||||||
|
%type <branch_ptr> branch
|
||||||
|
%type <pattern_ptr> pattern
|
||||||
|
%type <constructor_ptr> constructor
|
||||||
|
|
||||||
|
%start program
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
/* A program is a non-empty sequence of definitions, stored in the global
   `program` vector once the whole input has been parsed. */
program
    : definitions { program = std::move($1); }
    ;

definitions
    : definitions definition { $$ = std::move($1); $$.push_back(std::move($2)); }
    | definition { $$ = std::vector<definition_ptr>(); $$.push_back(std::move($1)); }
    ;

definition
    : defn { $$ = std::move($1); }
    | data { $$ = std::move($1); }
    ;

/* defn name params = { body } */
defn
    : DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY
        { $$ = definition_ptr(
            new definition_defn(std::move($2), std::move($3), std::move($6))); }
    ;

/* Possibly empty lists of lowercase / uppercase identifiers. */
lowercaseParams
    : %empty { $$ = std::vector<std::string>(); }
    | lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
    ;

uppercaseParams
    : %empty { $$ = std::vector<std::string>(); }
    | uppercaseParams UID { $$ = std::move($1); $$.push_back(std::move($2)); }
    ;

/* Left-recursive arithmetic: aAdd is built from aMul, and aMul from app, so
   TIMES and DIVIDE bind tighter than PLUS and MINUS. */
aAdd
    : aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); }
    | aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); }
    | aMul { $$ = std::move($1); }
    ;

aMul
    : aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3))); }
    | aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3))); }
    | app { $$ = std::move($1); }
    ;

/* Function application by juxtaposition; left-recursive. */
app
    : app appBase { $$ = ast_ptr(new ast_app(std::move($1), std::move($2))); }
    | appBase { $$ = std::move($1); }
    ;

appBase
    : INT { $$ = ast_ptr(new ast_int($1)); }
    | LID { $$ = ast_ptr(new ast_lid(std::move($1))); }
    | UID { $$ = ast_ptr(new ast_uid(std::move($1))); }
    | OPAREN aAdd CPAREN { $$ = std::move($2); }
    | case { $$ = std::move($1); }
    ;

/* case expr of { branches } */
case
    : CASE aAdd OF OCURLY branches CCURLY
        { $$ = ast_ptr(new ast_case(std::move($2), std::move($5))); }
    ;

branches
    : branches branch { $$ = std::move($1); $$.push_back(std::move($2)); }
    | branch { $$ = std::vector<branch_ptr>(); $$.push_back(std::move($1));}
    ;

/* pattern -> { expr } */
branch
    : pattern ARROW OCURLY aAdd CCURLY
        { $$ = branch_ptr(new branch(std::move($1), std::move($4))); }
    ;

pattern
    : LID { $$ = pattern_ptr(new pattern_var(std::move($1))); }
    | UID lowercaseParams
        { $$ = pattern_ptr(new pattern_constr(std::move($1), std::move($2))); }
    ;

/* data Name = { Constructor Types, ... } */
data
    : DATA UID EQUAL OCURLY constructors CCURLY
        { $$ = definition_ptr(new definition_data(std::move($2), std::move($5))); }
    ;

constructors
    : constructors COMMA constructor { $$ = std::move($1); $$.push_back(std::move($3)); }
    | constructor
        { $$ = std::vector<constructor_ptr>(); $$.push_back(std::move($1)); }
    ;

constructor
    : UID uppercaseParams
        { $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); }
    ;
|
||||||
|
|
||||||
269
code/compiler/09/runtime.c
Normal file
269
code/compiler/09/runtime.c
Normal file
@@ -0,0 +1,269 @@
|
|||||||
|
#include <stdint.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <memory.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "runtime.h"
|
||||||
|
|
||||||
|
struct node_base* alloc_node() {
|
||||||
|
struct node_base* new_node = malloc(sizeof(struct node_app));
|
||||||
|
new_node->gc_next = NULL;
|
||||||
|
new_node->gc_reachable = 0;
|
||||||
|
assert(new_node != NULL);
|
||||||
|
return new_node;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_app* alloc_app(struct node_base* l, struct node_base* r) {
|
||||||
|
struct node_app* node = (struct node_app*) alloc_node();
|
||||||
|
node->base.tag = NODE_APP;
|
||||||
|
node->left = l;
|
||||||
|
node->right = r;
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_num* alloc_num(int32_t n) {
|
||||||
|
struct node_num* node = (struct node_num*) alloc_node();
|
||||||
|
node->base.tag = NODE_NUM;
|
||||||
|
node->value = n;
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a) {
|
||||||
|
struct node_global* node = (struct node_global*) alloc_node();
|
||||||
|
node->base.tag = NODE_GLOBAL;
|
||||||
|
node->arity = a;
|
||||||
|
node->function = f;
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_ind* alloc_ind(struct node_base* n) {
|
||||||
|
struct node_ind* node = (struct node_ind*) alloc_node();
|
||||||
|
node->base.tag = NODE_IND;
|
||||||
|
node->next = n;
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
void free_node_direct(struct node_base* n) {
|
||||||
|
if(n->tag == NODE_DATA) {
|
||||||
|
free(((struct node_data*) n)->array);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void gc_visit_node(struct node_base* n) {
|
||||||
|
if(n->gc_reachable) return;
|
||||||
|
n->gc_reachable = 1;
|
||||||
|
|
||||||
|
if(n->tag == NODE_APP) {
|
||||||
|
struct node_app* app = (struct node_app*) n;
|
||||||
|
gc_visit_node(app->left);
|
||||||
|
gc_visit_node(app->right);
|
||||||
|
} if(n->tag == NODE_IND) {
|
||||||
|
struct node_ind* ind = (struct node_ind*) n;
|
||||||
|
gc_visit_node(ind->next);
|
||||||
|
} if(n->tag == NODE_DATA) {
|
||||||
|
struct node_data* data = (struct node_data*) n;
|
||||||
|
struct node_base** to_visit = data->array;
|
||||||
|
while(*to_visit) {
|
||||||
|
gc_visit_node(*to_visit);
|
||||||
|
to_visit++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_init(struct stack* s) {
|
||||||
|
s->size = 4;
|
||||||
|
s->count = 0;
|
||||||
|
s->data = malloc(sizeof(*s->data) * s->size);
|
||||||
|
assert(s->data != NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_free(struct stack* s) {
|
||||||
|
free(s->data);
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_push(struct stack* s, struct node_base* n) {
|
||||||
|
while(s->count >= s->size) {
|
||||||
|
s->data = realloc(s->data, sizeof(*s->data) * (s->size *= 2));
|
||||||
|
assert(s->data != NULL);
|
||||||
|
}
|
||||||
|
s->data[s->count++] = n;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_base* stack_pop(struct stack* s) {
|
||||||
|
assert(s->count > 0);
|
||||||
|
return s->data[--s->count];
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_base* stack_peek(struct stack* s, size_t o) {
|
||||||
|
assert(s->count > o);
|
||||||
|
return s->data[s->count - o - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_popn(struct stack* s, size_t n) {
|
||||||
|
assert(s->count >= n);
|
||||||
|
s->count -= n;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gmachine_init(struct gmachine* g) {
|
||||||
|
stack_init(&g->stack);
|
||||||
|
g->gc_nodes = NULL;
|
||||||
|
g->gc_node_count = 0;
|
||||||
|
g->gc_node_threshold = 128;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gmachine_free(struct gmachine* g) {
|
||||||
|
stack_free(&g->stack);
|
||||||
|
struct node_base* to_free = g->gc_nodes;
|
||||||
|
struct node_base* next;
|
||||||
|
|
||||||
|
while(to_free) {
|
||||||
|
next = to_free->gc_next;
|
||||||
|
free_node_direct(to_free);
|
||||||
|
free(to_free);
|
||||||
|
to_free = next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void gmachine_slide(struct gmachine* g, size_t n) {
|
||||||
|
assert(g->stack.count > n);
|
||||||
|
g->stack.data[g->stack.count - n - 1] = g->stack.data[g->stack.count - 1];
|
||||||
|
g->stack.count -= n;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gmachine_update(struct gmachine* g, size_t o) {
|
||||||
|
assert(g->stack.count > o + 1);
|
||||||
|
struct node_ind* ind =
|
||||||
|
(struct node_ind*) g->stack.data[g->stack.count - o - 2];
|
||||||
|
ind->base.tag = NODE_IND;
|
||||||
|
ind->next = g->stack.data[g->stack.count -= 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
void gmachine_alloc(struct gmachine* g, size_t o) {
|
||||||
|
while(o--) {
|
||||||
|
stack_push(&g->stack,
|
||||||
|
gmachine_track(g, (struct node_base*) alloc_ind(NULL)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void gmachine_pack(struct gmachine* g, size_t n, int8_t t) {
|
||||||
|
assert(g->stack.count >= n);
|
||||||
|
|
||||||
|
struct node_base** data = malloc(sizeof(*data) * (n + 1));
|
||||||
|
assert(data != NULL);
|
||||||
|
memcpy(data, &g->stack.data[g->stack.count - n], n * sizeof(*data));
|
||||||
|
data[n] = NULL;
|
||||||
|
|
||||||
|
struct node_data* new_node = (struct node_data*) alloc_node();
|
||||||
|
new_node->array = data;
|
||||||
|
new_node->base.tag = NODE_DATA;
|
||||||
|
new_node->tag = t;
|
||||||
|
|
||||||
|
stack_popn(&g->stack, n);
|
||||||
|
stack_push(&g->stack, gmachine_track(g, (struct node_base*) new_node));
|
||||||
|
}
|
||||||
|
|
||||||
|
void gmachine_split(struct gmachine* g, size_t n) {
|
||||||
|
struct node_data* node = (struct node_data*) stack_pop(&g->stack);
|
||||||
|
for(size_t i = 0; i < n; i++) {
|
||||||
|
stack_push(&g->stack, node->array[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_base* gmachine_track(struct gmachine* g, struct node_base* b) {
|
||||||
|
g->gc_node_count++;
|
||||||
|
b->gc_next = g->gc_nodes;
|
||||||
|
g->gc_nodes = b;
|
||||||
|
|
||||||
|
if(g->gc_node_count >= g->gc_node_threshold) {
|
||||||
|
uint64_t nodes_before = g->gc_node_count;
|
||||||
|
gc_visit_node(b);
|
||||||
|
gmachine_gc(g);
|
||||||
|
g->gc_node_threshold = g->gc_node_count * 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
return b;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gmachine_gc(struct gmachine* g) {
|
||||||
|
for(size_t i = 0; i < g->stack.count; i++) {
|
||||||
|
gc_visit_node(g->stack.data[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_base** head_ptr = &g->gc_nodes;
|
||||||
|
while(*head_ptr) {
|
||||||
|
if((*head_ptr)->gc_reachable) {
|
||||||
|
(*head_ptr)->gc_reachable = 0;
|
||||||
|
head_ptr = &(*head_ptr)->gc_next;
|
||||||
|
} else {
|
||||||
|
struct node_base* to_free = *head_ptr;
|
||||||
|
*head_ptr = to_free->gc_next;
|
||||||
|
free_node_direct(to_free);
|
||||||
|
free(to_free);
|
||||||
|
g->gc_node_count--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void unwind(struct gmachine* g) {
|
||||||
|
struct stack* s = &g->stack;
|
||||||
|
|
||||||
|
while(1) {
|
||||||
|
struct node_base* peek = stack_peek(s, 0);
|
||||||
|
if(peek->tag == NODE_APP) {
|
||||||
|
struct node_app* n = (struct node_app*) peek;
|
||||||
|
stack_push(s, n->left);
|
||||||
|
} else if(peek->tag == NODE_GLOBAL) {
|
||||||
|
struct node_global* n = (struct node_global*) peek;
|
||||||
|
assert(s->count > n->arity);
|
||||||
|
|
||||||
|
for(size_t i = 1; i <= n->arity; i++) {
|
||||||
|
s->data[s->count - i]
|
||||||
|
= ((struct node_app*) s->data[s->count - i - 1])->right;
|
||||||
|
}
|
||||||
|
|
||||||
|
n->function(g);
|
||||||
|
} else if(peek->tag == NODE_IND) {
|
||||||
|
struct node_ind* n = (struct node_ind*) peek;
|
||||||
|
stack_pop(s);
|
||||||
|
stack_push(s, n->next);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
extern void f_main(struct gmachine* s);
|
||||||
|
|
||||||
|
void print_node(struct node_base* n) {
|
||||||
|
if(n->tag == NODE_APP) {
|
||||||
|
struct node_app* app = (struct node_app*) n;
|
||||||
|
print_node(app->left);
|
||||||
|
putchar(' ');
|
||||||
|
print_node(app->right);
|
||||||
|
} else if(n->tag == NODE_DATA) {
|
||||||
|
printf("(Packed)");
|
||||||
|
} else if(n->tag == NODE_GLOBAL) {
|
||||||
|
struct node_global* global = (struct node_global*) n;
|
||||||
|
printf("(Global: %p)", global->function);
|
||||||
|
} else if(n->tag == NODE_IND) {
|
||||||
|
print_node(((struct node_ind*) n)->next);
|
||||||
|
} else if(n->tag == NODE_NUM) {
|
||||||
|
struct node_num* num = (struct node_num*) n;
|
||||||
|
printf("%d", num->value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
struct gmachine gmachine;
|
||||||
|
struct node_global* first_node = alloc_global(f_main, 0);
|
||||||
|
struct node_base* result;
|
||||||
|
|
||||||
|
gmachine_init(&gmachine);
|
||||||
|
gmachine_track(&gmachine, (struct node_base*) first_node);
|
||||||
|
stack_push(&gmachine.stack, (struct node_base*) first_node);
|
||||||
|
unwind(&gmachine);
|
||||||
|
result = stack_pop(&gmachine.stack);
|
||||||
|
printf("Result: ");
|
||||||
|
print_node(result);
|
||||||
|
putchar('\n');
|
||||||
|
gmachine_free(&gmachine);
|
||||||
|
}
|
||||||
84
code/compiler/09/runtime.h
Normal file
84
code/compiler/09/runtime.h
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <stdint.h> /* int8_t, int32_t, int64_t used below */
#include <stdlib.h>

struct gmachine;

/* Discriminates the node kinds; stored in node_base.tag. */
enum node_tag {
    NODE_APP,
    NODE_NUM,
    NODE_GLOBAL,
    NODE_IND,
    NODE_DATA
};

/* Header shared by every node; it is the first member of each node struct. */
struct node_base {
    enum node_tag tag;
    int8_t gc_reachable;       /* mark bit used during collection */
    struct node_base* gc_next; /* next node on the machine's tracked list */
};

/* Application of `left` to `right`. */
struct node_app {
    struct node_base base;
    struct node_base* left;
    struct node_base* right;
};

/* Integer value. */
struct node_num {
    struct node_base base;
    int32_t value;
};

/* Compiled function together with the number of arguments it takes. */
struct node_global {
    struct node_base base;
    int32_t arity;
    void (*function)(struct gmachine*);
};

/* Indirection to another node; `next` may be NULL (see gmachine_alloc). */
struct node_ind {
    struct node_base base;
    struct node_base* next;
};

/* Packed constructor: a constructor tag plus a NULL-terminated child array. */
struct node_data {
    struct node_base base;
    int8_t tag;
    struct node_base** array;
};

struct node_base* alloc_node();
struct node_app* alloc_app(struct node_base* l, struct node_base* r);
struct node_num* alloc_num(int32_t n);
struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a);
struct node_ind* alloc_ind(struct node_base* n);
void free_node_direct(struct node_base*);
void gc_visit_node(struct node_base*);

/* Growable stack of node pointers. */
struct stack {
    size_t size;  /* capacity of `data`, in entries */
    size_t count; /* number of entries in use */
    struct node_base** data;
};

void stack_init(struct stack* s);
void stack_free(struct stack* s);
void stack_push(struct stack* s, struct node_base* n);
struct node_base* stack_pop(struct stack* s);
struct node_base* stack_peek(struct stack* s, size_t o);
void stack_popn(struct stack* s, size_t n);

/* The machine state: the stack plus the collector's bookkeeping. */
struct gmachine {
    struct stack stack;
    struct node_base* gc_nodes; /* head of the tracked-node list */
    int64_t gc_node_count;      /* number of tracked nodes */
    int64_t gc_node_threshold;  /* count at which a collection is triggered */
};

void gmachine_init(struct gmachine* g);
void gmachine_free(struct gmachine* g);
void gmachine_slide(struct gmachine* g, size_t n);
void gmachine_update(struct gmachine* g, size_t o);
void gmachine_alloc(struct gmachine* g, size_t o);
void gmachine_pack(struct gmachine* g, size_t n, int8_t t);
void gmachine_split(struct gmachine* g, size_t n);
struct node_base* gmachine_track(struct gmachine* g, struct node_base* b);
void gmachine_gc(struct gmachine* g);
|
||||||
35
code/compiler/09/scanner.l
Normal file
35
code/compiler/09/scanner.l
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
%option noyywrap
|
||||||
|
|
||||||
|
%{
|
||||||
|
#include <iostream>
|
||||||
|
#include "ast.hpp"
|
||||||
|
#include "definition.hpp"
|
||||||
|
#include "parser.hpp"
|
||||||
|
|
||||||
|
#define YY_DECL yy::parser::symbol_type yylex()
|
||||||
|
|
||||||
|
%}
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
[ \t\r\n]+ { /* skip whitespace, including tabs and carriage returns */ }
\+ { return yy::parser::make_PLUS(); }
\* { return yy::parser::make_TIMES(); }
- { return yy::parser::make_MINUS(); }
\/ { return yy::parser::make_DIVIDE(); }
[0-9]+ { return yy::parser::make_INT(atoi(yytext)); }
defn { return yy::parser::make_DEFN(); }
data { return yy::parser::make_DATA(); }
case { return yy::parser::make_CASE(); }
of { return yy::parser::make_OF(); }
\{ { return yy::parser::make_OCURLY(); }
\} { return yy::parser::make_CCURLY(); }
\( { return yy::parser::make_OPAREN(); }
\) { return yy::parser::make_CPAREN(); }
, { return yy::parser::make_COMMA(); }
-> { return yy::parser::make_ARROW(); }
= { return yy::parser::make_EQUAL(); }
[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext)); }
[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext)); }
|
||||||
|
|
||||||
|
%%
|
||||||
99
code/compiler/09/type.cpp
Normal file
99
code/compiler/09/type.cpp
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
#include "type.hpp"
|
||||||
|
#include <sstream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
|
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
|
auto it = mgr.types.find(name);
|
||||||
|
if(it != mgr.types.end()) {
|
||||||
|
it->second->print(mgr, to);
|
||||||
|
} else {
|
||||||
|
to << name;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints a base type, which is simply its name; the manager is not consulted.
void type_base::print(const type_mgr& mgr, std::ostream& to) const {
    to << name;
}
|
||||||
|
|
||||||
|
// Prints a function type as "<left> -> (<right>)"; the right-hand side is
// always parenthesised.
void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
    left->print(mgr, to);

    to << " -> (";
    right->print(mgr, to);
    to << ")";
}
|
||||||
|
|
||||||
|
std::string type_mgr::new_type_name() {
|
||||||
|
int temp = last_id++;
|
||||||
|
std::string str = "";
|
||||||
|
|
||||||
|
while(temp != -1) {
|
||||||
|
str += (char) ('a' + (temp % 26));
|
||||||
|
temp = temp / 26 - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::reverse(str.begin(), str.end());
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates a fresh type variable whose name comes from new_type_name().
type_ptr type_mgr::new_type() {
    std::string fresh_name = new_type_name();
    return type_ptr(new type_var(fresh_name));
}
|
||||||
|
|
||||||
|
// Creates a function type whose argument and result are both fresh type
// variables. Both new_type() calls stay inside the constructor call, exactly
// as before, so the naming of the two variables is unchanged.
type_ptr type_mgr::new_arrow_type() {
    type_arr* arrow = new type_arr(new_type(), new_type());
    return type_ptr(arrow);
}
|
||||||
|
|
||||||
|
// Follows type-variable bindings starting at `t` until reaching either a
// non-variable type or a variable with no binding.
//
// Returns the final type. `var` is set to the unbound variable the chain ended
// on, or to nullptr when the chain ended on a non-variable type.
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
    var = nullptr;

    for(;;) {
        type_var* current = dynamic_cast<type_var*>(t.get());
        if(!current) return t;

        auto binding = types.find(current->name);
        if(binding == types.end()) {
            var = current;
            return t;
        }

        t = binding->second;
    }
}
|
||||||
|
|
||||||
|
// Makes `l` and `r` equal by binding type variables where needed.
//
// Both sides are resolved first. An unbound variable on either side is bound
// to the other side; two function types are unified component-wise; two base
// types must carry the same name. Anything else throws unification_error.
void type_mgr::unify(type_ptr l, type_ptr r) {
    type_var* lvar;
    type_var* rvar;

    l = resolve(l, lvar);
    r = resolve(r, rvar);

    // An unbound variable on the left takes precedence over one on the right.
    if(lvar) {
        bind(lvar->name, r);
        return;
    }
    if(rvar) {
        bind(rvar->name, l);
        return;
    }

    type_arr* larr = dynamic_cast<type_arr*>(l.get());
    type_arr* rarr = larr ? dynamic_cast<type_arr*>(r.get()) : nullptr;
    if(larr && rarr) {
        unify(larr->left, rarr->left);
        unify(larr->right, rarr->right);
        return;
    }

    type_base* lid = dynamic_cast<type_base*>(l.get());
    type_base* rid = lid ? dynamic_cast<type_base*>(r.get()) : nullptr;
    if(lid && rid && lid->name == rid->name) return;

    throw unification_error(l, r);
}
|
||||||
|
|
||||||
|
void type_mgr::bind(const std::string& s, type_ptr t) {
|
||||||
|
type_var* other = dynamic_cast<type_var*>(t.get());
|
||||||
|
|
||||||
|
if(other && other->name == s) return;
|
||||||
|
types[s] = t;
|
||||||
|
}
|
||||||
65
code/compiler/09/type.hpp
Normal file
65
code/compiler/09/type.hpp
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
struct type_mgr;
|
||||||
|
|
||||||
|
struct type {
|
||||||
|
virtual ~type() = default;
|
||||||
|
|
||||||
|
virtual void print(const type_mgr& mgr, std::ostream& to) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using type_ptr = std::shared_ptr<type>;
|
||||||
|
|
||||||
|
struct type_var : public type {
|
||||||
|
std::string name;
|
||||||
|
|
||||||
|
type_var(std::string n)
|
||||||
|
: name(std::move(n)) {}
|
||||||
|
|
||||||
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_base : public type {
|
||||||
|
std::string name;
|
||||||
|
|
||||||
|
type_base(std::string n)
|
||||||
|
: name(std::move(n)) {}
|
||||||
|
|
||||||
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_data : public type_base {
|
||||||
|
struct constructor {
|
||||||
|
int tag;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::map<std::string, constructor> constructors;
|
||||||
|
|
||||||
|
type_data(std::string n)
|
||||||
|
: type_base(std::move(n)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_arr : public type {
|
||||||
|
type_ptr left;
|
||||||
|
type_ptr right;
|
||||||
|
|
||||||
|
type_arr(type_ptr l, type_ptr r)
|
||||||
|
: left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_mgr {
|
||||||
|
int last_id = 0;
|
||||||
|
std::map<std::string, type_ptr> types;
|
||||||
|
|
||||||
|
std::string new_type_name();
|
||||||
|
type_ptr new_type();
|
||||||
|
type_ptr new_arrow_type();
|
||||||
|
|
||||||
|
void unify(type_ptr l, type_ptr r);
|
||||||
|
type_ptr resolve(type_ptr t, type_var*& var) const;
|
||||||
|
void bind(const std::string& s, type_ptr t);
|
||||||
|
};
|
||||||
16
code/compiler/09/type_env.cpp
Normal file
16
code/compiler/09/type_env.cpp
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#include "type_env.hpp"
|
||||||
|
|
||||||
|
// Looks `name` up in this environment and then in each enclosing one.
// Returns nullptr when no environment in the chain binds the name.
type_ptr type_env::lookup(const std::string& name) const {
    for(const type_env* scope = this; scope; scope = scope->parent) {
        auto found = scope->names.find(name);
        if(found != scope->names.end()) return found->second;
    }
    return nullptr;
}
|
||||||
|
|
||||||
|
void type_env::bind(const std::string& name, type_ptr t) {
|
||||||
|
names[name] = t;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates an empty child environment whose parent is this one.
type_env type_env::scope() const {
    type_env child(this);
    return child;
}
|
||||||
16
code/compiler/09/type_env.hpp
Normal file
16
code/compiler/09/type_env.hpp
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <map>
|
||||||
|
#include "type.hpp"
|
||||||
|
|
||||||
|
struct type_env {
|
||||||
|
std::map<std::string, type_ptr> names;
|
||||||
|
type_env const* parent = nullptr;
|
||||||
|
|
||||||
|
type_env(type_env const* p)
|
||||||
|
: parent(p) {}
|
||||||
|
type_env() : type_env(nullptr) {}
|
||||||
|
|
||||||
|
type_ptr lookup(const std::string& name) const;
|
||||||
|
void bind(const std::string& name, type_ptr t);
|
||||||
|
type_env scope() const;
|
||||||
|
};
|
||||||
42
code/compiler/10/CMakeLists.txt
Normal file
42
code/compiler/10/CMakeLists.txt
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# The version range keeps 3.1 as the minimum while opting in to newer policy
# behavior on recent CMake releases.
cmake_minimum_required(VERSION 3.1...3.29)
project(compiler)

# Find all the required packages. The build cannot proceed without any of
# them, so fail at configure time instead of at the first missing command.
find_package(BISON REQUIRED)
find_package(FLEX REQUIRED)
find_package(LLVM REQUIRED CONFIG)

# Set up the flex and bison targets
bison_target(parser
    ${CMAKE_CURRENT_SOURCE_DIR}/parser.y
    ${CMAKE_CURRENT_BINARY_DIR}/parser.cpp
    COMPILE_FLAGS "-d")
flex_target(scanner
    ${CMAKE_CURRENT_SOURCE_DIR}/scanner.l
    ${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp)
add_flex_bison_dependency(scanner parser)

# Find all the relevant LLVM components
llvm_map_components_to_libnames(LLVM_LIBS core x86asmparser x86codegen)

# Create compiler executable
add_executable(compiler
    ast.cpp ast.hpp definition.cpp
    llvm_context.cpp llvm_context.hpp
    type_env.cpp type_env.hpp
    env.cpp env.hpp
    type.cpp type.hpp
    error.cpp error.hpp
    binop.cpp binop.hpp
    instruction.cpp instruction.hpp
    ${BISON_parser_OUTPUTS}
    ${FLEX_scanner_OUTPUTS}
    main.cpp
)

# Configure compiler executable. Nothing links against an executable, so all
# usage requirements are PRIVATE.
target_include_directories(compiler PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_include_directories(compiler PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
target_include_directories(compiler PRIVATE ${LLVM_INCLUDE_DIRS})
target_compile_definitions(compiler PRIVATE ${LLVM_DEFINITIONS})
target_link_libraries(compiler PRIVATE ${LLVM_LIBS})
|
||||||
221
code/compiler/10/ast.cpp
Normal file
221
code/compiler/10/ast.cpp
Normal file
@@ -0,0 +1,221 @@
|
|||||||
|
#include "ast.hpp"
|
||||||
|
#include <ostream>
|
||||||
|
#include "binop.hpp"
|
||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
|
// Writes `n` indentation units (one space each) to `to`.
static void print_indent(int n, std::ostream& to) {
    for(; n != 0; --n) {
        to << " ";
    }
}
|
||||||
|
|
||||||
|
// Prints the integer literal as an indented "INT: <value>" line.
void ast_int::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "INT: ";
    to << value;
    to << std::endl;
}
|
||||||
|
|
||||||
|
// An integer literal always has the base type "Int".
type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) {
    type_base* int_type = new type_base("Int");
    return type_ptr(int_type);
}
|
||||||
|
|
||||||
|
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
into.push_back(instruction_ptr(new instruction_pushint(value)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints the lowercase identifier as an indented "LID: <id>" line.
void ast_lid::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "LID: ";
    to << id;
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Returns the type bound to this identifier in `env`.
//
// Throws type_error when the identifier is not bound. Previously a null type
// was returned in that case; unification could then silently bind a type
// variable to null, letting programs that reference undefined names pass
// type checking.
type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) {
    type_ptr found = env.lookup(id);
    if(!found) throw type_error(std::string("unknown variable ") + id);
    return found;
}
|
||||||
|
|
||||||
|
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
into.push_back(instruction_ptr(
|
||||||
|
env->has_variable(id) ?
|
||||||
|
(instruction*) new instruction_push(env->get_offset(id)) :
|
||||||
|
(instruction*) new instruction_pushglobal(id)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints the uppercase identifier as an indented "UID: <id>" line.
void ast_uid::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "UID: ";
    to << id;
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Returns the type bound to this uppercase identifier in `env`.
//
// Throws type_error when the identifier is not bound. Previously a null type
// was returned in that case, which could flow into unification and be bound
// to a type variable without any diagnostic.
type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) {
    type_ptr found = env.lookup(id);
    if(!found) throw type_error(std::string("unknown constructor ") + id);
    return found;
}
|
||||||
|
|
||||||
|
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
into.push_back(instruction_ptr(new instruction_pushglobal(id)));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_binop::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "BINOP: " << op_name(op) << std::endl;
|
||||||
|
left->print(indent + 1, to);
|
||||||
|
right->print(indent + 1, to);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Type-checks a binary operation by treating the operator as a two-argument
// function looked up in `env` under its symbol.
//
// Returns a fresh type variable for the result, after unifying
// "ltype -> (rtype -> result)" with the operator's type.
// Throws type_error when `env` has no entry for the operator.
type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) {
    // Operands are checked left first, then right, before the operator lookup.
    type_ptr ltype = left->typecheck(mgr, env);
    type_ptr rtype = right->typecheck(mgr, env);

    type_ptr ftype = env.lookup(op_name(op));
    if(!ftype) {
        throw type_error(std::string("unknown binary operator ") + op_name(op));
    }

    type_ptr return_type = mgr.new_type();
    type_ptr inner_arrow(new type_arr(rtype, return_type));
    type_ptr outer_arrow(new type_arr(ltype, inner_arrow));
    mgr.unify(outer_arrow, ftype);

    return return_type;
}
|
||||||
|
|
||||||
|
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
right->compile(env, into);
|
||||||
|
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||||
|
|
||||||
|
into.push_back(instruction_ptr(new instruction_pushglobal(op_action(op))));
|
||||||
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_app::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "APP:" << std::endl;
|
||||||
|
left->print(indent + 1, to);
|
||||||
|
right->print(indent + 1, to);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Type-checks a function application: the function's type is unified with
// "argument type -> result", where result is a fresh type variable that is
// returned to the caller.
type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) {
    // The function expression is checked before the argument.
    type_ptr function_type = left->typecheck(mgr, env);
    type_ptr argument_type = right->typecheck(mgr, env);

    type_ptr return_type = mgr.new_type();
    type_ptr expected(new type_arr(argument_type, return_type));
    mgr.unify(expected, function_type);

    return return_type;
}
|
||||||
|
|
||||||
|
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
right->compile(env, into);
|
||||||
|
left->compile(env_ptr(new env_offset(1, env)), into);
|
||||||
|
into.push_back(instruction_ptr(new instruction_mkapp()));
|
||||||
|
}
|
||||||
|
|
||||||
|
void ast_case::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "CASE: " << std::endl;
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
print_indent(indent + 1, to);
|
||||||
|
branch->pat->print(to);
|
||||||
|
to << std::endl;
|
||||||
|
branch->expr->print(indent + 2, to);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Type-checks a case expression.
//
// Every branch is checked in its own child scope: the pattern is matched
// against the type of the analysed expression and the branch body's type is
// unified with a shared result type. Afterwards the analysed expression's
// resolved type is stored in input_type (read later by compile()).
// Throws type_error when that type is not a data type.
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) {
    type_var* unbound;
    type_ptr scrutinee_type = mgr.resolve(of->typecheck(mgr, env), unbound);
    type_ptr result_type = mgr.new_type();

    for(auto& current : branches) {
        type_env branch_env = env.scope();
        current->pat->match(scrutinee_type, mgr, branch_env);

        type_ptr body_type = current->expr->typecheck(mgr, branch_env);
        mgr.unify(result_type, body_type);
    }

    // Resolve again: matching the patterns may have bound the type.
    input_type = mgr.resolve(scrutinee_type, unbound);
    if(dynamic_cast<type_data*>(input_type.get()) == nullptr) {
        throw type_error("attempting case analysis of non-data type");
    }

    return result_type;
}
|
||||||
|
|
||||||
|
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
|
||||||
|
type_data* type = dynamic_cast<type_data*>(input_type.get());
|
||||||
|
|
||||||
|
of->compile(env, into);
|
||||||
|
into.push_back(instruction_ptr(new instruction_eval()));
|
||||||
|
|
||||||
|
instruction_jump* jump_instruction = new instruction_jump();
|
||||||
|
into.push_back(instruction_ptr(jump_instruction));
|
||||||
|
for(auto& branch : branches) {
|
||||||
|
std::vector<instruction_ptr> branch_instructions;
|
||||||
|
pattern_var* vpat;
|
||||||
|
pattern_constr* cpat;
|
||||||
|
|
||||||
|
if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
|
||||||
|
branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions);
|
||||||
|
|
||||||
|
for(auto& constr_pair : type->constructors) {
|
||||||
|
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) !=
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
break;
|
||||||
|
|
||||||
|
jump_instruction->tag_mappings[constr_pair.second.tag] =
|
||||||
|
jump_instruction->branches.size();
|
||||||
|
}
|
||||||
|
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||||
|
} else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
|
||||||
|
env_ptr new_env = env;
|
||||||
|
for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) {
|
||||||
|
new_env = env_ptr(new env_var(*it, new_env));
|
||||||
|
}
|
||||||
|
|
||||||
|
branch_instructions.push_back(instruction_ptr(new instruction_split(
|
||||||
|
cpat->params.size())));
|
||||||
|
branch->expr->compile(new_env, branch_instructions);
|
||||||
|
branch_instructions.push_back(instruction_ptr(new instruction_slide(
|
||||||
|
cpat->params.size())));
|
||||||
|
|
||||||
|
int new_tag = type->constructors[cpat->constr].tag;
|
||||||
|
if(jump_instruction->tag_mappings.find(new_tag) !=
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
throw type_error("technically not a type error: duplicate pattern");
|
||||||
|
|
||||||
|
jump_instruction->tag_mappings[new_tag] =
|
||||||
|
jump_instruction->branches.size();
|
||||||
|
jump_instruction->branches.push_back(std::move(branch_instructions));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& constr_pair : type->constructors) {
|
||||||
|
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) ==
|
||||||
|
jump_instruction->tag_mappings.end())
|
||||||
|
throw type_error("non-total pattern");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prints the variable pattern, which is just the variable's name.
void pattern_var::print(std::ostream& to) const {
    to << var;
}
|
||||||
|
|
||||||
|
// A variable pattern matches any type: the variable is bound to `t` in `env`.
void pattern_var::match(type_ptr t, type_mgr& mgr, type_env& env) const {
    env.bind(var, t);
}
|
||||||
|
|
||||||
|
void pattern_constr::print(std::ostream& to) const {
|
||||||
|
to << constr;
|
||||||
|
for(auto& param : params) {
|
||||||
|
to << " " << param;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Matches a constructor pattern against type `t`.
//
// The constructor's type is looked up in `env` and peeled one argument per
// pattern parameter, binding each parameter to the matching argument type.
// What remains must be the constructor's result type, which is unified with
// `t`.
//
// Throws type_error when the constructor is unknown, or when the pattern has
// more or fewer parameters than the constructor takes.
void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const {
    type_ptr constructor_type = env.lookup(constr);
    if(!constructor_type) {
        throw type_error(std::string("pattern using unknown constructor ") + constr);
    }

    // Range-for avoids the signed/unsigned comparison of an int index
    // against params.size().
    for(const auto& param : params) {
        type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
        if(!arr) throw type_error("too many parameters in constructor pattern");

        env.bind(param, arr->left);
        constructor_type = arr->right;
    }

    // A leftover arrow means the pattern did not name every field; report it
    // directly instead of attempting to unify `t` with a function type.
    if(dynamic_cast<type_arr*>(constructor_type.get())) {
        throw type_error("too few parameters in constructor pattern");
    }

    mgr.unify(t, constructor_type);
}
|
||||||
130
code/compiler/10/ast.hpp
Normal file
130
code/compiler/10/ast.hpp
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include "type.hpp"
|
||||||
|
#include "type_env.hpp"
|
||||||
|
#include "binop.hpp"
|
||||||
|
#include "instruction.hpp"
|
||||||
|
#include "env.hpp"
|
||||||
|
|
||||||
|
struct ast {
|
||||||
|
virtual ~ast() = default;
|
||||||
|
|
||||||
|
virtual void print(int indent, std::ostream& to) const = 0;
|
||||||
|
virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) = 0;
|
||||||
|
virtual void compile(const env_ptr& env,
|
||||||
|
std::vector<instruction_ptr>& into) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using ast_ptr = std::unique_ptr<ast>;
|
||||||
|
|
||||||
|
struct pattern {
|
||||||
|
virtual ~pattern() = default;
|
||||||
|
|
||||||
|
virtual void print(std::ostream& to) const = 0;
|
||||||
|
virtual void match(type_ptr t, type_mgr& mgr, type_env& env) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using pattern_ptr = std::unique_ptr<pattern>;
|
||||||
|
|
||||||
|
struct branch {
|
||||||
|
pattern_ptr pat;
|
||||||
|
ast_ptr expr;
|
||||||
|
|
||||||
|
branch(pattern_ptr p, ast_ptr a)
|
||||||
|
: pat(std::move(p)), expr(std::move(a)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
using branch_ptr = std::unique_ptr<branch>;
|
||||||
|
|
||||||
|
struct ast_int : public ast {
|
||||||
|
int value;
|
||||||
|
|
||||||
|
explicit ast_int(int v)
|
||||||
|
: value(v) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env);
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_lid : public ast {
|
||||||
|
std::string id;
|
||||||
|
|
||||||
|
explicit ast_lid(std::string i)
|
||||||
|
: id(std::move(i)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env);
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_uid : public ast {
|
||||||
|
std::string id;
|
||||||
|
|
||||||
|
explicit ast_uid(std::string i)
|
||||||
|
: id(std::move(i)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env);
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_binop : public ast {
|
||||||
|
binop op;
|
||||||
|
ast_ptr left;
|
||||||
|
ast_ptr right;
|
||||||
|
|
||||||
|
ast_binop(binop o, ast_ptr l, ast_ptr r)
|
||||||
|
: op(o), left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env);
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_app : public ast {
|
||||||
|
ast_ptr left;
|
||||||
|
ast_ptr right;
|
||||||
|
|
||||||
|
ast_app(ast_ptr l, ast_ptr r)
|
||||||
|
: left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env);
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ast_case : public ast {
|
||||||
|
ast_ptr of;
|
||||||
|
type_ptr input_type;
|
||||||
|
std::vector<branch_ptr> branches;
|
||||||
|
|
||||||
|
ast_case(ast_ptr o, std::vector<branch_ptr> b)
|
||||||
|
: of(std::move(o)), branches(std::move(b)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
type_ptr typecheck(type_mgr& mgr, const type_env& env);
|
||||||
|
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct pattern_var : public pattern {
|
||||||
|
std::string var;
|
||||||
|
|
||||||
|
pattern_var(std::string v)
|
||||||
|
: var(std::move(v)) {}
|
||||||
|
|
||||||
|
void print(std::ostream &to) const;
|
||||||
|
void match(type_ptr t, type_mgr& mgr, type_env& env) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct pattern_constr : public pattern {
|
||||||
|
std::string constr;
|
||||||
|
std::vector<std::string> params;
|
||||||
|
|
||||||
|
pattern_constr(std::string c, std::vector<std::string> p)
|
||||||
|
: constr(std::move(c)), params(std::move(p)) {}
|
||||||
|
|
||||||
|
void print(std::ostream &to) const;
|
||||||
|
void match(type_ptr t, type_mgr&, type_env& env) const;
|
||||||
|
};
|
||||||
21
code/compiler/10/binop.cpp
Normal file
21
code/compiler/10/binop.cpp
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
#include "binop.hpp"
|
||||||
|
|
||||||
|
// Returns the source-level symbol of a binary operator, or "??" for a value
// outside the enumeration.
std::string op_name(binop op) {
    const char* symbol = "??";
    switch(op) {
        case PLUS: symbol = "+"; break;
        case MINUS: symbol = "-"; break;
        case TIMES: symbol = "*"; break;
        case DIVIDE: symbol = "/"; break;
    }
    return symbol;
}
|
||||||
|
|
||||||
|
// Returns the name of the global function implementing a binary operator, or
// "??" for a value outside the enumeration.
std::string op_action(binop op) {
    const char* action = "??";
    switch(op) {
        case PLUS: action = "plus"; break;
        case MINUS: action = "minus"; break;
        case TIMES: action = "times"; break;
        case DIVIDE: action = "divide"; break;
    }
    return action;
}
|
||||||
12
code/compiler/10/binop.hpp
Normal file
12
code/compiler/10/binop.hpp
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
// Binary operators supported by the language.
enum binop {
    PLUS,   // "+"
    MINUS,  // "-"
    TIMES,  // "*"
    DIVIDE  // "/"
};
|
||||||
|
|
||||||
|
std::string op_name(binop op);
|
||||||
|
std::string op_action(binop op);
|
||||||
105
code/compiler/10/definition.cpp
Normal file
105
code/compiler/10/definition.cpp
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
#include "definition.hpp"
|
||||||
|
#include "error.hpp"
|
||||||
|
#include "ast.hpp"
|
||||||
|
#include "instruction.hpp"
|
||||||
|
#include "llvm_context.hpp"
|
||||||
|
#include <llvm/IR/DerivedTypes.h>
|
||||||
|
#include <llvm/IR/Function.h>
|
||||||
|
#include <llvm/IR/Type.h>
|
||||||
|
|
||||||
|
// First type-checking pass for a function definition.
//
// Allocates a fresh type variable for the result and one per parameter,
// builds the function's arrow type by wrapping the result once per
// parameter, and binds the function name to that type in `env`. The
// parameter type variables are appended to param_types in creation order.
void definition_defn::typecheck_first(type_mgr& mgr, type_env& env) {
    return_type = mgr.new_type();

    type_ptr full_type = return_type;
    for(size_t created = 0; created < params.size(); created++) {
        type_ptr param_type = mgr.new_type();
        param_types.push_back(param_type);
        full_type = type_ptr(new type_arr(param_type, full_type));
    }

    env.bind(name, full_type);
}
|
||||||
|
|
||||||
|
// Second type-checking pass for a function definition.
//
// Binds each parameter name to its type variable in a child scope (parameters
// are walked front to back while param_types is walked back to front), checks
// the body in that scope, and unifies the body's type with return_type.
void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const {
    type_env body_env = env.scope();

    auto type_it = param_types.rbegin();
    for(auto param_it = params.begin();
            param_it != params.end() && type_it != param_types.rend();
            ++param_it, ++type_it) {
        body_env.bind(*param_it, *type_it);
    }

    type_ptr body_type = body->typecheck(mgr, body_env);
    mgr.unify(return_type, body_type);
}
|
||||||
|
|
||||||
|
void definition_defn::compile() {
|
||||||
|
env_ptr new_env = env_ptr(new env_offset(0, nullptr));
|
||||||
|
for(auto it = params.rbegin(); it != params.rend(); it++) {
|
||||||
|
new_env = env_ptr(new env_var(*it, new_env));
|
||||||
|
}
|
||||||
|
body->compile(new_env, instructions);
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_update(params.size())));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
|
||||||
|
}
|
||||||
|
// First LLVM pass: asks the context to create this definition's function,
// passing its name and parameter count, and remembers the result.
void definition_defn::gen_llvm_first(llvm_context& ctx) {
    const auto arity = params.size();
    generated_function = ctx.create_custom_function(name, arity);
}
|
||||||
|
|
||||||
|
// Second LLVM pass: emits every compiled instruction into the entry block of
// the function created by gen_llvm_first(), then a void return.
void definition_defn::gen_llvm_second(llvm_context& ctx) {
    ctx.builder.SetInsertPoint(&generated_function->getEntryBlock());

    for(auto it = instructions.begin(); it != instructions.end(); ++it) {
        (*it)->gen_llvm(ctx, generated_function);
    }

    ctx.builder.CreateRetVoid();
}
|
||||||
|
|
||||||
|
// First type-checking pass for a data definition.
//
// Creates the data type, assigns consecutive tags (starting at 0) to its
// constructors in declaration order, and binds every constructor name in
// `env` to a function type from its field types to the data type.
void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
    type_data* data_type = new type_data(name);
    type_ptr result_type(data_type);
    int tag = 0;

    for(auto& ctor : constructors) {
        ctor->tag = tag;
        data_type->constructors[ctor->name] = { tag };
        tag++;

        // Wrap the result type with the field types, last field innermost.
        type_ptr ctor_type = result_type;
        for(auto field = ctor->types.rbegin(); field != ctor->types.rend(); ++field) {
            type_ptr field_type(new type_base(*field));
            ctor_type = type_ptr(new type_arr(field_type, ctor_type));
        }

        env.bind(ctor->name, ctor_type);
    }
}
|
||||||
|
|
||||||
|
// Data definitions need no work in the second type-checking pass.
void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const {
    // Intentionally empty.
}
|
||||||
|
|
||||||
|
// Data definitions produce no instructions at this stage.
void definition_data::compile() {
    // Intentionally empty.
}
|
||||||
|
|
||||||
|
// First LLVM pass for a data definition: generates one function per
// constructor. Each function's body is instruction_pack (with the
// constructor's tag and field count) followed by instruction_update(0), then
// a void return.
void definition_data::gen_llvm_first(llvm_context& ctx) {
    for(auto& ctor : constructors) {
        auto ctor_function =
            ctx.create_custom_function(ctor->name, ctor->types.size());

        std::vector<instruction_ptr> body;
        body.push_back(instruction_ptr(
            new instruction_pack(ctor->tag, ctor->types.size())
        ));
        body.push_back(instruction_ptr(new instruction_update(0)));

        ctx.builder.SetInsertPoint(&ctor_function->getEntryBlock());
        for(auto it = body.begin(); it != body.end(); ++it) {
            (*it)->gen_llvm(ctx, ctor_function);
        }
        ctx.builder.CreateRetVoid();
    }
}
|
||||||
|
|
||||||
|
// Data definitions need no work in the second LLVM pass.
void definition_data::gen_llvm_second(llvm_context& ctx) {
    // Intentionally empty.
}
|
||||||
70
code/compiler/10/definition.hpp
Normal file
70
code/compiler/10/definition.hpp
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include "instruction.hpp"
|
||||||
|
#include "llvm_context.hpp"
|
||||||
|
#include "type_env.hpp"
|
||||||
|
|
||||||
|
struct ast;
|
||||||
|
using ast_ptr = std::unique_ptr<ast>;
|
||||||
|
|
||||||
|
struct definition {
|
||||||
|
virtual ~definition() = default;
|
||||||
|
|
||||||
|
virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0;
|
||||||
|
virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0;
|
||||||
|
virtual void compile() = 0;
|
||||||
|
virtual void gen_llvm_first(llvm_context& ctx) = 0;
|
||||||
|
virtual void gen_llvm_second(llvm_context& ctx) = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using definition_ptr = std::unique_ptr<definition>;
|
||||||
|
|
||||||
|
// A data constructor as written in the source: its name and the names of its
// field types.
struct constructor {
    std::string name;
    std::vector<std::string> types;
    // Numeric tag of the constructor. Assigned by
    // definition_data::typecheck_first(); initialised here so that reading it
    // before that pass is not a read of an indeterminate value.
    int8_t tag = 0;

    constructor(std::string n, std::vector<std::string> ts)
        : name(std::move(n)), types(std::move(ts)) {}
};
|
||||||
|
|
||||||
|
using constructor_ptr = std::unique_ptr<constructor>;
|
||||||
|
|
||||||
|
struct definition_defn : public definition {
|
||||||
|
std::string name;
|
||||||
|
std::vector<std::string> params;
|
||||||
|
ast_ptr body;
|
||||||
|
|
||||||
|
type_ptr return_type;
|
||||||
|
std::vector<type_ptr> param_types;
|
||||||
|
|
||||||
|
std::vector<instruction_ptr> instructions;
|
||||||
|
|
||||||
|
llvm::Function* generated_function;
|
||||||
|
|
||||||
|
definition_defn(std::string n, std::vector<std::string> p, ast_ptr b)
|
||||||
|
: name(std::move(n)), params(std::move(p)), body(std::move(b)) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void typecheck_first(type_mgr& mgr, type_env& env);
|
||||||
|
void typecheck_second(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void compile();
|
||||||
|
void gen_llvm_first(llvm_context& ctx);
|
||||||
|
void gen_llvm_second(llvm_context& ctx);
|
||||||
|
};
|
||||||
|
|
||||||
|
struct definition_data : public definition {
|
||||||
|
std::string name;
|
||||||
|
std::vector<constructor_ptr> constructors;
|
||||||
|
|
||||||
|
definition_data(std::string n, std::vector<constructor_ptr> cs)
|
||||||
|
: name(std::move(n)), constructors(std::move(cs)) {}
|
||||||
|
|
||||||
|
void typecheck_first(type_mgr& mgr, type_env& env);
|
||||||
|
void typecheck_second(type_mgr& mgr, const type_env& env) const;
|
||||||
|
void compile();
|
||||||
|
void gen_llvm_first(llvm_context& ctx);
|
||||||
|
void gen_llvm_second(llvm_context& ctx);
|
||||||
|
};
|
||||||
23
code/compiler/10/env.cpp
Normal file
23
code/compiler/10/env.cpp
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
#include "env.hpp"
|
||||||
|
|
||||||
|
int env_var::get_offset(const std::string& name) const {
|
||||||
|
if(name == this->name) return 0;
|
||||||
|
if(parent) return parent->get_offset(name) + 1;
|
||||||
|
throw 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool env_var::has_variable(const std::string& name) const {
|
||||||
|
if(name == this->name) return true;
|
||||||
|
if(parent) return parent->has_variable(name);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
int env_offset::get_offset(const std::string& name) const {
|
||||||
|
if(parent) return parent->get_offset(name) + offset;
|
||||||
|
throw 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool env_offset::has_variable(const std::string& name) const {
|
||||||
|
if(parent) return parent->has_variable(name);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
34
code/compiler/10/env.hpp
Normal file
34
code/compiler/10/env.hpp
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
// Abstract compile-time environment used to translate names into offsets.
struct env {
    virtual ~env() = default;

    // Offset of `name` relative to this environment.
    virtual int get_offset(const std::string& name) const = 0;
    // Whether `name` is bound anywhere in this environment chain.
    virtual bool has_variable(const std::string& name) const = 0;
};
|
||||||
|
|
||||||
|
using env_ptr = std::shared_ptr<env>;
|
||||||
|
|
||||||
|
struct env_var : public env {
|
||||||
|
std::string name;
|
||||||
|
env_ptr parent;
|
||||||
|
|
||||||
|
env_var(std::string& n, env_ptr p)
|
||||||
|
: name(std::move(n)), parent(std::move(p)) {}
|
||||||
|
|
||||||
|
int get_offset(const std::string& name) const;
|
||||||
|
bool has_variable(const std::string& name) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct env_offset : public env {
|
||||||
|
int offset;
|
||||||
|
env_ptr parent;
|
||||||
|
|
||||||
|
env_offset(int o, env_ptr p)
|
||||||
|
: offset(o), parent(std::move(p)) {}
|
||||||
|
|
||||||
|
int get_offset(const std::string& name) const;
|
||||||
|
bool has_variable(const std::string& name) const;
|
||||||
|
};
|
||||||
5
code/compiler/10/error.cpp
Normal file
5
code/compiler/10/error.cpp
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
|
// Generic message shared by every type error; the specific reason is kept
// in the `description` member rather than returned here.
const char* type_error::what() const noexcept {
    // Spelling corrected: "occured" -> "occurred".
    return "an error occurred while checking the types of the program";
}
|
||||||
21
code/compiler/10/error.hpp
Normal file
21
code/compiler/10/error.hpp
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <exception>
|
||||||
|
#include "type.hpp"
|
||||||
|
|
||||||
|
// Base class for failures reported while type checking.
struct type_error : public std::exception {
    // Free-form explanation supplied by whoever raised the error.
    std::string description;

    type_error(std::string message)
        : description(std::move(message)) {}

    // Fixed, generic message; see `description` for the details.
    const char* what() const noexcept override;
};
|
||||||
|
|
||||||
|
struct unification_error : public type_error {
|
||||||
|
type_ptr left;
|
||||||
|
type_ptr right;
|
||||||
|
|
||||||
|
unification_error(type_ptr l, type_ptr r)
|
||||||
|
: left(std::move(l)), right(std::move(r)),
|
||||||
|
type_error("failed to unify types") {}
|
||||||
|
};
|
||||||
2
code/compiler/10/examples/bad1.txt
Normal file
2
code/compiler/10/examples/bad1.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
data Bool = { True, False }
|
||||||
|
defn main = { 3 + True }
|
||||||
1
code/compiler/10/examples/bad2.txt
Normal file
1
code/compiler/10/examples/bad2.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
defn main = { 1 2 3 4 5 }
|
||||||
8
code/compiler/10/examples/bad3.txt
Normal file
8
code/compiler/10/examples/bad3.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
data List = { Nil, Cons Int List }
|
||||||
|
|
||||||
|
defn head l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { 0 }
|
||||||
|
Cons x y z -> { x }
|
||||||
|
}
|
||||||
|
}
|
||||||
129
code/compiler/10/examples/primes.txt
Normal file
129
code/compiler/10/examples/primes.txt
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
data List = { Nil, Cons Nat List }
|
||||||
|
data Bool = { True, False }
|
||||||
|
data Nat = { O, S Nat }
|
||||||
|
|
||||||
|
defn ifN c t e = {
|
||||||
|
case c of {
|
||||||
|
True -> { t }
|
||||||
|
False -> { e }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn ifL c t e = {
|
||||||
|
case c of {
|
||||||
|
True -> { t }
|
||||||
|
False -> { e }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn toInt n = {
|
||||||
|
case n of {
|
||||||
|
O -> { 0 }
|
||||||
|
S np -> { 1 + toInt np }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn lte n m = {
|
||||||
|
case m of {
|
||||||
|
O -> {
|
||||||
|
case n of {
|
||||||
|
O -> { True }
|
||||||
|
S np -> { False }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
S mp -> {
|
||||||
|
case n of {
|
||||||
|
O -> { True }
|
||||||
|
S np -> { lte np mp }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn minus n m = {
|
||||||
|
case m of {
|
||||||
|
O -> { n }
|
||||||
|
S mp -> {
|
||||||
|
case n of {
|
||||||
|
O -> { O }
|
||||||
|
S np -> {
|
||||||
|
minus np mp
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn mod n m = {
|
||||||
|
ifN (lte m n) (mod (minus n m) m) n
|
||||||
|
}
|
||||||
|
|
||||||
|
defn notDivisibleBy n m = {
|
||||||
|
case (mod m n) of {
|
||||||
|
O -> { False }
|
||||||
|
S mp -> { True }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn filter f l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { Nil }
|
||||||
|
Cons x xs -> { ifL (f x) (Cons x (filter f xs)) (filter f xs) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn map f l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { Nil }
|
||||||
|
Cons x xs -> { Cons (f x) (map f xs) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn nats = {
|
||||||
|
Cons (S (S O)) (map S nats)
|
||||||
|
}
|
||||||
|
|
||||||
|
defn primesRec l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { Nil }
|
||||||
|
Cons p xs -> { Cons p (primesRec (filter (notDivisibleBy p) xs)) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn primes = {
|
||||||
|
primesRec nats
|
||||||
|
}
|
||||||
|
|
||||||
|
defn take n l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { Nil }
|
||||||
|
Cons x xs -> {
|
||||||
|
case n of {
|
||||||
|
O -> { Nil }
|
||||||
|
S np -> { Cons x (take np xs) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn head l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { O }
|
||||||
|
Cons x xs -> { x }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn reverseAcc a l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { a }
|
||||||
|
Cons x xs -> { reverseAcc (Cons x a) xs }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn reverse l = {
|
||||||
|
reverseAcc Nil l
|
||||||
|
}
|
||||||
|
|
||||||
|
defn main = {
|
||||||
|
toInt (head (reverse (take ((S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S O))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) primes)))
|
||||||
|
}
|
||||||
31
code/compiler/10/examples/runtime1.c
Normal file
31
code/compiler/10/examples/runtime1.c
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
#include "../runtime.h"
|
||||||
|
|
||||||
|
void f_add(struct stack* s) {
|
||||||
|
struct node_num* left = (struct node_num*) eval(stack_peek(s, 0));
|
||||||
|
struct node_num* right = (struct node_num*) eval(stack_peek(s, 1));
|
||||||
|
stack_push(s, (struct node_base*) alloc_num(left->value + right->value));
|
||||||
|
}
|
||||||
|
|
||||||
|
void f_main(struct stack* s) {
|
||||||
|
// PushInt 320
|
||||||
|
stack_push(s, (struct node_base*) alloc_num(320));
|
||||||
|
|
||||||
|
// PushInt 6
|
||||||
|
stack_push(s, (struct node_base*) alloc_num(6));
|
||||||
|
|
||||||
|
// PushGlobal f_add (the function for +)
|
||||||
|
stack_push(s, (struct node_base*) alloc_global(f_add, 2));
|
||||||
|
|
||||||
|
struct node_base* left;
|
||||||
|
struct node_base* right;
|
||||||
|
|
||||||
|
// MkApp
|
||||||
|
left = stack_pop(s);
|
||||||
|
right = stack_pop(s);
|
||||||
|
stack_push(s, (struct node_base*) alloc_app(left, right));
|
||||||
|
|
||||||
|
// MkApp
|
||||||
|
left = stack_pop(s);
|
||||||
|
right = stack_pop(s);
|
||||||
|
stack_push(s, (struct node_base*) alloc_app(left, right));
|
||||||
|
}
|
||||||
2
code/compiler/10/examples/works1.txt
Normal file
2
code/compiler/10/examples/works1.txt
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
defn main = { sum 320 6 }
|
||||||
|
defn sum x y = { x + y }
|
||||||
3
code/compiler/10/examples/works2.txt
Normal file
3
code/compiler/10/examples/works2.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
defn add x y = { x + y }
|
||||||
|
defn double x = { add x x }
|
||||||
|
defn main = { double 163 }
|
||||||
8
code/compiler/10/examples/works3.txt
Normal file
8
code/compiler/10/examples/works3.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
data List = { Nil, Cons Int List }
|
||||||
|
defn length l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { 0 }
|
||||||
|
Cons x xs -> { 1 + length xs }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
defn main = { length (Cons 1 (Cons 2 (Cons 3 Nil))) }
|
||||||
16
code/compiler/10/examples/works4.txt
Normal file
16
code/compiler/10/examples/works4.txt
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
data List = { Nil, Cons Int List }
|
||||||
|
|
||||||
|
defn add x y = { x + y }
|
||||||
|
defn mul x y = { x * y }
|
||||||
|
|
||||||
|
defn foldr f b l = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { b }
|
||||||
|
Cons x xs -> { f x (foldr f b xs) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn main = {
|
||||||
|
foldr add 0 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil)))) +
|
||||||
|
foldr mul 1 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil))))
|
||||||
|
}
|
||||||
17
code/compiler/10/examples/works5.txt
Normal file
17
code/compiler/10/examples/works5.txt
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
data List = { Nil, Cons Int List }
|
||||||
|
|
||||||
|
defn sumZip l m = {
|
||||||
|
case l of {
|
||||||
|
Nil -> { 0 }
|
||||||
|
Cons x xs -> {
|
||||||
|
case m of {
|
||||||
|
Nil -> { 0 }
|
||||||
|
Cons y ys -> { x + y + sumZip xs ys }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defn ones = { Cons 1 ones }
|
||||||
|
|
||||||
|
defn main = { sumZip ones (Cons 1 (Cons 2 (Cons 3 Nil))) }
|
||||||
160
code/compiler/10/graph.hpp
Normal file
160
code/compiler/10/graph.hpp
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <queue>
|
||||||
|
#include <set>
|
||||||
|
#include <string>
|
||||||
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
// A function is identified by its name.
using function = std::string;

// A set of mutually dependent functions (one strongly connected component
// of the dependency graph).
struct group {
    std::set<function> members;
};

using group_ptr = std::unique_ptr<group>;

// Directed graph over function names. `compute_order` collapses mutually
// reachable functions into groups and returns the groups in topological
// order: a group appears before every group it has an edge to.
class function_graph {
    using group_id = size_t;

    // Book-keeping for one group while the order is being computed.
    struct group_data {
        std::set<function> functions;       // members of the group
        std::set<group_id> adjacency_list;  // groups this group points to
        // Number of distinct groups pointing at this one. Must start at
        // zero: it is incremented in create_edges and compared against
        // zero in generate_order.
        size_t indegree = 0;
    };

    using data_ptr = std::shared_ptr<group_data>;
    using edge = std::pair<function, function>;
    using group_edge = std::pair<group_id, group_id>;

    // Every known vertex is a key here, even when it has no outgoing edges.
    std::map<function, std::set<function>> adjacency_lists;
    std::set<edge> edges;

    std::set<edge> compute_transitive_edges();
    void create_groups(
            const std::set<edge>&,
            std::map<function, group_id>&,
            std::map<group_id, data_ptr>&);
    void create_edges(
            std::map<function, group_id>&,
            std::map<group_id, data_ptr>&);
    std::vector<group_ptr> generate_order(
            std::map<function, group_id>&,
            std::map<group_id, data_ptr>&);

    public:
    // Records that `from` refers to `to`. Both names become vertices.
    void add_edge(const function& from, const function& to);

    // Groups the vertices and returns the groups in topological order.
    std::vector<group_ptr> compute_order();
};

// The member functions below live in a header, so they are declared
// `inline` to stay ODR-safe when several translation units include it.

// Transitive closure of `edges`, computed Floyd-Warshall style: for each
// intermediate vertex ("connector"), join every pair that can reach each
// other through it.
inline std::set<function_graph::edge> function_graph::compute_transitive_edges() {
    std::set<edge> transitive_edges;
    transitive_edges.insert(edges.begin(), edges.end());
    for(auto& connector : adjacency_lists) {
        for(auto& from : adjacency_lists) {
            edge to_connector { from.first, connector.first };
            for(auto& to : adjacency_lists) {
                edge full_jump { from.first, to.first };
                if(transitive_edges.find(full_jump) != transitive_edges.end()) continue;

                edge from_connector { connector.first, to.first };
                if(transitive_edges.find(to_connector) != transitive_edges.end() &&
                        transitive_edges.find(from_connector) != transitive_edges.end())
                    transitive_edges.insert(std::move(full_jump));
            }
        }
    }
    return transitive_edges;
}

// Assigns each vertex to a group: two vertices share a group when each is
// reachable from the other according to `transitive_edges`.
inline void function_graph::create_groups(
        const std::set<edge>& transitive_edges,
        std::map<function, group_id>& group_ids,
        std::map<group_id, data_ptr>& group_data_map) {
    group_id id_counter = 0;
    for(auto& vertex : adjacency_lists) {
        // Already swallowed by an earlier vertex's group.
        if(group_ids.find(vertex.first) != group_ids.end())
            continue;
        data_ptr new_group(new group_data);
        new_group->functions.insert(vertex.first);
        group_data_map[id_counter] = new_group;
        group_ids[vertex.first] = id_counter;
        for(auto& other_vertex : adjacency_lists) {
            if(transitive_edges.find({vertex.first, other_vertex.first}) != transitive_edges.end() &&
                    transitive_edges.find({other_vertex.first, vertex.first}) != transitive_edges.end()) {
                group_ids[other_vertex.first] = id_counter;
                new_group->functions.insert(other_vertex.first);
            }
        }
        id_counter++;
    }
}

// Lifts vertex-level edges to group-level edges, counting each distinct
// group-to-group edge once towards the target's indegree.
inline void function_graph::create_edges(
        std::map<function, group_id>& group_ids,
        std::map<group_id, data_ptr>& group_data_map) {
    std::set<group_edge> group_edges;
    for(auto& vertex : adjacency_lists) {
        auto vertex_id = group_ids[vertex.first];
        auto& vertex_data = group_data_map[vertex_id];
        for(auto& other_vertex : vertex.second) {
            auto other_id = group_ids[other_vertex];
            // Edges inside a group do not constrain the ordering.
            if(vertex_id == other_id) continue;
            // std::set::insert reports whether the edge was new.
            if(!group_edges.insert({vertex_id, other_id}).second)
                continue;
            vertex_data->adjacency_list.insert(other_id);
            group_data_map[other_id]->indegree++;
        }
    }
}

// Kahn's algorithm over the group graph: repeatedly emit a group with no
// remaining incoming edges and release the groups it points to.
inline std::vector<group_ptr> function_graph::generate_order(
        std::map<function, group_id>& group_ids,
        std::map<group_id, data_ptr>& group_data_map) {
    (void) group_ids; // kept for interface compatibility; not needed here
    std::queue<group_id> id_queue;
    std::vector<group_ptr> output;
    for(auto& entry : group_data_map) {
        if(entry.second->indegree == 0) id_queue.push(entry.first);
    }

    while(!id_queue.empty()) {
        auto new_id = id_queue.front();
        id_queue.pop();
        auto& data = group_data_map[new_id];
        group_ptr output_group(new group);
        output_group->members = std::move(data->functions);

        for(auto& adjacent_group : data->adjacency_list) {
            if(--group_data_map[adjacent_group]->indegree == 0)
                id_queue.push(adjacent_group);
        }

        output.push_back(std::move(output_group));
    }

    return output;
}

inline void function_graph::add_edge(const function& from, const function& to) {
    adjacency_lists[from].insert(to);
    // Make sure the target is a vertex too. Without this a function that
    // only ever appears as a target never receives a group, and lookups
    // of its id silently fall back to group 0.
    adjacency_lists.emplace(to, std::set<function>());
    edges.insert({ from, to });
}

inline std::vector<group_ptr> function_graph::compute_order() {
    std::set<edge> transitive_edges = compute_transitive_edges();
    std::map<function, group_id> group_ids;
    std::map<group_id, data_ptr> group_data_map;

    create_groups(transitive_edges, group_ids, group_data_map);
    create_edges(group_ids, group_data_map);
    return generate_order(group_ids, group_data_map);
}
|
||||||
177
code/compiler/10/instruction.cpp
Normal file
177
code/compiler/10/instruction.cpp
Normal file
@@ -0,0 +1,177 @@
|
|||||||
|
#include "instruction.hpp"
|
||||||
|
#include "llvm_context.hpp"
|
||||||
|
#include <llvm/IR/BasicBlock.h>
|
||||||
|
#include <llvm/IR/Function.h>
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
// Writes the indentation unit `n` times to `to`. A zero or negative `n`
// writes nothing; the previous `while(n--)` loop would keep writing
// (effectively forever) when handed a negative count.
static void print_indent(int n, std::ostream& to) {
    for(int i = 0; i < n; ++i) to << " ";
}
|
||||||
|
|
||||||
|
// Prints "PushInt(<value>)" on its own line at the given indentation.
void instruction_pushint::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushInt(" << value << ")";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Emits code that builds a number node from the constant `value` and
// pushes it.
void instruction_pushint::gen_llvm(llvm_context& ctx, Function* f) const {
    auto literal = ctx.create_i32(value);
    auto node = ctx.create_num(f, literal);
    ctx.create_push(f, node);
}
|
||||||
|
|
||||||
|
// Prints "PushGlobal(<name>)" on its own line at the given indentation.
void instruction_pushglobal::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushGlobal(" << name << ")";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Emits code that builds a global node for the function registered under
// "f_<name>" (with its recorded arity) and pushes it. `at` throws if no
// such function has been registered in the context.
void instruction_pushglobal::gen_llvm(llvm_context& ctx, Function* f) const {
    const std::string key = "f_" + name;
    auto& target = ctx.custom_functions.at(key);
    auto arity_constant = ctx.create_i32(target->arity);
    auto node = ctx.create_global(f, target->function, arity_constant);
    ctx.create_push(f, node);
}
|
||||||
|
|
||||||
|
// Prints "Push(<offset>)" on its own line at the given indentation.
void instruction_push::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Push(" << offset << ")";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Emits code that peeks at the stack entry `offset` and pushes the result.
void instruction_push::gen_llvm(llvm_context& ctx, Function* f) const {
    auto position = ctx.create_size(offset);
    auto node = ctx.create_peek(f, position);
    ctx.create_push(f, node);
}
|
||||||
|
|
||||||
|
// Prints "Pop(<count>)" on its own line at the given indentation.
void instruction_pop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Pop(" << count << ")";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Emits a pop-n operation with `count` as its size argument.
void instruction_pop::gen_llvm(llvm_context& ctx, Function* f) const {
    auto how_many = ctx.create_size(count);
    ctx.create_popn(f, how_many);
}
|
||||||
|
|
||||||
|
// Prints "MkApp()" on its own line at the given indentation.
void instruction_mkapp::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "MkApp()";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Emits code that pops two nodes, combines them into an application node
// (first popped on the left, second on the right) and pushes the result.
void instruction_mkapp::gen_llvm(llvm_context& ctx, Function* f) const {
    auto first_popped = ctx.create_pop(f);
    auto second_popped = ctx.create_pop(f);
    auto application = ctx.create_app(f, first_popped, second_popped);
    ctx.create_push(f, application);
}
|
||||||
|
|
||||||
|
// Prints "Update(<offset>)" on its own line at the given indentation.
void instruction_update::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Update(" << offset << ")";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Emits an update operation with `offset` as its size argument.
void instruction_update::gen_llvm(llvm_context& ctx, Function* f) const {
    auto target = ctx.create_size(offset);
    ctx.create_update(f, target);
}
|
||||||
|
|
||||||
|
// Prints "Pack(<tag>, <size>)" on its own line at the given indentation.
void instruction_pack::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Pack(" << tag;
    to << ", " << size << ")";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Emits a pack operation: `size` is passed as a size constant and `tag`
// as an 8-bit constant.
void instruction_pack::gen_llvm(llvm_context& ctx, Function* f) const {
    auto field_count = ctx.create_size(size);
    auto constructor_tag = ctx.create_i8(tag);
    ctx.create_pack(f, field_count, constructor_tag);
}
|
||||||
|
|
||||||
|
// Prints "Split()" on its own line at the given indentation. The `size`
// member is not included in the output.
void instruction_split::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Split()";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Emits a split operation with `size` as its size argument.
void instruction_split::gen_llvm(llvm_context& ctx, Function* f) const {
    auto field_count = ctx.create_size(size);
    ctx.create_split(f, field_count);
}
|
||||||
|
|
||||||
|
void instruction_jump::print(int indent, std::ostream& to) const {
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << "Jump(" << std::endl;
|
||||||
|
for(auto& instruction_set : branches) {
|
||||||
|
for(auto& instruction : instruction_set) {
|
||||||
|
instruction->print(indent + 2, to);
|
||||||
|
}
|
||||||
|
to << std::endl;
|
||||||
|
}
|
||||||
|
print_indent(indent, to);
|
||||||
|
to << ")" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits a switch on the data tag of the node at the top of the stack.
// Every branch gets its own basic block that ends by branching to the
// "safety" block, which is also the switch's default destination and the
// place where code generation continues afterwards.
void instruction_jump::gen_llvm(llvm_context& ctx, Function* f) const {
    auto scrutinee = ctx.create_peek(f, ctx.create_size(0));
    auto scrutinee_tag = ctx.unwrap_data_tag(scrutinee);
    auto continuation = BasicBlock::Create(ctx.ctx, "safety", f);
    auto dispatch = ctx.builder.CreateSwitch(scrutinee_tag, continuation, tag_mappings.size());

    // One block per branch, stored in branch order so that tag_mappings
    // can refer to them by index.
    std::vector<BasicBlock*> branch_blocks;
    for(const auto& body : branches) {
        auto block = BasicBlock::Create(ctx.ctx, "branch", f);
        branch_blocks.push_back(block);

        ctx.builder.SetInsertPoint(block);
        for(const auto& step : body) {
            step->gen_llvm(ctx, f);
        }
        ctx.builder.CreateBr(continuation);
    }

    // Route each constructor tag to the block of the branch it maps to.
    for(const auto& tag_and_index : tag_mappings) {
        auto case_value = ctx.create_i8(tag_and_index.first);
        dispatch->addCase(case_value, branch_blocks[tag_and_index.second]);
    }

    ctx.builder.SetInsertPoint(continuation);
}
|
||||||
|
|
||||||
|
// Prints "Slide(<offset>)" on its own line at the given indentation.
void instruction_slide::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Slide(" << offset << ")";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Emits a slide operation with `offset` as its size argument.
void instruction_slide::gen_llvm(llvm_context& ctx, Function* f) const {
    auto distance = ctx.create_size(offset);
    ctx.create_slide(f, distance);
}
|
||||||
|
|
||||||
|
// Prints "BinOp(<action>)" on its own line, where <action> is whatever
// op_action returns for `op`.
void instruction_binop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "BinOp(" << op_action(op) << ")";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Emits code that pops two number nodes, unwraps their integers, applies
// `op` (first popped value is the left operand) and pushes a new number
// node holding the result. Throws the integer 0 for an operator that has
// no lowering here.
void instruction_binop::gen_llvm(llvm_context& ctx, Function* f) const {
    auto left_int = ctx.unwrap_num(ctx.create_pop(f));
    auto right_int = ctx.unwrap_num(ctx.create_pop(f));
    llvm::Value* result = nullptr;
    switch(op) {
        case PLUS: result = ctx.builder.CreateAdd(left_int, right_int); break;
        case MINUS: result = ctx.builder.CreateSub(left_int, right_int); break;
        case TIMES: result = ctx.builder.CreateMul(left_int, right_int); break;
        case DIVIDE: result = ctx.builder.CreateSDiv(left_int, right_int); break;
        // Previously an unhandled operator left `result` uninitialized and
        // the garbage pointer was handed to create_num.
        default: throw 0;
    }
    ctx.create_push(f, ctx.create_num(f, result));
}
|
||||||
|
|
||||||
|
// Prints "Eval()" on its own line at the given indentation.
void instruction_eval::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Eval()";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Eval is lowered to the context's unwind operation.
void instruction_eval::gen_llvm(llvm_context& ctx, Function* f) const {
    llvm_context& context = ctx;
    context.create_unwind(f);
}
|
||||||
|
|
||||||
|
// Prints "Alloc(<amount>)" on its own line at the given indentation.
void instruction_alloc::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Alloc(" << amount << ")";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Emits an alloc operation with `amount` as its size argument.
void instruction_alloc::gen_llvm(llvm_context& ctx, Function* f) const {
    auto how_many = ctx.create_size(amount);
    ctx.create_alloc(f, how_many);
}
|
||||||
|
|
||||||
|
// Prints "Unwind()" on its own line at the given indentation.
void instruction_unwind::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Unwind()";
    to << std::endl;
}
|
||||||
|
|
||||||
|
// Unwind produces no LLVM code of its own.
void instruction_unwind::gen_llvm(llvm_context& ctx, Function* f) const {
    // Intentionally empty; the parameters are unused.
    (void) ctx;
    (void) f;
}
|
||||||
142
code/compiler/10/instruction.hpp
Normal file
142
code/compiler/10/instruction.hpp
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <llvm/IR/Function.h>
|
||||||
|
#include <string>
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include <map>
|
||||||
|
#include <ostream>
|
||||||
|
#include "binop.hpp"
|
||||||
|
#include "llvm_context.hpp"
|
||||||
|
|
||||||
|
struct instruction {
|
||||||
|
virtual ~instruction() = default;
|
||||||
|
|
||||||
|
virtual void print(int indent, std::ostream& to) const = 0;
|
||||||
|
virtual void gen_llvm(llvm_context& ctx, llvm::Function* f) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
using instruction_ptr = std::unique_ptr<instruction>;
|
||||||
|
|
||||||
|
struct instruction_pushint : public instruction {
|
||||||
|
int value;
|
||||||
|
|
||||||
|
instruction_pushint(int v)
|
||||||
|
: value(v) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_pushglobal : public instruction {
|
||||||
|
std::string name;
|
||||||
|
|
||||||
|
instruction_pushglobal(std::string n)
|
||||||
|
: name(std::move(n)) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_push : public instruction {
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
instruction_push(int o)
|
||||||
|
: offset(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_pop : public instruction {
|
||||||
|
int count;
|
||||||
|
|
||||||
|
instruction_pop(int c)
|
||||||
|
: count(c) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_mkapp : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_update : public instruction {
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
instruction_update(int o)
|
||||||
|
: offset(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_pack : public instruction {
|
||||||
|
int tag;
|
||||||
|
int size;
|
||||||
|
|
||||||
|
instruction_pack(int t, int s)
|
||||||
|
: tag(t), size(s) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_split : public instruction {
|
||||||
|
int size;
|
||||||
|
|
||||||
|
instruction_split(int s)
|
||||||
|
: size(s) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_jump : public instruction {
|
||||||
|
std::vector<std::vector<instruction_ptr>> branches;
|
||||||
|
std::map<int, int> tag_mappings;
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_slide : public instruction {
|
||||||
|
int offset;
|
||||||
|
|
||||||
|
instruction_slide(int o)
|
||||||
|
: offset(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_binop : public instruction {
|
||||||
|
binop op;
|
||||||
|
|
||||||
|
instruction_binop(binop o)
|
||||||
|
: op(o) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_eval : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_alloc : public instruction {
|
||||||
|
int amount;
|
||||||
|
|
||||||
|
instruction_alloc(int a)
|
||||||
|
: amount(a) {}
|
||||||
|
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct instruction_unwind : public instruction {
|
||||||
|
void print(int indent, std::ostream& to) const;
|
||||||
|
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
|
||||||
|
};
|
||||||
278
code/compiler/10/llvm_context.cpp
Normal file
278
code/compiler/10/llvm_context.cpp
Normal file
@@ -0,0 +1,278 @@
|
|||||||
|
#include "llvm_context.hpp"
|
||||||
|
#include <llvm/IR/DerivedTypes.h>
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
void llvm_context::create_types() {
|
||||||
|
stack_type = StructType::create(ctx, "stack");
|
||||||
|
gmachine_type = StructType::create(ctx, "gmachine");
|
||||||
|
stack_ptr_type = PointerType::getUnqual(stack_type);
|
||||||
|
gmachine_ptr_type = PointerType::getUnqual(gmachine_type);
|
||||||
|
tag_type = IntegerType::getInt8Ty(ctx);
|
||||||
|
struct_types["node_base"] = StructType::create(ctx, "node_base");
|
||||||
|
struct_types["node_app"] = StructType::create(ctx, "node_app");
|
||||||
|
struct_types["node_num"] = StructType::create(ctx, "node_num");
|
||||||
|
struct_types["node_global"] = StructType::create(ctx, "node_global");
|
||||||
|
struct_types["node_ind"] = StructType::create(ctx, "node_ind");
|
||||||
|
struct_types["node_data"] = StructType::create(ctx, "node_data");
|
||||||
|
node_ptr_type = PointerType::getUnqual(struct_types.at("node_base"));
|
||||||
|
function_type = FunctionType::get(Type::getVoidTy(ctx), { gmachine_ptr_type }, false);
|
||||||
|
|
||||||
|
gmachine_type->setBody(
|
||||||
|
stack_ptr_type,
|
||||||
|
node_ptr_type,
|
||||||
|
IntegerType::getInt64Ty(ctx),
|
||||||
|
IntegerType::getInt64Ty(ctx)
|
||||||
|
);
|
||||||
|
struct_types.at("node_base")->setBody(
|
||||||
|
IntegerType::getInt32Ty(ctx),
|
||||||
|
IntegerType::getInt8Ty(ctx),
|
||||||
|
node_ptr_type
|
||||||
|
);
|
||||||
|
struct_types.at("node_app")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
node_ptr_type,
|
||||||
|
node_ptr_type
|
||||||
|
);
|
||||||
|
struct_types.at("node_num")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
IntegerType::getInt32Ty(ctx)
|
||||||
|
);
|
||||||
|
struct_types.at("node_global")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
FunctionType::get(Type::getVoidTy(ctx), { stack_ptr_type }, false)
|
||||||
|
);
|
||||||
|
struct_types.at("node_ind")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
node_ptr_type
|
||||||
|
);
|
||||||
|
struct_types.at("node_data")->setBody(
|
||||||
|
struct_types.at("node_base"),
|
||||||
|
IntegerType::getInt8Ty(ctx),
|
||||||
|
PointerType::getUnqual(node_ptr_type)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
void llvm_context::create_functions() {
|
||||||
|
auto void_type = Type::getVoidTy(ctx);
|
||||||
|
auto sizet_type = IntegerType::get(ctx, sizeof(size_t) * 8);
|
||||||
|
functions["stack_init"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_init",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_free"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_free",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_push"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type, node_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_push",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_pop"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { stack_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_pop",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_peek"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_peek",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["stack_popn"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"stack_popn",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["gmachine_slide"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"gmachine_slide",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["gmachine_update"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"gmachine_update",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["gmachine_alloc"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"gmachine_alloc",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["gmachine_pack"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type, tag_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"gmachine_pack",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["gmachine_split"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"gmachine_split",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["gmachine_track"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { gmachine_ptr_type, node_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"gmachine_track",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
|
||||||
|
auto int32_type = IntegerType::getInt32Ty(ctx);
|
||||||
|
functions["alloc_app"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { node_ptr_type, node_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"alloc_app",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["alloc_num"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { int32_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"alloc_num",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["alloc_global"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { function_type, int32_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"alloc_global",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
functions["alloc_ind"] = Function::Create(
|
||||||
|
FunctionType::get(node_ptr_type, { node_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"alloc_ind",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
|
||||||
|
functions["unwind"] = Function::Create(
|
||||||
|
FunctionType::get(void_type, { gmachine_ptr_type }, false),
|
||||||
|
Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"unwind",
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds an 8-bit LLVM integer constant holding `i`.
// The value is passed to APInt as a signed quantity: `i` is an int8_t, so a
// negative input should be interpreted as a sign-extended 64-bit value rather
// than as a huge unsigned number that only fits after implicit truncation.
ConstantInt* llvm_context::create_i8(int8_t i) {
    return ConstantInt::get(ctx, APInt(8, i, /* isSigned = */ true));
}
|
||||||
|
// Builds a 32-bit LLVM integer constant holding `i`.
// As `i` is an int32_t, APInt is told the value is signed so that negative
// inputs are represented by their 32-bit two's-complement pattern without
// depending on implicit truncation of the widened 64-bit argument.
ConstantInt* llvm_context::create_i32(int32_t i) {
    return ConstantInt::get(ctx, APInt(32, i, /* isSigned = */ true));
}
|
||||||
|
// Builds an integer constant holding `i`, as wide as size_t is in the
// program that is running this code generator (sizeof(size_t) * 8 bits).
ConstantInt* llvm_context::create_size(size_t i) {
    APInt value(sizeof(size_t) * 8, i);
    return ConstantInt::get(ctx, value);
}
|
||||||
|
|
||||||
|
// Emits a call to the runtime function registered as "stack_pop".
// The stack pointer is derived from the first argument of `f`.
// Returns the call instruction, i.e. the popped value.
Value* llvm_context::create_pop(Function* f) {
    Function* callee = functions.at("stack_pop");
    Value* stack_ptr = unwrap_gmachine_stack_ptr(f->arg_begin());
    return builder.CreateCall(callee, { stack_ptr });
}
|
||||||
|
// Emits a call to the runtime function registered as "stack_peek" with the
// stack pointer derived from the first argument of `f` and the offset `off`.
// Returns the call instruction, i.e. the peeked value.
Value* llvm_context::create_peek(Function* f, Value* off) {
    Function* callee = functions.at("stack_peek");
    Value* stack_ptr = unwrap_gmachine_stack_ptr(f->arg_begin());
    return builder.CreateCall(callee, { stack_ptr, off });
}
|
||||||
|
// Emits a call to the runtime function registered as "stack_push", pushing
// `v` onto the stack derived from the first argument of `f`.
void llvm_context::create_push(Function* f, Value* v) {
    Function* callee = functions.at("stack_push");
    Value* stack_ptr = unwrap_gmachine_stack_ptr(f->arg_begin());
    builder.CreateCall(callee, { stack_ptr, v });
}
|
||||||
|
// Emits a call to the runtime function registered as "stack_popn" with the
// stack derived from the first argument of `f` and the count `off`.
void llvm_context::create_popn(Function* f, Value* off) {
    Function* callee = functions.at("stack_popn");
    Value* stack_ptr = unwrap_gmachine_stack_ptr(f->arg_begin());
    builder.CreateCall(callee, { stack_ptr, off });
}
|
||||||
|
// Emits a call to the runtime function registered as "gmachine_update",
// passing the first argument of `f` (the machine) and the offset `off`.
void llvm_context::create_update(Function* f, Value* off) {
    Function* callee = functions.at("gmachine_update");
    Value* machine = f->arg_begin();
    builder.CreateCall(callee, { machine, off });
}
|
||||||
|
// Emits a call to the runtime function registered as "gmachine_pack",
// passing the first argument of `f` (the machine), the count `c` and the
// tag `t`.
void llvm_context::create_pack(Function* f, Value* c, Value* t) {
    Function* callee = functions.at("gmachine_pack");
    Value* machine = f->arg_begin();
    builder.CreateCall(callee, { machine, c, t });
}
|
||||||
|
// Emits a call to the runtime function registered as "gmachine_split",
// passing the first argument of `f` (the machine) and the count `c`.
void llvm_context::create_split(Function* f, Value* c) {
    Function* callee = functions.at("gmachine_split");
    Value* machine = f->arg_begin();
    builder.CreateCall(callee, { machine, c });
}
|
||||||
|
// Emits a call to the runtime function registered as "gmachine_slide",
// passing the first argument of `f` (the machine) and the offset `off`.
void llvm_context::create_slide(Function* f, Value* off) {
    Function* callee = functions.at("gmachine_slide");
    Value* machine = f->arg_begin();
    builder.CreateCall(callee, { machine, off });
}
|
||||||
|
// Emits a call to the runtime function registered as "gmachine_alloc",
// passing the first argument of `f` (the machine) and the count `n`.
void llvm_context::create_alloc(Function* f, Value* n) {
    Function* callee = functions.at("gmachine_alloc");
    Value* machine = f->arg_begin();
    builder.CreateCall(callee, { machine, n });
}
|
||||||
|
// Emits a call to the runtime function registered as "gmachine_track",
// passing the first argument of `f` (the machine) and the node `v`.
// Returns the call instruction, i.e. the value gmachine_track returns.
Value* llvm_context::create_track(Function* f, Value* v) {
    Function* callee = functions.at("gmachine_track");
    Value* machine = f->arg_begin();
    return builder.CreateCall(callee, { machine, v });
}
|
||||||
|
|
||||||
|
// Emits a call to the runtime function registered as "unwind", passing the
// first argument of `f` (the machine).
void llvm_context::create_unwind(Function* f) {
    Function* callee = functions.at("unwind");
    Value* machine = f->args().begin();
    builder.CreateCall(callee, { machine });
}
|
||||||
|
|
||||||
|
// Emits a GEP with indices {0, 0} on `g`, producing a pointer to the first
// field of the object `g` points at.
// NOTE(review): presumably that first field is the machine's stack, matching
// `struct gmachine` in runtime.h — confirm against create_types().
Value* llvm_context::unwrap_gmachine_stack_ptr(Value* g) {
    Value* zero = create_i32(0);
    return builder.CreateGEP(g, { zero, zero });
}
|
||||||
|
|
||||||
|
// Casts `v` to a pointer to the "node_num" struct type, addresses field 1 of
// that struct and emits a load of it. Returns the loaded value.
// NOTE(review): field 1 presumably corresponds to `value` in runtime.h's
// `struct node_num` — confirm against create_types().
Value* llvm_context::unwrap_num(Value* v) {
    auto node_num_ptr_type = PointerType::getUnqual(struct_types.at("node_num"));
    auto as_num = builder.CreatePointerCast(v, node_num_ptr_type);
    Value* struct_index = create_i32(0);
    Value* field_index = create_i32(1);
    auto value_ptr = builder.CreateGEP(as_num, { struct_index, field_index });
    return builder.CreateLoad(value_ptr);
}
|
||||||
|
// Emits a call to "alloc_num" with `v`, then hands the resulting node to
// create_track so the machine passed to `f` is told about it.
// Returns the value produced by the tracking call.
Value* llvm_context::create_num(Function* f, Value* v) {
    Function* allocator = functions.at("alloc_num");
    Value* fresh_node = builder.CreateCall(allocator, { v });
    return create_track(f, fresh_node);
}
|
||||||
|
|
||||||
|
// Casts `v` to a pointer to the "node_data" struct type, addresses field 1
// of that struct and emits a load of it. Returns the loaded value.
// NOTE(review): field 1 presumably corresponds to `tag` in runtime.h's
// `struct node_data` — confirm against create_types().
Value* llvm_context::unwrap_data_tag(Value* v) {
    auto node_data_ptr_type = PointerType::getUnqual(struct_types.at("node_data"));
    auto as_data = builder.CreatePointerCast(v, node_data_ptr_type);
    Value* struct_index = create_i32(0);
    Value* field_index = create_i32(1);
    auto tag_field_ptr = builder.CreateGEP(as_data, { struct_index, field_index });
    return builder.CreateLoad(tag_field_ptr);
}
|
||||||
|
|
||||||
|
// Emits a call to "alloc_global" with the function `gf` and arity `a`, then
// hands the resulting node to create_track.
// Returns the value produced by the tracking call.
Value* llvm_context::create_global(Function* f, Value* gf, Value* a) {
    Function* allocator = functions.at("alloc_global");
    Value* fresh_node = builder.CreateCall(allocator, { gf, a });
    return create_track(f, fresh_node);
}
|
||||||
|
|
||||||
|
// Emits a call to "alloc_app" with the nodes `l` and `r`, then hands the
// resulting node to create_track.
// Returns the value produced by the tracking call.
Value* llvm_context::create_app(Function* f, Value* l, Value* r) {
    Function* allocator = functions.at("alloc_app");
    Value* fresh_node = builder.CreateCall(allocator, { l, r });
    return create_track(f, fresh_node);
}
|
||||||
|
|
||||||
|
// Declares a new externally-linked function named "f_<name>" in the module,
// gives it an "entry" basic block, and records it (with its arity) in
// custom_functions under the same "f_<name>" key.
//
// name:  source-level name; the emitted symbol is prefixed with "f_".
// arity: value stored in the custom_function record.
// Returns the newly created llvm::Function.
llvm::Function* llvm_context::create_custom_function(std::string name, int32_t arity) {
    // The symbol name doubles as the registry key, so build it once.
    const std::string symbol = "f_" + name;

    auto new_function = llvm::Function::Create(
        function_type,
        llvm::Function::LinkageTypes::ExternalLinkage,
        symbol,
        &module
    );
    // Called for its side effect only: the block is attached to new_function,
    // and callers retrieve it later through getEntryBlock().
    llvm::BasicBlock::Create(ctx, "entry", new_function);

    auto new_custom_f = custom_function_ptr(new custom_function());
    new_custom_f->arity = arity;
    new_custom_f->function = new_function;
    custom_functions[symbol] = std::move(new_custom_f);

    return new_function;
}
|
||||||
72
code/compiler/10/llvm_context.hpp
Normal file
72
code/compiler/10/llvm_context.hpp
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <llvm/IR/DerivedTypes.h>
|
||||||
|
#include <llvm/IR/Function.h>
|
||||||
|
#include <llvm/IR/LLVMContext.h>
|
||||||
|
#include <llvm/IR/IRBuilder.h>
|
||||||
|
#include <llvm/IR/Module.h>
|
||||||
|
#include <llvm/IR/Value.h>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
struct llvm_context {
|
||||||
|
struct custom_function {
|
||||||
|
llvm::Function* function;
|
||||||
|
int32_t arity;
|
||||||
|
};
|
||||||
|
|
||||||
|
using custom_function_ptr = std::unique_ptr<custom_function>;
|
||||||
|
|
||||||
|
llvm::LLVMContext ctx;
|
||||||
|
llvm::IRBuilder<> builder;
|
||||||
|
llvm::Module module;
|
||||||
|
|
||||||
|
std::map<std::string, custom_function_ptr> custom_functions;
|
||||||
|
std::map<std::string, llvm::Function*> functions;
|
||||||
|
std::map<std::string, llvm::StructType*> struct_types;
|
||||||
|
|
||||||
|
llvm::StructType* stack_type;
|
||||||
|
llvm::StructType* gmachine_type;
|
||||||
|
llvm::PointerType* stack_ptr_type;
|
||||||
|
llvm::PointerType* gmachine_ptr_type;
|
||||||
|
llvm::PointerType* node_ptr_type;
|
||||||
|
llvm::IntegerType* tag_type;
|
||||||
|
llvm::FunctionType* function_type;
|
||||||
|
|
||||||
|
llvm_context()
|
||||||
|
: builder(ctx), module("bloglang", ctx) {
|
||||||
|
create_types();
|
||||||
|
create_functions();
|
||||||
|
}
|
||||||
|
|
||||||
|
void create_types();
|
||||||
|
void create_functions();
|
||||||
|
|
||||||
|
llvm::ConstantInt* create_i8(int8_t);
|
||||||
|
llvm::ConstantInt* create_i32(int32_t);
|
||||||
|
llvm::ConstantInt* create_size(size_t);
|
||||||
|
|
||||||
|
llvm::Value* create_pop(llvm::Function*);
|
||||||
|
llvm::Value* create_peek(llvm::Function*, llvm::Value*);
|
||||||
|
void create_push(llvm::Function*, llvm::Value*);
|
||||||
|
void create_popn(llvm::Function*, llvm::Value*);
|
||||||
|
void create_update(llvm::Function*, llvm::Value*);
|
||||||
|
void create_pack(llvm::Function*, llvm::Value*, llvm::Value*);
|
||||||
|
void create_split(llvm::Function*, llvm::Value*);
|
||||||
|
void create_slide(llvm::Function*, llvm::Value*);
|
||||||
|
void create_alloc(llvm::Function*, llvm::Value*);
|
||||||
|
llvm::Value* create_track(llvm::Function*, llvm::Value*);
|
||||||
|
|
||||||
|
void create_unwind(llvm::Function*);
|
||||||
|
|
||||||
|
llvm::Value* unwrap_gmachine_stack_ptr(llvm::Value*);
|
||||||
|
|
||||||
|
llvm::Value* unwrap_num(llvm::Value*);
|
||||||
|
llvm::Value* create_num(llvm::Function*, llvm::Value*);
|
||||||
|
|
||||||
|
llvm::Value* unwrap_data_tag(llvm::Value*);
|
||||||
|
|
||||||
|
llvm::Value* create_global(llvm::Function*, llvm::Value*, llvm::Value*);
|
||||||
|
|
||||||
|
llvm::Value* create_app(llvm::Function*, llvm::Value*, llvm::Value*);
|
||||||
|
|
||||||
|
llvm::Function* create_custom_function(std::string name, int32_t arity);
|
||||||
|
};
|
||||||
172
code/compiler/10/main.cpp
Normal file
172
code/compiler/10/main.cpp
Normal file
@@ -0,0 +1,172 @@
|
|||||||
|
#include "ast.hpp"
|
||||||
|
#include <iostream>
|
||||||
|
#include "binop.hpp"
|
||||||
|
#include "definition.hpp"
|
||||||
|
#include "instruction.hpp"
|
||||||
|
#include "llvm_context.hpp"
|
||||||
|
#include "parser.hpp"
|
||||||
|
#include "error.hpp"
|
||||||
|
#include "type.hpp"
|
||||||
|
#include "llvm/IR/LegacyPassManager.h"
|
||||||
|
#include "llvm/IR/Verifier.h"
|
||||||
|
#include "llvm/Support/TargetSelect.h"
|
||||||
|
#include "llvm/Support/TargetRegistry.h"
|
||||||
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
#include "llvm/Support/FileSystem.h"
|
||||||
|
#include "llvm/Target/TargetOptions.h"
|
||||||
|
#include "llvm/Target/TargetMachine.h"
|
||||||
|
|
||||||
|
// Error callback of the generated parser: prints the parser's message to
// standard output. (Fixes the misspelling "occured" in the reported text.)
void yy::parser::error(const std::string& msg) {
    std::cout << "An error occurred: " << msg << std::endl;
}
|
||||||
|
|
||||||
|
extern std::vector<definition_ptr> program;
|
||||||
|
|
||||||
|
void typecheck_program(
|
||||||
|
const std::vector<definition_ptr>& prog,
|
||||||
|
type_mgr& mgr, type_env& env) {
|
||||||
|
type_ptr int_type = type_ptr(new type_base("Int"));
|
||||||
|
type_ptr binop_type = type_ptr(new type_arr(
|
||||||
|
int_type,
|
||||||
|
type_ptr(new type_arr(int_type, int_type))));
|
||||||
|
|
||||||
|
env.bind("+", binop_type);
|
||||||
|
env.bind("-", binop_type);
|
||||||
|
env.bind("*", binop_type);
|
||||||
|
env.bind("/", binop_type);
|
||||||
|
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->typecheck_first(mgr, env);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->typecheck_second(mgr, env);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& pair : env.names) {
|
||||||
|
std::cout << pair.first << ": ";
|
||||||
|
pair.second->print(mgr, std::cout);
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void compile_program(const std::vector<definition_ptr>& prog) {
|
||||||
|
for(auto& def : prog) {
|
||||||
|
def->compile();
|
||||||
|
|
||||||
|
definition_defn* defn = dynamic_cast<definition_defn*>(def.get());
|
||||||
|
if(!defn) continue;
|
||||||
|
for(auto& instruction : defn->instructions) {
|
||||||
|
instruction->print(0, std::cout);
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generates the built-in function for binary operator `op`.
// A custom function of arity 2 named after op_action(op) is created, and its
// body is produced from a fixed instruction sequence:
//   Push 1, Eval, Push 1, Eval, BinOp op, Update 2, Pop 2
// followed by a `ret void`.
void gen_llvm_internal_op(llvm_context& ctx, binop op) {
    llvm::Function* op_function = ctx.create_custom_function(op_action(op), 2);

    std::vector<instruction_ptr> body;
    // Fetch and evaluate one operand...
    body.push_back(instruction_ptr(new instruction_push(1)));
    body.push_back(instruction_ptr(new instruction_eval()));
    // ...then the other.
    body.push_back(instruction_ptr(new instruction_push(1)));
    body.push_back(instruction_ptr(new instruction_eval()));
    // Combine them and clean up.
    body.push_back(instruction_ptr(new instruction_binop(op)));
    body.push_back(instruction_ptr(new instruction_update(2)));
    body.push_back(instruction_ptr(new instruction_pop(2)));

    ctx.builder.SetInsertPoint(&op_function->getEntryBlock());
    for(auto& step : body) {
        step->gen_llvm(ctx, op_function);
    }
    ctx.builder.CreateRetVoid();
}
|
||||||
|
|
||||||
|
// Compiles ctx.module to a native object file written to `filename`.
//
// The module's data layout and target triple are set from the host's default
// target. If the target cannot be looked up, the lookup error is printed to
// stderr and the function returns without writing anything. Any later
// failure (no target machine, output file cannot be opened, target cannot
// emit object files) is reported on stderr and signalled with `throw 0`.
void output_llvm(llvm_context& ctx, const std::string& filename) {
    std::string targetTriple = llvm::sys::getDefaultTargetTriple();

    llvm::InitializeNativeTarget();
    llvm::InitializeNativeTargetAsmParser();
    llvm::InitializeNativeTargetAsmPrinter();

    std::string error;
    const llvm::Target* target =
        llvm::TargetRegistry::lookupTarget(targetTriple, error);
    if (!target) {
        std::cerr << error << std::endl;
        return;
    }

    std::string cpu = "generic";
    std::string features = "";
    llvm::TargetOptions options;
    // createTargetMachine returns an owning raw pointer; hold it in a
    // unique_ptr so it is released on every exit path, including throws.
    std::unique_ptr<llvm::TargetMachine> targetMachine(
        target->createTargetMachine(targetTriple, cpu, features,
            options, llvm::Optional<llvm::Reloc::Model>()));
    if (!targetMachine) {
        std::cerr << "could not create a target machine for "
                  << targetTriple << std::endl;
        throw 0;
    }

    ctx.module.setDataLayout(targetMachine->createDataLayout());
    ctx.module.setTargetTriple(targetTriple);

    std::error_code ec;
    llvm::raw_fd_ostream file(filename, ec, llvm::sys::fs::F_None);
    if (ec) {
        std::cerr << "could not open " << filename << ": "
                  << ec.message() << std::endl;
        throw 0;
    }

    llvm::TargetMachine::CodeGenFileType type = llvm::TargetMachine::CGFT_ObjectFile;
    llvm::legacy::PassManager pm;
    // addPassesToEmitFile returns true when the target cannot emit this
    // file type.
    if (targetMachine->addPassesToEmitFile(pm, file, NULL, type)) {
        std::cerr << "target cannot emit an object file" << std::endl;
        throw 0;
    }

    pm.run(ctx.module);
    file.close();
}
|
||||||
|
|
||||||
|
void gen_llvm(const std::vector<definition_ptr>& prog) {
|
||||||
|
llvm_context ctx;
|
||||||
|
gen_llvm_internal_op(ctx, PLUS);
|
||||||
|
gen_llvm_internal_op(ctx, MINUS);
|
||||||
|
gen_llvm_internal_op(ctx, TIMES);
|
||||||
|
gen_llvm_internal_op(ctx, DIVIDE);
|
||||||
|
|
||||||
|
for(auto& definition : prog) {
|
||||||
|
definition->gen_llvm_first(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(auto& definition : prog) {
|
||||||
|
definition->gen_llvm_second(ctx);
|
||||||
|
}
|
||||||
|
ctx.module.print(llvm::outs(), nullptr);
|
||||||
|
output_llvm(ctx, "program.o");
|
||||||
|
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
yy::parser parser;
|
||||||
|
type_mgr mgr;
|
||||||
|
type_env env;
|
||||||
|
|
||||||
|
parser.parse();
|
||||||
|
for(auto& definition : program) {
|
||||||
|
definition_defn* def = dynamic_cast<definition_defn*>(definition.get());
|
||||||
|
if(!def) continue;
|
||||||
|
|
||||||
|
std::cout << def->name;
|
||||||
|
for(auto& param : def->params) std::cout << " " << param;
|
||||||
|
std::cout << ":" << std::endl;
|
||||||
|
|
||||||
|
def->body->print(1, std::cout);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
typecheck_program(program, mgr, env);
|
||||||
|
compile_program(program);
|
||||||
|
gen_llvm(program);
|
||||||
|
} catch(unification_error& err) {
|
||||||
|
std::cout << "failed to unify types: " << std::endl;
|
||||||
|
std::cout << " (1) \033[34m";
|
||||||
|
err.left->print(mgr, std::cout);
|
||||||
|
std::cout << "\033[0m" << std::endl;
|
||||||
|
std::cout << " (2) \033[32m";
|
||||||
|
err.right->print(mgr, std::cout);
|
||||||
|
std::cout << "\033[0m" << std::endl;
|
||||||
|
} catch(type_error& err) {
|
||||||
|
std::cout << "failed to type check program: " << err.description << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
141
code/compiler/10/parser.y
Normal file
141
code/compiler/10/parser.y
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
%{
|
||||||
|
#include <string>
|
||||||
|
#include <iostream>
|
||||||
|
#include "ast.hpp"
|
||||||
|
#include "definition.hpp"
|
||||||
|
#include "parser.hpp"
|
||||||
|
|
||||||
|
std::vector<definition_ptr> program;
|
||||||
|
extern yy::parser::symbol_type yylex();
|
||||||
|
|
||||||
|
%}
|
||||||
|
|
||||||
|
%token PLUS
|
||||||
|
%token TIMES
|
||||||
|
%token MINUS
|
||||||
|
%token DIVIDE
|
||||||
|
%token <int> INT
|
||||||
|
%token DEFN
|
||||||
|
%token DATA
|
||||||
|
%token CASE
|
||||||
|
%token OF
|
||||||
|
%token OCURLY
|
||||||
|
%token CCURLY
|
||||||
|
%token OPAREN
|
||||||
|
%token CPAREN
|
||||||
|
%token COMMA
|
||||||
|
%token ARROW
|
||||||
|
%token EQUAL
|
||||||
|
%token <std::string> LID
|
||||||
|
%token <std::string> UID
|
||||||
|
|
||||||
|
%language "c++"
|
||||||
|
%define api.value.type variant
|
||||||
|
%define api.token.constructor
|
||||||
|
|
||||||
|
%type <std::vector<std::string>> lowercaseParams uppercaseParams
|
||||||
|
%type <std::vector<definition_ptr>> program definitions
|
||||||
|
%type <std::vector<branch_ptr>> branches
|
||||||
|
%type <std::vector<constructor_ptr>> constructors
|
||||||
|
%type <ast_ptr> aAdd aMul case app appBase
|
||||||
|
%type <definition_ptr> definition defn data
|
||||||
|
%type <branch_ptr> branch
|
||||||
|
%type <pattern_ptr> pattern
|
||||||
|
%type <constructor_ptr> constructor
|
||||||
|
|
||||||
|
%start program
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
program
|
||||||
|
: definitions { program = std::move($1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
definitions
|
||||||
|
: definitions definition { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||||
|
| definition { $$ = std::vector<definition_ptr>(); $$.push_back(std::move($1)); }
|
||||||
|
;
|
||||||
|
|
||||||
|
definition
|
||||||
|
: defn { $$ = std::move($1); }
|
||||||
|
| data { $$ = std::move($1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
defn
|
||||||
|
: DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY
|
||||||
|
{ $$ = definition_ptr(
|
||||||
|
new definition_defn(std::move($2), std::move($3), std::move($6))); }
|
||||||
|
;
|
||||||
|
|
||||||
|
lowercaseParams
|
||||||
|
: %empty { $$ = std::vector<std::string>(); }
|
||||||
|
| lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||||
|
;
|
||||||
|
|
||||||
|
uppercaseParams
|
||||||
|
: %empty { $$ = std::vector<std::string>(); }
|
||||||
|
| uppercaseParams UID { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||||
|
;
|
||||||
|
|
||||||
|
aAdd
|
||||||
|
: aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); }
|
||||||
|
| aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); }
|
||||||
|
| aMul { $$ = std::move($1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
aMul
|
||||||
|
: aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3))); }
|
||||||
|
| aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3))); }
|
||||||
|
| app { $$ = std::move($1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
app
|
||||||
|
: app appBase { $$ = ast_ptr(new ast_app(std::move($1), std::move($2))); }
|
||||||
|
| appBase { $$ = std::move($1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
appBase
|
||||||
|
: INT { $$ = ast_ptr(new ast_int($1)); }
|
||||||
|
| LID { $$ = ast_ptr(new ast_lid(std::move($1))); }
|
||||||
|
| UID { $$ = ast_ptr(new ast_uid(std::move($1))); }
|
||||||
|
| OPAREN aAdd CPAREN { $$ = std::move($2); }
|
||||||
|
| case { $$ = std::move($1); }
|
||||||
|
;
|
||||||
|
|
||||||
|
case
|
||||||
|
: CASE aAdd OF OCURLY branches CCURLY
|
||||||
|
{ $$ = ast_ptr(new ast_case(std::move($2), std::move($5))); }
|
||||||
|
;
|
||||||
|
|
||||||
|
branches
|
||||||
|
: branches branch { $$ = std::move($1); $$.push_back(std::move($2)); }
|
||||||
|
| branch { $$ = std::vector<branch_ptr>(); $$.push_back(std::move($1));}
|
||||||
|
;
|
||||||
|
|
||||||
|
branch
|
||||||
|
: pattern ARROW OCURLY aAdd CCURLY
|
||||||
|
{ $$ = branch_ptr(new branch(std::move($1), std::move($4))); }
|
||||||
|
;
|
||||||
|
|
||||||
|
pattern
|
||||||
|
: LID { $$ = pattern_ptr(new pattern_var(std::move($1))); }
|
||||||
|
| UID lowercaseParams
|
||||||
|
{ $$ = pattern_ptr(new pattern_constr(std::move($1), std::move($2))); }
|
||||||
|
;
|
||||||
|
|
||||||
|
data
|
||||||
|
: DATA UID EQUAL OCURLY constructors CCURLY
|
||||||
|
{ $$ = definition_ptr(new definition_data(std::move($2), std::move($5))); }
|
||||||
|
;
|
||||||
|
|
||||||
|
constructors
|
||||||
|
: constructors COMMA constructor { $$ = std::move($1); $$.push_back(std::move($3)); }
|
||||||
|
| constructor
|
||||||
|
{ $$ = std::vector<constructor_ptr>(); $$.push_back(std::move($1)); }
|
||||||
|
;
|
||||||
|
|
||||||
|
constructor
|
||||||
|
: UID uppercaseParams
|
||||||
|
{ $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); }
|
||||||
|
;
|
||||||
|
|
||||||
269
code/compiler/10/runtime.c
Normal file
269
code/compiler/10/runtime.c
Normal file
@@ -0,0 +1,269 @@
|
|||||||
|
#include <stdint.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <memory.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "runtime.h"
|
||||||
|
|
||||||
|
struct node_base* alloc_node() {
|
||||||
|
struct node_base* new_node = malloc(sizeof(struct node_app));
|
||||||
|
new_node->gc_next = NULL;
|
||||||
|
new_node->gc_reachable = 0;
|
||||||
|
assert(new_node != NULL);
|
||||||
|
return new_node;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_app* alloc_app(struct node_base* l, struct node_base* r) {
|
||||||
|
struct node_app* node = (struct node_app*) alloc_node();
|
||||||
|
node->base.tag = NODE_APP;
|
||||||
|
node->left = l;
|
||||||
|
node->right = r;
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_num* alloc_num(int32_t n) {
|
||||||
|
struct node_num* node = (struct node_num*) alloc_node();
|
||||||
|
node->base.tag = NODE_NUM;
|
||||||
|
node->value = n;
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a) {
|
||||||
|
struct node_global* node = (struct node_global*) alloc_node();
|
||||||
|
node->base.tag = NODE_GLOBAL;
|
||||||
|
node->arity = a;
|
||||||
|
node->function = f;
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_ind* alloc_ind(struct node_base* n) {
|
||||||
|
struct node_ind* node = (struct node_ind*) alloc_node();
|
||||||
|
node->base.tag = NODE_IND;
|
||||||
|
node->next = n;
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
void free_node_direct(struct node_base* n) {
|
||||||
|
if(n->tag == NODE_DATA) {
|
||||||
|
free(((struct node_data*) n)->array);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void gc_visit_node(struct node_base* n) {
|
||||||
|
if(n->gc_reachable) return;
|
||||||
|
n->gc_reachable = 1;
|
||||||
|
|
||||||
|
if(n->tag == NODE_APP) {
|
||||||
|
struct node_app* app = (struct node_app*) n;
|
||||||
|
gc_visit_node(app->left);
|
||||||
|
gc_visit_node(app->right);
|
||||||
|
} if(n->tag == NODE_IND) {
|
||||||
|
struct node_ind* ind = (struct node_ind*) n;
|
||||||
|
gc_visit_node(ind->next);
|
||||||
|
} if(n->tag == NODE_DATA) {
|
||||||
|
struct node_data* data = (struct node_data*) n;
|
||||||
|
struct node_base** to_visit = data->array;
|
||||||
|
while(*to_visit) {
|
||||||
|
gc_visit_node(*to_visit);
|
||||||
|
to_visit++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_init(struct stack* s) {
|
||||||
|
s->size = 4;
|
||||||
|
s->count = 0;
|
||||||
|
s->data = malloc(sizeof(*s->data) * s->size);
|
||||||
|
assert(s->data != NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_free(struct stack* s) {
|
||||||
|
free(s->data);
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_push(struct stack* s, struct node_base* n) {
|
||||||
|
while(s->count >= s->size) {
|
||||||
|
s->data = realloc(s->data, sizeof(*s->data) * (s->size *= 2));
|
||||||
|
assert(s->data != NULL);
|
||||||
|
}
|
||||||
|
s->data[s->count++] = n;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_base* stack_pop(struct stack* s) {
|
||||||
|
assert(s->count > 0);
|
||||||
|
return s->data[--s->count];
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_base* stack_peek(struct stack* s, size_t o) {
|
||||||
|
assert(s->count > o);
|
||||||
|
return s->data[s->count - o - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
void stack_popn(struct stack* s, size_t n) {
|
||||||
|
assert(s->count >= n);
|
||||||
|
s->count -= n;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gmachine_init(struct gmachine* g) {
|
||||||
|
stack_init(&g->stack);
|
||||||
|
g->gc_nodes = NULL;
|
||||||
|
g->gc_node_count = 0;
|
||||||
|
g->gc_node_threshold = 128;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gmachine_free(struct gmachine* g) {
|
||||||
|
stack_free(&g->stack);
|
||||||
|
struct node_base* to_free = g->gc_nodes;
|
||||||
|
struct node_base* next;
|
||||||
|
|
||||||
|
while(to_free) {
|
||||||
|
next = to_free->gc_next;
|
||||||
|
free_node_direct(to_free);
|
||||||
|
free(to_free);
|
||||||
|
to_free = next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void gmachine_slide(struct gmachine* g, size_t n) {
|
||||||
|
assert(g->stack.count > n);
|
||||||
|
g->stack.data[g->stack.count - n - 1] = g->stack.data[g->stack.count - 1];
|
||||||
|
g->stack.count -= n;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gmachine_update(struct gmachine* g, size_t o) {
|
||||||
|
assert(g->stack.count > o + 1);
|
||||||
|
struct node_ind* ind =
|
||||||
|
(struct node_ind*) g->stack.data[g->stack.count - o - 2];
|
||||||
|
ind->base.tag = NODE_IND;
|
||||||
|
ind->next = g->stack.data[g->stack.count -= 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
void gmachine_alloc(struct gmachine* g, size_t o) {
|
||||||
|
while(o--) {
|
||||||
|
stack_push(&g->stack,
|
||||||
|
gmachine_track(g, (struct node_base*) alloc_ind(NULL)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void gmachine_pack(struct gmachine* g, size_t n, int8_t t) {
|
||||||
|
assert(g->stack.count >= n);
|
||||||
|
|
||||||
|
struct node_base** data = malloc(sizeof(*data) * (n + 1));
|
||||||
|
assert(data != NULL);
|
||||||
|
memcpy(data, &g->stack.data[g->stack.count - n], n * sizeof(*data));
|
||||||
|
data[n] = NULL;
|
||||||
|
|
||||||
|
struct node_data* new_node = (struct node_data*) alloc_node();
|
||||||
|
new_node->array = data;
|
||||||
|
new_node->base.tag = NODE_DATA;
|
||||||
|
new_node->tag = t;
|
||||||
|
|
||||||
|
stack_popn(&g->stack, n);
|
||||||
|
stack_push(&g->stack, gmachine_track(g, (struct node_base*) new_node));
|
||||||
|
}
|
||||||
|
|
||||||
|
void gmachine_split(struct gmachine* g, size_t n) {
|
||||||
|
struct node_data* node = (struct node_data*) stack_pop(&g->stack);
|
||||||
|
for(size_t i = 0; i < n; i++) {
|
||||||
|
stack_push(&g->stack, node->array[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_base* gmachine_track(struct gmachine* g, struct node_base* b) {
|
||||||
|
g->gc_node_count++;
|
||||||
|
b->gc_next = g->gc_nodes;
|
||||||
|
g->gc_nodes = b;
|
||||||
|
|
||||||
|
if(g->gc_node_count >= g->gc_node_threshold) {
|
||||||
|
uint64_t nodes_before = g->gc_node_count;
|
||||||
|
gc_visit_node(b);
|
||||||
|
gmachine_gc(g);
|
||||||
|
g->gc_node_threshold = g->gc_node_count * 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
return b;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gmachine_gc(struct gmachine* g) {
|
||||||
|
for(size_t i = 0; i < g->stack.count; i++) {
|
||||||
|
gc_visit_node(g->stack.data[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct node_base** head_ptr = &g->gc_nodes;
|
||||||
|
while(*head_ptr) {
|
||||||
|
if((*head_ptr)->gc_reachable) {
|
||||||
|
(*head_ptr)->gc_reachable = 0;
|
||||||
|
head_ptr = &(*head_ptr)->gc_next;
|
||||||
|
} else {
|
||||||
|
struct node_base* to_free = *head_ptr;
|
||||||
|
*head_ptr = to_free->gc_next;
|
||||||
|
free_node_direct(to_free);
|
||||||
|
free(to_free);
|
||||||
|
g->gc_node_count--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void unwind(struct gmachine* g) {
|
||||||
|
struct stack* s = &g->stack;
|
||||||
|
|
||||||
|
while(1) {
|
||||||
|
struct node_base* peek = stack_peek(s, 0);
|
||||||
|
if(peek->tag == NODE_APP) {
|
||||||
|
struct node_app* n = (struct node_app*) peek;
|
||||||
|
stack_push(s, n->left);
|
||||||
|
} else if(peek->tag == NODE_GLOBAL) {
|
||||||
|
struct node_global* n = (struct node_global*) peek;
|
||||||
|
assert(s->count > n->arity);
|
||||||
|
|
||||||
|
for(size_t i = 1; i <= n->arity; i++) {
|
||||||
|
s->data[s->count - i]
|
||||||
|
= ((struct node_app*) s->data[s->count - i - 1])->right;
|
||||||
|
}
|
||||||
|
|
||||||
|
n->function(g);
|
||||||
|
} else if(peek->tag == NODE_IND) {
|
||||||
|
struct node_ind* n = (struct node_ind*) peek;
|
||||||
|
stack_pop(s);
|
||||||
|
stack_push(s, n->next);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
extern void f_main(struct gmachine* s);
|
||||||
|
|
||||||
|
void print_node(struct node_base* n) {
|
||||||
|
if(n->tag == NODE_APP) {
|
||||||
|
struct node_app* app = (struct node_app*) n;
|
||||||
|
print_node(app->left);
|
||||||
|
putchar(' ');
|
||||||
|
print_node(app->right);
|
||||||
|
} else if(n->tag == NODE_DATA) {
|
||||||
|
printf("(Packed)");
|
||||||
|
} else if(n->tag == NODE_GLOBAL) {
|
||||||
|
struct node_global* global = (struct node_global*) n;
|
||||||
|
printf("(Global: %p)", global->function);
|
||||||
|
} else if(n->tag == NODE_IND) {
|
||||||
|
print_node(((struct node_ind*) n)->next);
|
||||||
|
} else if(n->tag == NODE_NUM) {
|
||||||
|
struct node_num* num = (struct node_num*) n;
|
||||||
|
printf("%d", num->value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
struct gmachine gmachine;
|
||||||
|
struct node_global* first_node = alloc_global(f_main, 0);
|
||||||
|
struct node_base* result;
|
||||||
|
|
||||||
|
gmachine_init(&gmachine);
|
||||||
|
gmachine_track(&gmachine, (struct node_base*) first_node);
|
||||||
|
stack_push(&gmachine.stack, (struct node_base*) first_node);
|
||||||
|
unwind(&gmachine);
|
||||||
|
result = stack_pop(&gmachine.stack);
|
||||||
|
printf("Result: ");
|
||||||
|
print_node(result);
|
||||||
|
putchar('\n');
|
||||||
|
gmachine_free(&gmachine);
|
||||||
|
}
|
||||||
84
code/compiler/10/runtime.h
Normal file
84
code/compiler/10/runtime.h
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
#pragma once
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
struct gmachine;
|
||||||
|
|
||||||
|
enum node_tag {
|
||||||
|
NODE_APP,
|
||||||
|
NODE_NUM,
|
||||||
|
NODE_GLOBAL,
|
||||||
|
NODE_IND,
|
||||||
|
NODE_DATA
|
||||||
|
};
|
||||||
|
|
||||||
|
struct node_base {
|
||||||
|
enum node_tag tag;
|
||||||
|
int8_t gc_reachable;
|
||||||
|
struct node_base* gc_next;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct node_app {
|
||||||
|
struct node_base base;
|
||||||
|
struct node_base* left;
|
||||||
|
struct node_base* right;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct node_num {
|
||||||
|
struct node_base base;
|
||||||
|
int32_t value;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct node_global {
|
||||||
|
struct node_base base;
|
||||||
|
int32_t arity;
|
||||||
|
void (*function)(struct gmachine*);
|
||||||
|
};
|
||||||
|
|
||||||
|
struct node_ind {
|
||||||
|
struct node_base base;
|
||||||
|
struct node_base* next;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct node_data {
|
||||||
|
struct node_base base;
|
||||||
|
int8_t tag;
|
||||||
|
struct node_base** array;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Node allocation and collection helpers (defined in the runtime source).
 * alloc_node is declared with (void) so the compiler checks it is called
 * without arguments; an empty parameter list in C leaves that unchecked.
 */
struct node_base* alloc_node(void);
struct node_app* alloc_app(struct node_base* l, struct node_base* r);
struct node_num* alloc_num(int32_t n);
struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a);
struct node_ind* alloc_ind(struct node_base* n);
void free_node_direct(struct node_base*);
void gc_visit_node(struct node_base*);
|
||||||
|
|
||||||
|
/* Stack of node pointers backed by the data buffer. */
struct stack {
    /* NOTE(review): presumably the allocated capacity of data - confirm in runtime.c. */
    size_t size;
    /* NOTE(review): presumably the number of entries currently stored - confirm. */
    size_t count;
    struct node_base** data;
};
|
||||||
|
|
||||||
|
/* Operations on struct stack (defined in the runtime source). */

/* Lifetime. */
void stack_init(struct stack* s);
void stack_free(struct stack* s);

/* Element access; o is an offset and n a count (exact semantics: see runtime.c). */
void stack_push(struct stack* s, struct node_base* n);
struct node_base* stack_pop(struct stack* s);
struct node_base* stack_peek(struct stack* s, size_t o);
void stack_popn(struct stack* s, size_t n);
|
||||||
|
|
||||||
|
struct gmachine {
|
||||||
|
struct stack stack;
|
||||||
|
struct node_base* gc_nodes;
|
||||||
|
int64_t gc_node_count;
|
||||||
|
int64_t gc_node_threshold;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Operations on struct gmachine (defined in the runtime source). */

/* Lifetime. */
void gmachine_init(struct gmachine* g);
void gmachine_free(struct gmachine* g);

/* Stack/graph manipulation; n is a count, o an offset, t a data tag. */
void gmachine_slide(struct gmachine* g, size_t n);
void gmachine_update(struct gmachine* g, size_t o);
void gmachine_alloc(struct gmachine* g, size_t o);
void gmachine_pack(struct gmachine* g, size_t n, int8_t t);
void gmachine_split(struct gmachine* g, size_t n);

/* Garbage-collection entry points. */
struct node_base* gmachine_track(struct gmachine* g, struct node_base* b);
void gmachine_gc(struct gmachine* g);
|
||||||
35
code/compiler/10/scanner.l
Normal file
35
code/compiler/10/scanner.l
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
%option noyywrap

%{
// Lexer for the compiler front end. Each rule returns a token built by the
// Bison-generated yy::parser::make_* factory functions.
//
// Rule order matters: flex prefers the longest match and, on a tie, the rule
// listed first, so the keywords (defn, data, case, of) must stay above the
// lowercase-identifier rule.
#include <iostream>
#include "ast.hpp"
#include "definition.hpp"
#include "parser.hpp"

#define YY_DECL yy::parser::symbol_type yylex()

%}

%%

[ \t\r\n]+ { /* skip whitespace, including tabs and carriage returns */ }
\+ { return yy::parser::make_PLUS(); }
\* { return yy::parser::make_TIMES(); }
- { return yy::parser::make_MINUS(); }
\/ { return yy::parser::make_DIVIDE(); }
[0-9]+ { return yy::parser::make_INT(atoi(yytext)); }
defn { return yy::parser::make_DEFN(); }
data { return yy::parser::make_DATA(); }
case { return yy::parser::make_CASE(); }
of { return yy::parser::make_OF(); }
\{ { return yy::parser::make_OCURLY(); }
\} { return yy::parser::make_CCURLY(); }
\( { return yy::parser::make_OPAREN(); }
\) { return yy::parser::make_CPAREN(); }
, { return yy::parser::make_COMMA(); }
-> { return yy::parser::make_ARROW(); }
= { return yy::parser::make_EQUAL(); }
[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext)); }
[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext)); }
. { /* report anything unmatched instead of letting flex echo it to stdout */ std::cerr << "Invalid character: " << yytext << std::endl; }

%%
|
||||||
99
code/compiler/10/type.cpp
Normal file
99
code/compiler/10/type.cpp
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
#include "type.hpp"
|
||||||
|
#include <sstream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include "error.hpp"
|
||||||
|
|
||||||
|
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
|
auto it = mgr.types.find(name);
|
||||||
|
if(it != mgr.types.end()) {
|
||||||
|
it->second->print(mgr, to);
|
||||||
|
} else {
|
||||||
|
to << name;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void type_base::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
|
to << name;
|
||||||
|
}
|
||||||
|
|
||||||
|
void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
|
||||||
|
left->print(mgr, to);
|
||||||
|
to << " -> (";
|
||||||
|
right->print(mgr, to);
|
||||||
|
to << ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string type_mgr::new_type_name() {
|
||||||
|
int temp = last_id++;
|
||||||
|
std::string str = "";
|
||||||
|
|
||||||
|
while(temp != -1) {
|
||||||
|
str += (char) ('a' + (temp % 26));
|
||||||
|
temp = temp / 26 - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::reverse(str.begin(), str.end());
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates a type variable with a freshly generated name.
type_ptr type_mgr::new_type() {
    std::string fresh_name = new_type_name();
    return type_ptr(new type_var(std::move(fresh_name)));
}
|
||||||
|
|
||||||
|
// Creates a function type whose parameter and result are both fresh type
// variables.
//
// The two variables are created in separate statements. The evaluation order
// of function arguments is unspecified in C++, so creating them inside the
// constructor call would make it compiler-dependent which side receives the
// earlier generated name.
type_ptr type_mgr::new_arrow_type() {
    type_ptr param = new_type();   // always receives the earlier name
    type_ptr result = new_type();
    return type_ptr(new type_arr(std::move(param), std::move(result)));
}
|
||||||
|
|
||||||
|
// Follows type-variable bindings starting at t until reaching either a
// non-variable type or a variable with no binding in types.
//
// Returns the type at which the chain ends. var is set to the unbound
// variable when the chain ends in one, and to nullptr otherwise.
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
    var = nullptr;

    for(;;) {
        type_var* as_var = dynamic_cast<type_var*>(t.get());
        if(!as_var) return t;            // concrete type: done

        auto binding = types.find(as_var->name);
        if(binding == types.end()) {     // unbound variable: report it
            var = as_var;
            return t;
        }

        t = binding->second;             // follow the binding one step
    }
}
|
||||||
|
|
||||||
|
// Makes the types l and r equal by recording variable bindings.
//
// Both sides are resolved first. An unbound variable on either side (left
// checked first) is bound to the other side. Two function types are unified
// component-wise, and two base types must have the same name.
// Throws unification_error(l, r) for any other combination.
void type_mgr::unify(type_ptr l, type_ptr r) {
    type_var* lvar;
    type_var* rvar;

    l = resolve(l, lvar);
    r = resolve(r, rvar);

    // An unbound variable unifies with anything by binding it.
    if(lvar) {
        bind(lvar->name, r);
        return;
    }
    if(rvar) {
        bind(rvar->name, l);
        return;
    }

    // Arrow against arrow: unify parameters, then results.
    if(type_arr* larr = dynamic_cast<type_arr*>(l.get())) {
        if(type_arr* rarr = dynamic_cast<type_arr*>(r.get())) {
            unify(larr->left, rarr->left);
            unify(larr->right, rarr->right);
            return;
        }
    }

    // Base against base: names must match.
    if(type_base* lid = dynamic_cast<type_base*>(l.get())) {
        type_base* rid = dynamic_cast<type_base*>(r.get());
        if(rid && lid->name == rid->name) return;
    }

    throw unification_error(l, r);
}
|
||||||
|
|
||||||
|
void type_mgr::bind(const std::string& s, type_ptr t) {
|
||||||
|
type_var* other = dynamic_cast<type_var*>(t.get());
|
||||||
|
|
||||||
|
if(other && other->name == s) return;
|
||||||
|
types[s] = t;
|
||||||
|
}
|
||||||
65
code/compiler/10/type.hpp
Normal file
65
code/compiler/10/type.hpp
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
#pragma once
#include <map>
#include <memory>
#include <ostream> // std::ostream in the print() signatures
#include <string>  // std::string members and parameters
#include <utility> // std::move in the constructors

// Forward declaration: print() takes a type_mgr before the struct is defined.
struct type_mgr;
|
||||||
|
|
||||||
|
struct type {
|
||||||
|
virtual ~type() = default;
|
||||||
|
|
||||||
|
virtual void print(const type_mgr& mgr, std::ostream& to) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Shared-ownership handle used wherever types are passed around.
typedef std::shared_ptr<type> type_ptr;
|
||||||
|
|
||||||
|
struct type_var : public type {
|
||||||
|
std::string name;
|
||||||
|
|
||||||
|
type_var(std::string n)
|
||||||
|
: name(std::move(n)) {}
|
||||||
|
|
||||||
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_base : public type {
|
||||||
|
std::string name;
|
||||||
|
|
||||||
|
type_base(std::string n)
|
||||||
|
: name(std::move(n)) {}
|
||||||
|
|
||||||
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_data : public type_base {
|
||||||
|
struct constructor {
|
||||||
|
int tag;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::map<std::string, constructor> constructors;
|
||||||
|
|
||||||
|
type_data(std::string n)
|
||||||
|
: type_base(std::move(n)) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_arr : public type {
|
||||||
|
type_ptr left;
|
||||||
|
type_ptr right;
|
||||||
|
|
||||||
|
type_arr(type_ptr l, type_ptr r)
|
||||||
|
: left(std::move(l)), right(std::move(r)) {}
|
||||||
|
|
||||||
|
void print(const type_mgr& mgr, std::ostream& to) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct type_mgr {
|
||||||
|
int last_id = 0;
|
||||||
|
std::map<std::string, type_ptr> types;
|
||||||
|
|
||||||
|
std::string new_type_name();
|
||||||
|
type_ptr new_type();
|
||||||
|
type_ptr new_arrow_type();
|
||||||
|
|
||||||
|
void unify(type_ptr l, type_ptr r);
|
||||||
|
type_ptr resolve(type_ptr t, type_var*& var) const;
|
||||||
|
void bind(const std::string& s, type_ptr t);
|
||||||
|
};
|
||||||
16
code/compiler/10/type_env.cpp
Normal file
16
code/compiler/10/type_env.cpp
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#include "type_env.hpp"
|
||||||
|
|
||||||
|
// Finds the type bound to `name`, searching this environment first and then
// each enclosing environment in turn. Returns nullptr when no environment in
// the chain binds the name.
type_ptr type_env::lookup(const std::string& name) const {
    for(const type_env* env = this; env != nullptr; env = env->parent) {
        auto found = env->names.find(name);
        if(found != env->names.end()) return found->second;
    }
    return nullptr;
}
|
||||||
|
|
||||||
|
void type_env::bind(const std::string& name, type_ptr t) {
|
||||||
|
names[name] = t;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates an empty child environment whose parent is this one. The child only
// stores a raw pointer to this environment, so this environment must outlive it.
type_env type_env::scope() const {
    type_env child(this);
    return child;
}
|
||||||
16
code/compiler/10/type_env.hpp
Normal file
16
code/compiler/10/type_env.hpp
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#pragma once
#include <map>
#include <string> // std::string keys and parameters used below
#include "type.hpp"
|
||||||
|
|
||||||
|
struct type_env {
|
||||||
|
std::map<std::string, type_ptr> names;
|
||||||
|
type_env const* parent = nullptr;
|
||||||
|
|
||||||
|
type_env(type_env const* p)
|
||||||
|
: parent(p) {}
|
||||||
|
type_env() : type_env(nullptr) {}
|
||||||
|
|
||||||
|
type_ptr lookup(const std::string& name) const;
|
||||||
|
void bind(const std::string& name, type_ptr t);
|
||||||
|
type_env scope() const;
|
||||||
|
};
|
||||||
4
code/compiler/test.sh
Executable file
4
code/compiler/test.sh
Executable file
@@ -0,0 +1,4 @@
|
|||||||
|
#!/bin/sh
# Configures and builds the compiler in directory 10 inside a build/ subdirectory.

# Stop at the first failing command, so a failed cd never lets cmake or make
# run in the wrong directory.
set -e

# Resolve 10/ relative to this script, not the caller's working directory.
cd "$(dirname "$0")/10"
mkdir -p build
cd build
cmake ..
make -j8
|
||||||
119
code/cs325-langs/hws/hw1.txt
Normal file
119
code/cs325-langs/hws/hw1.txt
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
CS 325-001, Analysis of Algorithms, Fall 2019
|
||||||
|
HW1 - Python 3, qsort, BST, and qselect
|
||||||
|
Due electronically on flip on Monday 9/30 at 11:59pm.
|
||||||
|
No late submission will be accepted.
|
||||||
|
|
||||||
|
Need to submit on flip: report.txt, qsort.py, and qselect.py.
|
||||||
|
qselect.py will be automatically graded for correctness (1%).
|
||||||
|
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw1 qselect.py qsort.py report.txt
|
||||||
|
|
||||||
|
Note:
|
||||||
|
|
||||||
|
1. You can ssh to flip machines from your own machine by:
|
||||||
|
$ ssh access.engr.oregonstate.edu
|
||||||
|
|
||||||
|
2. You can add /nfs/farm/classes/eecs/fall2019/cs325-001/ to your $PATH:
|
||||||
|
$ export PATH=$PATH:/nfs/farm/classes/eecs/fall2019/cs325-001/
|
||||||
|
and add the above command to your ~/.bash_profile,
|
||||||
|
so that you don't need to type it every time.
|
||||||
|
|
||||||
|
(alternatively, you can use symbolic links or aliases to avoid typing the long path)
|
||||||
|
|
||||||
|
3. You can choose to submit each file separately, or submit them together.
|
||||||
|
|
||||||
|
Textbooks for References:
|
||||||
|
[1] CLRS Ch. 9.2 and Ch. 12
|
||||||
|
|
||||||
|
0. Q: What are the best-case, worst-case, and average-case time complexities of quicksort?
|
||||||
|
Briefly explain each case.
|
||||||
|
|
||||||
|
1. [WILL BE GRADED]
|
||||||
|
Quickselect with Randomized Pivot (CLRS Ch. 9.2).
|
||||||
|
|
||||||
|
>>> from qselect import *
|
||||||
|
>>> qselect(2, [3, 10, 4, 7, 19])
|
||||||
|
4
|
||||||
|
>>> qselect(4, [11, 2, 8, 3])
|
||||||
|
11
|
||||||
|
|
||||||
|
Q: What are the best-case, worst-case, and average-case time complexities? Briefly explain.
|
||||||
|
|
||||||
|
Filename: qselect.py
|
||||||
|
|
||||||
|
|
||||||
|
2. Buggy Qsort Revisited
|
||||||
|
|
||||||
|
In the slides we showed a buggy version of qsort which is weird in an interesting way:
|
||||||
|
it actually returns a binary search tree for the given array, rooted at the pivot:
|
||||||
|
|
||||||
|
>>> from qsort import *
|
||||||
|
>>> tree = sort([4,2,6,3,5,7,1,9])
|
||||||
|
>>> tree
|
||||||
|
[[[[], 1, []], 2, [[], 3, []]], 4, [[[], 5, []], 6, [[], 7, [[], 9, []]]]]
|
||||||
|
|
||||||
|
which encodes a binary search tree:
|
||||||
|
|
||||||
|
4
|
||||||
|
/ \
|
||||||
|
2 6
|
||||||
|
/ \ / \
|
||||||
|
1 3 5 7
|
||||||
|
\
|
||||||
|
9
|
||||||
|
|
||||||
|
Now on top of that piece of code, add three functions:
|
||||||
|
* sorted(t): returns the sorted order (in-order traversal)
|
||||||
|
* search(t, x): returns whether x is in t
|
||||||
|
* insert(t, x): inserts x into t (in-place) if it is missing, otherwise does nothing.
|
||||||
|
|
||||||
|
>>> sorted(tree)
|
||||||
|
[1, 2, 3, 4, 5, 6, 7, 9]
|
||||||
|
>>> search(tree, 6)
|
||||||
|
True
|
||||||
|
>>> search(tree, 6.5)
|
||||||
|
False
|
||||||
|
>>> insert(tree, 6.5)
|
||||||
|
>>> tree
|
||||||
|
[[[[], 1, []], 2, [[], 3, []]], 4, [[[], 5, []], 6, [[[], 6.5, []], 7, [[], 9, []]]]]
|
||||||
|
>>> insert(tree, 3)
|
||||||
|
>>> tree
|
||||||
|
[[[[], 1, []], 2, [[], 3, []]], 4, [[[], 5, []], 6, [[[], 6.5, []], 7, [[], 9, []]]]]
|
||||||
|
|
||||||
|
Hint: both search and insert should depend on a helper function _search(tree, x) which
|
||||||
|
returns the subtree (a list) rooted at x when x is found, or the [] where x should
|
||||||
|
be inserted.
|
||||||
|
|
||||||
|
e.g.,
|
||||||
|
>>> tree = sort([4,2,6,3,5,7,1,9]) # starting from the initial tree
|
||||||
|
>>> _search(tree, 3)
|
||||||
|
[[], 3, []]
|
||||||
|
>>> _search(tree, 0)
|
||||||
|
[]
|
||||||
|
>>> _search(tree, 6.5)
|
||||||
|
[]
|
||||||
|
>>> _search(tree, 0) is _search(tree, 6.5)
|
||||||
|
False
|
||||||
|
>>> _search(tree, 0) == _search(tree, 6.5)
|
||||||
|
True
|
||||||
|
|
||||||
|
Note the last two []'s are different nodes (with different memory addresses):
|
||||||
|
the first one is the left child of 1, while the second one is the left child of 7
|
||||||
|
(so that insert is very easy).
|
||||||
|
|
||||||
|
Filename: qsort.py
|
||||||
|
|
||||||
|
Q: What are the time complexities for the operations implemented?
|
||||||
|
|
||||||
|
Debriefing (required!): --------------------------
|
||||||
|
|
||||||
|
1. Approximately how many hours did you spend on this assignment?
|
||||||
|
2. Would you rate it as easy, moderate, or difficult?
|
||||||
|
3. Did you work on it mostly alone, or mostly with other people?
|
||||||
|
4. How deeply do you feel you understand the material it covers (0%–100%)?
|
||||||
|
5. Any other comments?
|
||||||
|
|
||||||
|
This section is intended to help us calibrate the homework assignments.
|
||||||
|
Your answers to this section will *not* affect your grade; however, skipping it
|
||||||
|
will certainly do so.
|
||||||
|
|
||||||
170
code/cs325-langs/hws/hw10.txt
Normal file
170
code/cs325-langs/hws/hw10.txt
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
CS 325, Algorithms (MS/MEng-level), Fall 2019
|
||||||
|
|
||||||
|
HW10 - Challenge Problem - RNA Structure Prediction (6%)
|
||||||
|
This problem combines dynamic programming and priority queues.
|
||||||
|
|
||||||
|
Due Wednesday 12/4, 11:59pm.
|
||||||
|
No late submission will be accepted.
|
||||||
|
|
||||||
|
Include in your submission: report.txt, rna.py.
|
||||||
|
Grading:
|
||||||
|
* report.txt -- 1%
|
||||||
|
* 1-best structure -- 2%
|
||||||
|
* number of structures -- 1%
|
||||||
|
* k-best structures -- 2%
|
||||||
|
|
||||||
|
Textbooks for References:
|
||||||
|
[1] KT Ch. 6.5 (DP over intervals -- RNA structure)
|
||||||
|
[2] KT slides: DP I (RNA section)
|
||||||
|
http://www.cs.princeton.edu/~wayne/kleinberg-tardos/
|
||||||
|
|
||||||
|
***Please analyze time/space complexities for each problem in report.txt.
|
||||||
|
|
||||||
|
1. Given an RNA sequence, such as ACAGU, we can predict its secondary structure
|
||||||
|
by tagging each nucleotide as (, ., or ). Each matching pair of () must be
|
||||||
|
AU, GC, or GU (or their mirror symmetries: UA, CG, UG).
|
||||||
|
We also assume pairs can _not_ cross each other.
|
||||||
|
The following are valid structures for ACAGU:
|
||||||
|
|
||||||
|
ACAGU
|
||||||
|
.....
|
||||||
|
...()
|
||||||
|
..(.)
|
||||||
|
.(.).
|
||||||
|
(...)
|
||||||
|
((.))
|
||||||
|
|
||||||
|
We want to find the structure with the maximum number of matching pairs.
|
||||||
|
In the above example, the last structure is optimal (2 pairs).
|
||||||
|
|
||||||
|
>>> best("ACAGU")
|
||||||
|
(2, '((.))')
|
||||||
|
|
||||||
|
Tie-breaking: arbitrary. Don't worry as long as your structure
|
||||||
|
is one of the correct best structures.
|
||||||
|
|
||||||
|
some other cases (more cases at the bottom):
|
||||||
|
|
||||||
|
GCACG
|
||||||
|
(2, '().()')
|
||||||
|
UUCAGGA
|
||||||
|
(3, '(((.)))')
|
||||||
|
GUUAGAGUCU
|
||||||
|
(4, '(.()((.)))')
|
||||||
|
AUAACCUUAUAGGGCUCUG
|
||||||
|
(8, '.(((..)()()((()))))')
|
||||||
|
AACCGCUGUGUCAAGCCCAUCCUGCCUUGUU
|
||||||
|
(11, '(((.(..(.((.)((...().))()))))))')
|
||||||
|
GAUGCCGUGUAGUCCAAAGACUUCACCGUUGG
|
||||||
|
(14, '.()()(()(()())(((.((.)(.))()))))')
|
||||||
|
CAUCGGGGUCUGAGAUGGCCAUGAAGGGCACGUACUGUUU
|
||||||
|
(18, '(()())(((((.)))()(((())(.(.().()()))))))')
|
||||||
|
ACGGCCAGUAAAGGUCAUAUACGCGGAAUGACAGGUCUAUCUAC
|
||||||
|
(19, '.()(((.)(..))(((.()()(())))(((.)((())))))())')
|
||||||
|
AGGCAUCAAACCCUGCAUGGGAGCACCGCCACUGGCGAUUUUGGUA
|
||||||
|
(20, '.(()())...((((()()))((()(.()(((.)))()())))))()')
|
||||||
|
|
||||||
|
2. Total number of all possible structures
|
||||||
|
|
||||||
|
>>> total("ACAGU")
|
||||||
|
6
|
||||||
|
|
||||||
|
3. k-best structures: output the 1-best, 2nd-best, ... kth-best structures.
|
||||||
|
|
||||||
|
>>> kbest("ACAGU", 3)
|
||||||
|
[(2, '((.))'), (1, '(...)'), (1, '.(.).')]
|
||||||
|
|
||||||
|
The list must be sorted.
|
||||||
|
Tie-breaking: arbitrary.
|
||||||
|
|
||||||
|
In case the input k is bigger than the number of possible structures, output all.
|
||||||
|
|
||||||
|
Sanity check: kbest(s, 1)[0][0] == best(s)[0] for each RNA sequence s.
|
||||||
|
|
||||||
|
All three functions should be in one file: rna.py.
|
||||||
|
|
||||||
|
See more testcases at the end.
|
||||||
|
|
||||||
|
Debriefing (required!): --------------------------
|
||||||
|
|
||||||
|
0. What's your name?
|
||||||
|
1. Approximately how many hours did you spend on this assignment?
|
||||||
|
2. Would you rate it as easy, moderate, or difficult?
|
||||||
|
3. Did you work on it mostly alone, or mostly with other people?
|
||||||
|
4. How deeply do you feel you understand the material it covers (0%-100%)?
|
||||||
|
5. Any other comments?
|
||||||
|
|
||||||
|
This section is intended to help us calibrate the homework assignments.
|
||||||
|
Your answers to this section will *not* affect your grade; however, skipping it
|
||||||
|
will certainly do so.
|
||||||
|
|
||||||
|
|
||||||
|
TESTCASES:
|
||||||
|
|
||||||
|
for each sequence s, we list three lines:
|
||||||
|
best(s)
|
||||||
|
total(s)
|
||||||
|
kbest(s, 10)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
ACAGU
|
||||||
|
(2, '((.))')
|
||||||
|
6
|
||||||
|
[(2, '((.))'), (1, '.(.).'), (1, '..(.)'), (1, '...()'), (1, '(...)'), (0, '.....')]
|
||||||
|
------
|
||||||
|
AC
|
||||||
|
(0, '..')
|
||||||
|
1
|
||||||
|
[(0, '..')]
|
||||||
|
------
|
||||||
|
GUAC
|
||||||
|
(2, '(())')
|
||||||
|
5
|
||||||
|
[(2, '(())'), (1, '()..'), (1, '.().'), (1, '(..)'), (0, '....')]
|
||||||
|
------
|
||||||
|
GCACG
|
||||||
|
(2, '().()')
|
||||||
|
6
|
||||||
|
[(2, '().()'), (1, '(..).'), (1, '()...'), (1, '.(..)'), (1, '...()'), (0, '.....')]
|
||||||
|
------
|
||||||
|
CCGG
|
||||||
|
(2, '(())')
|
||||||
|
6
|
||||||
|
[(2, '(())'), (1, '(.).'), (1, '.().'), (1, '.(.)'), (1, '(..)'), (0, '....')]
|
||||||
|
------
|
||||||
|
CCCGGG
|
||||||
|
(3, '((()))')
|
||||||
|
20
|
||||||
|
[(3, '((()))'), (2, '((.)).'), (2, '(.()).'), (2, '.(()).'), (2, '.(().)'), (2, '.((.))'), (2, '((.).)'), (2, '(.(.))'), (2, '(.().)'), (2, '((..))')]
|
||||||
|
------
|
||||||
|
UUCAGGA
|
||||||
|
(3, '(((.)))')
|
||||||
|
24
|
||||||
|
[(3, '(((.)))'), (2, '((.).).'), (2, '((..)).'), (2, '(.(.)).'), (2, '((.))..'), (2, '.((.)).'), (2, '.((.).)'), (2, '.((..))'), (2, '((..).)'), (2, '((.)..)')]
|
||||||
|
------
|
||||||
|
AUAACCUA
|
||||||
|
(2, '.((...))')
|
||||||
|
19
|
||||||
|
[(2, '((.)..).'), (2, '(()...).'), (2, '()(...).'), (2, '().(..).'), (2, '()....()'), (2, '.()(..).'), (2, '.()...()'), (2, '.(.)..()'), (2, '.((...))'), (2, '.(.(..))')]
|
||||||
|
------
|
||||||
|
UUGGACUUG
|
||||||
|
(4, '(()((.)))')
|
||||||
|
129
|
||||||
|
[(4, '(())(.)()'), (4, '(()((.)))'), (3, '(().)..()'), (3, '(().).(.)'), (3, '(().)(..)'), (3, '((.))..()'), (3, '((.)).(.)'), (3, '((.))(..)'), (3, '(())(..).'), (3, '(())(.)..')]
|
||||||
|
------
|
||||||
|
UUUGGCACUA
|
||||||
|
(4, '(.()()(.))')
|
||||||
|
179
|
||||||
|
[(4, '((()).).()'), (4, '((.)()).()'), (4, '(.()()).()'), (4, '.(()()).()'), (4, '.(()()(.))'), (4, '((()).(.))'), (4, '((.)()(.))'), (4, '((()())..)'), (4, '(.()()(.))'), (3, '((()).)...')]
|
||||||
|
------
|
||||||
|
GAUGCCGUGUAGUCCAAAGACUUC
|
||||||
|
(11, '(((()()((()(.))))((.))))')
|
||||||
|
2977987
|
||||||
|
[(11, '(()())(((()().))(((.))))'), (11, '(()())(((()()).)(((.))))'), (11, '(()())(((()(.)))(((.))))'), (11, '(()()()((()(.)))(((.))))'), (11, '(((()()((()().)))((.))))'), (11, '(((()()((()(.))))((.))))'), (11, '(()()()((()()).)(((.))))'), (11, '(()()()((()().))(((.))))'), (11, '(((()()((()()).))((.))))'), (10, '(()()()((()().).)((.))).')]
|
||||||
|
------
|
||||||
|
AGGCAUCAAACCCUGCAUGGGAGCG
|
||||||
|
(10, '.(()())...((((()()))).())')
|
||||||
|
560580
|
||||||
|
[(10, '.(()())...((((())())).)()'), (10, '.(()())...((((()()))).)()'), (10, '.(()())...(((()(()))).)()'), (10, '.(()())...(((()(()))).())'), (10, '.(()())...((((())())).())'), (10, '.(()())...((((()()))).())'), (9, '((.).)(...(.((()()))).)()'), (9, '((.).)(...(((.)(()))).)()'), (9, '((.).)(...(.(()(()))).)()'), (9, '((.).)(...((.(()()))).)()')]
|
||||||
|
------
|
||||||
42
code/cs325-langs/hws/hw11.txt
Normal file
42
code/cs325-langs/hws/hw11.txt
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
HW11 -- OPTIONAL (for your practice only -- solutions will be released on Tuesday)
|
||||||
|
|
||||||
|
Edit Distance (see updated final review solutions)
|
||||||
|
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw11 edit.py
|
||||||
|
|
||||||
|
Implement two functions:
|
||||||
|
* distance1(s, t): Viterbi-style (either top-down or bottom-up)
|
||||||
|
* distance2(s, t): Dijkstra-style (best-first)
|
||||||
|
|
||||||
|
For Dijkstra, you can use either heapdict or heapq (see review problem 7).
|
||||||
|
Given that this graph is extremely sparse (why?), heapq (ElogE) might be faster than heapdict (ElogV)
|
||||||
|
because the latter has overhead for hash.
|
||||||
|
|
||||||
|
They should return the same result (just return the edit distance).
|
||||||
|
|
||||||
|
We have 10 testcases (listed below); the first 5 test distance1(),
|
||||||
|
and the second 5 test distance2() on the same 5 string pairs.
|
||||||
|
|
||||||
|
My solutions (on flip2):
|
||||||
|
Testing Case 1 (open)... 0.001 s, Correct
|
||||||
|
Testing Case 2 (open)... 0.000 s, Correct
|
||||||
|
Testing Case 3 (open)... 0.012 s, Correct
|
||||||
|
Testing Case 4 (open)... 0.155 s, Correct
|
||||||
|
Testing Case 5 (open)... 0.112 s, Correct
|
||||||
|
Testing Case 6 (hidden)... 0.000 s, Correct
|
||||||
|
Testing Case 7 (hidden)... 0.000 s, Correct
|
||||||
|
Testing Case 8 (hidden)... 0.004 s, Correct
|
||||||
|
Testing Case 9 (hidden)... 0.009 s, Correct
|
||||||
|
Testing Case 10 (hidden)... 0.021 s, Correct
|
||||||
|
Total Time: 0.316 s
|
||||||
|
|
||||||
|
distance1("abcdefh", "abbcdfg") == 3
|
||||||
|
distance1("pretty", "prettier") == 3
|
||||||
|
distance1("aaaaaaadaaaaaaaaaaaaaaaaacaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxaaaaaaaaaaaaaaaaaaaaaa") == 5
|
||||||
|
distance1('cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbxtwiqzvokqpkecyywrbvhlqgxzutdjfmvlhsezfbhfjbllmfhzlqlcwibubyyjupbwhztskyksfthkptxqlmhivfjbgclwsombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasomrhotoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwy', 'cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbtwiqzvokqpkecyywrbvhlqgxzutdjfmvlhsezfbhfjbllmfhzlqlcwibubyyjupbwhztskyksfthkptxqlmhivfbgclwsombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasonrhotoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwy') == 3
|
||||||
|
distance1('cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbtwiqzvokqpasdfkecyywrbvhlqgxzutdjfmvlhsezfbhbllmfhzlqlcwibubyyjupbwhztsxyksfthkptxqlmhivfjbgclhombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasomrttoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwydmbihjkvziitusmkjljrsbafytsinql', 'cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbtwiqzvokqpkecyywrbvhlqgxzutdjfmvlhsezfbhfjbllmfhzlqlcwibubyyjupbwhztskyksfthkptxqlmhivfjbgclwsombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasomrhotoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwydmbihjkvziitusmkjljrsbafytsinql') == 11
|
||||||
|
distance2("abcdefh", "abbcdfg") == 3
|
||||||
|
distance2("pretty", "prettier") == 3
|
||||||
|
distance2("aaaaaaadaaaaaaaaaaaaaaaaacaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxaaaaaaaaaaaaaaaaaaaaaa") == 5
|
||||||
|
distance2('cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbxtwiqzvokqpkecyywrbvhlqgxzutdjfmvlhsezfbhfjbllmfhzlqlcwibubyyjupbwhztskyksfthkptxqlmhivfjbgclwsombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasomrhotoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwy', 'cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbtwiqzvokqpkecyywrbvhlqgxzutdjfmvlhsezfbhfjbllmfhzlqlcwibubyyjupbwhztskyksfthkptxqlmhivfbgclwsombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasonrhotoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwy') == 3
|
||||||
|
distance2('cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbtwiqzvokqpasdfkecyywrbvhlqgxzutdjfmvlhsezfbhbllmfhzlqlcwibubyyjupbwhztsxyksfthkptxqlmhivfjbgclhombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasomrttoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwydmbihjkvziitusmkjljrsbafytsinql', 'cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbtwiqzvokqpkecyywrbvhlqgxzutdjfmvlhsezfbhfjbllmfhzlqlcwibubyyjupbwhztskyksfthkptxqlmhivfjbgclwsombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasomrhotoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwydmbihjkvziitusmkjljrsbafytsinql') == 11
|
||||||
80
code/cs325-langs/hws/hw2.txt
Normal file
80
code/cs325-langs/hws/hw2.txt
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
CS 325-001, Analysis of Algorithms, Fall 2019
|
||||||
|
HW2 - Divide-n-conquer: mergesort, number of inversions, longest path
|
||||||
|
|
||||||
|
Due Monday Oct 7, 11:59pm (same submission instructions as HW1).
|
||||||
|
No late submission will be accepted.
|
||||||
|
|
||||||
|
Need to submit: report.txt, msort.py, inversions.py, and longest.py.
|
||||||
|
longest.py will be graded for correctness (1%).
|
||||||
|
|
||||||
|
To submit:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw2 report.txt {msort,inversions,longest}.py
|
||||||
|
(You can submit each file separately, or submit them together.)
|
||||||
|
|
||||||
|
To see your best results so far:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/query hw2
|
||||||
|
|
||||||
|
|
||||||
|
Textbooks for References:
|
||||||
|
[1] CLRS Ch. 2
|
||||||
|
|
||||||
|
0. Which of the following sorting algorithms are (or can be made) stable?
|
||||||
|
(a) mergesort
|
||||||
|
(b) quicksort with the first element as pivot
|
||||||
|
(c) quicksort with randomized pivot
|
||||||
|
(d) selection sort
|
||||||
|
(e) insertion sort
|
||||||
|
(f) heap sort --- not covered yet (see CLRS Ch. 6)
|
||||||
|
|
||||||
|
1. Implement mergesort.
|
||||||
|
|
||||||
|
>>> mergesort([4, 2, 5, 1, 6, 3])
|
||||||
|
[1, 2, 3, 4, 5, 6]
|
||||||
|
|
||||||
|
Filename: msort.py
|
||||||
|
|
||||||
|
2. Calculate the number of inversions in a list.
|
||||||
|
|
||||||
|
>>> num_inversions([4, 1, 3, 2])
|
||||||
|
4
|
||||||
|
>>> num_inversions([2, 4, 1, 3])
|
||||||
|
3
|
||||||
|
|
||||||
|
Filename: inversions.py
|
||||||
|
Must run in O(nlogn) time.
|
||||||
|
|
||||||
|
3. [WILL BE GRADED]
|
||||||
|
|
||||||
|
Length of the longest path in a binary tree (number of edges).
|
||||||
|
|
||||||
|
We will use the "buggy qsort" representation of binary trees from HW1:
|
||||||
|
[left_subtree, root, right_subtree]
|
||||||
|
|
||||||
|
>>> longest([[], 1, []])
|
||||||
|
0
|
||||||
|
|
||||||
|
>>> longest([[[], 1, []], 2, [[], 3, []]])
|
||||||
|
2
|
||||||
|
|
||||||
|
>>> longest([[[[], 1, []], 2, [[], 3, []]], 4, [[[], 5, []], 6, [[], 7, [[], 9, []]]]])
|
||||||
|
5
|
||||||
|
|
||||||
|
Note the answer is 5 because the longest path is 1-2-4-6-7-9.
|
||||||
|
|
||||||
|
Filename: longest.py
|
||||||
|
Must run in O(n) time.
|
||||||
|
|
||||||
|
Debriefing (required!): --------------------------
|
||||||
|
|
||||||
|
1. Approximately how many hours did you spend on this assignment?
|
||||||
|
2. Would you rate it as easy, moderate, or difficult?
|
||||||
|
3. Did you work on it mostly alone, or mostly with other people?
|
||||||
|
Note you are encouraged to discuss with your classmates,
|
||||||
|
but each student should submit his/her own code.
|
||||||
|
4. How deeply do you feel you understand the material it covers (0%–100%)?
|
||||||
|
5. Any other comments?
|
||||||
|
|
||||||
|
This section is intended to help us calibrate the homework assignments.
|
||||||
|
Your answers to this section will *not* affect your grade; however, skipping it
|
||||||
|
will certainly do so.
|
||||||
|
|
||||||
83
code/cs325-langs/hws/hw3.txt
Normal file
83
code/cs325-langs/hws/hw3.txt
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
CS 325, Algorithms, Fall 2019
|
||||||
|
HW3 - K closest numbers; Two Pointers
|
||||||
|
|
||||||
|
Due Monday Oct 14, 11:59pm. (same submission instructions as HW1-2).
|
||||||
|
No late submission will be accepted.
|
||||||
|
|
||||||
|
Need to submit: report.txt, closest_unsorted.py, closest_sorted.py, xyz.py.
|
||||||
|
closest_sorted.py will be graded for correctness (1%).
|
||||||
|
|
||||||
|
To submit:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw3 report.txt {closest*,xyz}.py
|
||||||
|
(You can submit each file separately, or submit them together.)
|
||||||
|
|
||||||
|
To see your best results so far:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/query hw3
|
||||||
|
|
||||||
|
|
||||||
|
1. Given an array A of n numbers, a query x, and a number k,
|
||||||
|
find the k numbers in A that are closest (in value) to x.
|
||||||
|
For example:
|
||||||
|
|
||||||
|
find([4,1,3,2,7,4], 5.2, 2) returns [4,4]
|
||||||
|
find([4,1,3,2,7,4], 6.5, 3) returns [4,7,4]
|
||||||
|
find([5,3,4,1,6,3], 3.5, 2) returns [3,4]
|
||||||
|
|
||||||
|
|
||||||
|
Filename: closest_unsorted.py
|
||||||
|
Must run in O(n) time.
|
||||||
|
The elements in the returned list must be in the original order.
|
||||||
|
In case two numbers are equally close to x, choose the earlier one.
|
||||||
|
|
||||||
|
|
||||||
|
2. [WILL BE GRADED]
|
||||||
|
Now what if the input array is sorted? Can you do it faster?
|
||||||
|
|
||||||
|
find([1,2,3,4,4,7], 5.2, 2) returns [4,4]
|
||||||
|
find([1,2,3,4,4,7], 6.5, 3) returns [4,4,7]
|
||||||
|
|
||||||
|
Filename: closest_sorted.py
|
||||||
|
Must run in O(logn + k) time.
|
||||||
|
The elements in the returned list must be in the original order.
|
||||||
|
|
||||||
|
Note: in case two numbers are equally close to x, choose the smaller one:
|
||||||
|
find([1,2,3,4,4,6,6], 5, 3) returns [4,4,6]
|
||||||
|
find([1,2,3,4,4,5,6], 4, 5) returns [2,3,4,4,5]
|
||||||
|
|
||||||
|
Hint: you can use Python's bisect.bisect for binary search.
|
||||||
|
|
||||||
|
|
||||||
|
3. For a given array A of n *distinct* numbers, find all triples (x,y,z)
|
||||||
|
s.t. x + y = z. (x, y, z are distinct numbers)
|
||||||
|
|
||||||
|
e.g.,
|
||||||
|
|
||||||
|
find([1, 4, 2, 3, 5]) returns [(1,3,4), (1,2,3), (1,4,5), (2,3,5)]
|
||||||
|
|
||||||
|
Note that:
|
||||||
|
1) no duplicates in the input array
|
||||||
|
2) you can choose any arbitrary order for triples in the returned list.
|
||||||
|
|
||||||
|
Filename: xyz.py
|
||||||
|
Must run in O(n^2) time.
|
||||||
|
|
||||||
|
Hint: you can use any built-in sort in Python.
|
||||||
|
|
||||||
|
|
||||||
|
Debriefing (required!): --------------------------
|
||||||
|
|
||||||
|
0. What's your name?
|
||||||
|
1. Approximately how many hours did you spend on this assignment?
|
||||||
|
2. Would you rate it as easy, moderate, or difficult?
|
||||||
|
3. Did you work on it mostly alone, or mostly with other people?
|
||||||
|
Note you are encouraged to discuss with your classmates,
|
||||||
|
but each student should submit his/her own code.
|
||||||
|
4. How deeply do you feel you understand the material it covers (0%-100%)?
|
||||||
|
|
||||||
|
5. Which part(s) of the course do you like the most so far?
|
||||||
|
6. Which part(s) of the course do you dislike the most so far?
|
||||||
|
|
||||||
|
This section is intended to help us calibrate the homework assignments.
|
||||||
|
Your answers to this section will *not* affect your grade; however, skipping it
|
||||||
|
will certainly do so.
|
||||||
|
|
||||||
114
code/cs325-langs/hws/hw4.txt
Normal file
114
code/cs325-langs/hws/hw4.txt
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
CS 325-001, Algorithms, Fall 2019
|
||||||
|
HW4 - Priority Queue and Heaps
|
||||||
|
|
||||||
|
Due via the submit program on Monday Oct 21, 11:59pm.
|
||||||
|
No late submission will be accepted.
|
||||||
|
|
||||||
|
Need to submit: report.txt, nbest.py, kmergesort.py, datastream.py.
|
||||||
|
datastream.py will be graded for correctness (1%).
|
||||||
|
|
||||||
|
To submit:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw4 report.txt {nbest,kmergesort,datastream}.py
|
||||||
|
(You can submit each file separately, or submit them together.)
|
||||||
|
|
||||||
|
To see your best results so far:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/query hw4
|
||||||
|
|
||||||
|
|
||||||
|
Textbooks for References:
|
||||||
|
[1] CLRS Ch. 6
|
||||||
|
[2] KT slides for binary heaps (only read the first 20 pages!):
|
||||||
|
https://www.cs.princeton.edu/~wayne/kleinberg-tardos/pdf/BinomialHeaps.pdf
|
||||||
|
[3] Python heapq module
|
||||||
|
|
||||||
|
0. There are two methods for building a heap from an unsorted array:
|
||||||
|
(1) insert each element into the heap --- O(nlogn) -- heapq.heappush()
|
||||||
|
(2) heapify (top-down) --- O(n) -- heapq.heapify()
|
||||||
|
|
||||||
|
(a) Derive these time complexities.
|
||||||
|
(b) Use a long list of random numbers to show the difference in time. (Hint: random.shuffle or random.sample)
|
||||||
|
(c) What about sorted or reversely-sorted numbers?
|
||||||
|
|
||||||
|
1. Given two lists A and B, each with n integers, return
|
||||||
|
a sorted list C that contains the smallest n elements from AxB:
|
||||||
|
|
||||||
|
AxB = { (x, y) | x in A, y in B }
|
||||||
|
|
||||||
|
i.e., AxB is the Cartesian Product of A and B.
|
||||||
|
|
||||||
|
ordering: (x,y) < (x',y') iff. x+y < x'+y' or (x+y==x'+y' and y<y')
|
||||||
|
|
||||||
|
You need to implement three algorithms and compare:
|
||||||
|
|
||||||
|
(a) enumerate all n^2 pairs, sort, and take top n.
|
||||||
|
(b) enumerate all n^2 pairs, but use qselect from hw1.
|
||||||
|
(c) Dijkstra-style best-first, only enumerate O(n) (at most 2n) pairs.
|
||||||
|
Hint: you can use Python's heapq module for priority queue.
|
||||||
|
|
||||||
|
Q: What are the time complexities of these algorithms?
|
||||||
|
|
||||||
|
>>> a, b = [4, 1, 5, 3], [2, 6, 3, 4]
|
||||||
|
>>> nbesta(a, b) # algorithm (a), slowest
|
||||||
|
[(1, 2), (1, 3), (3, 2), (1, 4)]
|
||||||
|
>>> nbestb(a, b) # algorithm (b), slow
|
||||||
|
[(1, 2), (1, 3), (3, 2), (1, 4)]
|
||||||
|
>>> nbestc(a, b) # algorithm (c), fast
|
||||||
|
[(1, 2), (1, 3), (3, 2), (1, 4)]
|
||||||
|
|
||||||
|
Filename: nbest.py
|
||||||
|
|
||||||
|
2. k-way mergesort (the classical mergesort is a special case where k=2).
|
||||||
|
|
||||||
|
>>> kmergesort([4,1,5,2,6,3,7,0], 3) # k=3
|
||||||
|
[0,1,2,3,4,5,6,7]
|
||||||
|
|
||||||
|
Q: What is the complexity? Write down the detailed analysis in report.txt.
|
||||||
|
|
||||||
|
Filename: kmergesort.py
|
||||||
|
|
||||||
|
3. [WILL BE GRADED]
|
||||||
|
|
||||||
|
Find the k smallest numbers in a data stream of length n (k<<n),
|
||||||
|
using only O(k) space (the stream itself might be too big to fit in memory).
|
||||||
|
|
||||||
|
>>> ksmallest(4, [10, 2, 9, 3, 7, 8, 11, 5, 7])
|
||||||
|
[2, 3, 5, 7]
|
||||||
|
>>> ksmallest(3, range(1000000, 0, -1))
|
||||||
|
[1, 2, 3]
|
||||||
|
|
||||||
|
Note:
|
||||||
|
a) it should work with both lists and lazy lists
|
||||||
|
b) the output list should be sorted
|
||||||
|
|
||||||
|
Q: What is your complexity? Write down the detailed analysis in report.txt.
|
||||||
|
|
||||||
|
Filename: datastream.py
|
||||||
|
|
||||||
|
[UPDATE] The built-in function heapq.nsmallest() is _not_ allowed for this problem.
|
||||||
|
The whole point is to implement it yourself. :)
|
||||||
|
|
||||||
|
|
||||||
|
4. (optional) Summarize the time complexities of the basic operations (push, pop-min, peek, heapify) for these implementations of priority queue:
|
||||||
|
|
||||||
|
(a) unsorted array
|
||||||
|
(b) sorted array (highest priority first)
|
||||||
|
(c) reversely sorted array (lowest priority first)
|
||||||
|
(d) linked list
|
||||||
|
(e) binary heap
|
||||||
|
|
||||||
|
Debriefing (required!): --------------------------
|
||||||
|
|
||||||
|
0. What's your name?
|
||||||
|
1. Approximately how many hours did you spend on this assignment?
|
||||||
|
2. Would you rate it as easy, moderate, or difficult?
|
||||||
|
3. Did you work on it mostly alone, or mostly with other people?
|
||||||
|
Note you are encouraged to discuss with your classmates,
|
||||||
|
but each student should submit his/her own code.
|
||||||
|
4. How deeply do you feel you understand the material it covers (0%-100%)?
|
||||||
|
5. Which part(s) of the course you like the most so far?
|
||||||
|
6. Which part(s) of the course you dislike the most so far?
|
||||||
|
|
||||||
|
This section is intended to help us calibrate the homework assignments.
|
||||||
|
Your answers to this section will *not* affect your grade; however, skipping it
|
||||||
|
will certainly do so.
|
||||||
|
|
||||||
130
code/cs325-langs/hws/hw5.txt
Normal file
130
code/cs325-langs/hws/hw5.txt
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
CS 325-001, Algorithms, Fall 2019
|
||||||
|
HW5 - DP (part 1: simple)
|
||||||
|
|
||||||
|
HWs 5-7 are all on DPs.
|
||||||
|
|
||||||
|
Due Monday Oct 28, 11:59pm.
|
||||||
|
No late submission will be accepted.
|
||||||
|
|
||||||
|
Need to submit report.txt, mis.py, bsts.py, bitstrings.py.
|
||||||
|
mis.py will be graded for correctness (1%).
|
||||||
|
|
||||||
|
To submit:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw5 report.txt {mis,bsts,bitstrings}.py
|
||||||
|
(You can submit each file separately, or submit them together.)
|
||||||
|
|
||||||
|
To see your best results so far:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/query hw5
|
||||||
|
|
||||||
|
|
||||||
|
Textbooks for References:
|
||||||
|
[1] CLRS Ch. 15
|
||||||
|
[2] KT Ch. 6
|
||||||
|
or Ch. 5 in a previous version:
|
||||||
|
http://cs.furman.edu/~chealy/cs361/kleinbergbook.pdf
|
||||||
|
|
||||||
|
Hint: Among the three coding questions, p3 is the easiest, and p1 is similar to p3.
|
||||||
|
You'll realize that both are very similar to p0 (Fibonacci).
|
||||||
|
p2 is slightly different from these, but still very easy.
|
||||||
|
|
||||||
|
0. (Optional) Is Fibonacci REALLY O(n)?
|
||||||
|
Hint: the value of f(n) itself grows exponentially.
|
||||||
|
|
||||||
|
1. [WILL BE GRADED]
|
||||||
|
Maximum Weighted Independent Set
|
||||||
|
|
||||||
|
[HINT] independent set is a set where no two numbers are neighbors in the original list.
|
||||||
|
see also https://en.wikipedia.org/wiki/Independent_set_(graph_theory)
|
||||||
|
|
||||||
|
input: a list of numbers (could be negative)
|
||||||
|
output: a pair of the max sum and the list of numbers chosen
|
||||||
|
|
||||||
|
>>> max_wis([7,8,5])
|
||||||
|
(12, [7,5])
|
||||||
|
|
||||||
|
>>> max_wis([-1,8,10])
|
||||||
|
(10, [10])
|
||||||
|
|
||||||
|
>>> max_wis([])
|
||||||
|
(0, [])
|
||||||
|
|
||||||
|
[HINT] if all numbers are negative, the optimal solution is 0,
|
||||||
|
since [] is an independent set according to the definition above.
|
||||||
|
|
||||||
|
>>> max_wis([-5, -1, -4])
|
||||||
|
(0, [])
|
||||||
|
|
||||||
|
Q: What's the complexity?
|
||||||
|
|
||||||
|
Include both top-down (max_wis()) and bottom-up (max_wis2()) solutions,
|
||||||
|
and make sure they produce exactly the same results.
|
||||||
|
We'll only grade the top-down version.
|
||||||
|
|
||||||
|
Tie-breaking: any best solution is considered correct.
|
||||||
|
|
||||||
|
Filename: mis.py
|
||||||
|
|
||||||
|
[HINT] you can also use the naive O(2^n) exhaustive search method to verify your answer.
|
||||||
|
|
||||||
|
|
||||||
|
2. Number of n-node BSTs
|
||||||
|
|
||||||
|
input: n
|
||||||
|
output: number of n-node BSTs
|
||||||
|
|
||||||
|
>>> bsts(2)
|
||||||
|
2
|
||||||
|
>>> bsts(3)
|
||||||
|
5
|
||||||
|
>>> bsts(5)
|
||||||
|
42
|
||||||
|
|
||||||
|
[HINT] There are two 2-node BSTs:
|
||||||
|
2 1
|
||||||
|
/ \
|
||||||
|
1 2
|
||||||
|
Note that all other 2-node BSTs are *isomorphic* to either one.
|
||||||
|
|
||||||
|
Qa: What's the complexity of this DP?
|
||||||
|
|
||||||
|
Qb: What's the name of this famous number series?
|
||||||
|
|
||||||
|
Feel free to use any implementation style.
|
||||||
|
|
||||||
|
Filename: bsts.py
|
||||||
|
|
||||||
|
3. Number of bit strings of length n that have
|
||||||
|
|
||||||
|
1) no two consecutive 0s.
|
||||||
|
2) two consecutive 0s.
|
||||||
|
|
||||||
|
>>> num_no(3)
|
||||||
|
5
|
||||||
|
>>> num_yes(3)
|
||||||
|
3
|
||||||
|
|
||||||
|
[HINT] There are three 3-bit 0/1-strings that have two consecutive 0s.
|
||||||
|
001 100 000
|
||||||
|
The other five 3-bit 0/1-strings have no two consecutive 0s:
|
||||||
|
010 011 101 110 111
|
||||||
|
|
||||||
|
Feel free to choose any implementation style.
|
||||||
|
|
||||||
|
Filename: bitstrings.py
|
||||||
|
|
||||||
|
[HINT] Like problem 1, you can also use the O(2^n) exhaustive search method to verify your answer.
|
||||||
|
|
||||||
|
|
||||||
|
Debriefing (required!): --------------------------
|
||||||
|
|
||||||
|
0. What's your name?
|
||||||
|
1. Approximately how many hours did you spend on this assignment?
|
||||||
|
2. Would you rate it as easy, moderate, or difficult?
|
||||||
|
3. Did you work on it mostly alone, or mostly with other people?
|
||||||
|
4. How deeply do you feel you understand the material it covers (0%-100%)?
|
||||||
|
5. Which part(s) of the course you like the most so far?
|
||||||
|
6. Which part(s) of the course you dislike the most so far?
|
||||||
|
|
||||||
|
This section is intended to help us calibrate the homework assignments.
|
||||||
|
Your answers to this section will *not* affect your grade; however, skipping it
|
||||||
|
will certainly do so.
|
||||||
114
code/cs325-langs/hws/hw6.txt
Normal file
114
code/cs325-langs/hws/hw6.txt
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
CS 325-001, Algorithms, Fall 2019
|
||||||
|
HW6 - DP (part 2)
|
||||||
|
|
||||||
|
Due on Monday Nov 4, 11:59pm.
|
||||||
|
No late submission will be accepted.
|
||||||
|
|
||||||
|
Need to submit: report.txt, knapsack_unbounded.py, knapsack_bounded.py.
|
||||||
|
knapsack_bounded.py will be graded for correctness (1%).
|
||||||
|
|
||||||
|
To submit:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw6 report.txt knapsack*.py
|
||||||
|
(You can submit each file separately, or submit them together.)
|
||||||
|
|
||||||
|
To see your best results so far:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/query hw6
|
||||||
|
|
||||||
|
Textbooks for References:
|
||||||
|
[1] KT Ch. 6.4
|
||||||
|
or Ch. 5.3 in a previous version:
|
||||||
|
http://cs.furman.edu/~chealy/cs361/kleinbergbook.pdf
|
||||||
|
[2] KT slides for DP (pages 1-37):
|
||||||
|
https://www.cs.princeton.edu/~wayne/kleinberg-tardos/pdf/06DynamicProgrammingI.pdf
|
||||||
|
[3] Wikipedia: Knapsack (unbounded and 0/1)
|
||||||
|
[4] CLRS Ch. 15
|
||||||
|
|
||||||
|
Please answer time/space complexities for each problem in report.txt.
|
||||||
|
|
||||||
|
0. For each of the coding problems below:
|
||||||
|
(a) Describe a greedy solution.
|
||||||
|
(b) Show a counterexample to the greedy solution.
|
||||||
|
(c) Define the DP subproblem
|
||||||
|
(d) Write the recurrence relations
|
||||||
|
(e) Do not forget base cases
|
||||||
|
(f) Analyze the space and time complexities
|
||||||
|
|
||||||
|
1. Unbounded Knapsack
|
||||||
|
|
||||||
|
You have n items, each with weight w_i and value v_i, and each has infinite copies.
|
||||||
|
**All numbers are positive integers.**
|
||||||
|
What's the best value for a bag of W?
|
||||||
|
|
||||||
|
>>> best(3, [(2, 4), (3, 5)])
|
||||||
|
(5, [0, 1])
|
||||||
|
|
||||||
|
the input to the best() function is W and a list of pairs (w_i, v_i).
|
||||||
|
this output means to take 0 copies of item 1 and 1 copy of item 2.
|
||||||
|
|
||||||
|
tie-breaking: *reverse* lexicographical: i.e., [1, 0] is better than [0, 1]:
|
||||||
|
(i.e., take as many copies from the first item as possible, etc.)
|
||||||
|
|
||||||
|
>>> best(3, [(1, 5), (1, 5)])
|
||||||
|
(15, [3, 0])
|
||||||
|
|
||||||
|
>>> best(3, [(1, 2), (1, 5)])
|
||||||
|
(15, [0, 3])
|
||||||
|
|
||||||
|
>>> best(3, [(1, 2), (2, 5)])
|
||||||
|
(7, [1, 1])
|
||||||
|
|
||||||
|
>>> best(58, [(5, 9), (9, 18), (6, 12)])
|
||||||
|
(114, [2, 4, 2])
|
||||||
|
|
||||||
|
>>> best(92, [(8, 9), (9, 10), (10, 12), (5, 6)])
|
||||||
|
(109, [1, 1, 7, 1])
|
||||||
|
|
||||||
|
Q: What are the time and space complexities?
|
||||||
|
|
||||||
|
filename: knapsack_unbounded.py
|
||||||
|
|
||||||
|
2. [WILL BE GRADED]
|
||||||
|
Bounded Knapsack
|
||||||
|
|
||||||
|
You have n items, each with weight w_i and value v_i, and has c_i copies.
|
||||||
|
**All numbers are positive integers.**
|
||||||
|
What's the best value for a bag of W?
|
||||||
|
|
||||||
|
>>> best(3, [(2, 4, 2), (3, 5, 3)])
|
||||||
|
(5, [0, 1])
|
||||||
|
|
||||||
|
the input to the best() function is W and a list of triples (w_i, v_i, c_i).
|
||||||
|
|
||||||
|
tie-breaking: same as in p1:
|
||||||
|
|
||||||
|
>>> best(3, [(1, 5, 2), (1, 5, 3)])
|
||||||
|
(15, [2, 1])
|
||||||
|
|
||||||
|
>>> best(3, [(1, 5, 1), (1, 5, 3)])
|
||||||
|
(15, [1, 2])
|
||||||
|
|
||||||
|
>>> best(20, [(1, 10, 6), (3, 15, 4), (2, 10, 3)])
|
||||||
|
(130, [6, 4, 1])
|
||||||
|
|
||||||
|
>>> best(92, [(1, 6, 6), (6, 15, 7), (8, 9, 8), (2, 4, 7), (2, 20, 2)])
|
||||||
|
(236, [6, 7, 3, 7, 2])
|
||||||
|
|
||||||
|
Q: What are the time and space complexities?
|
||||||
|
|
||||||
|
filename: knapsack_bounded.py
|
||||||
|
|
||||||
|
You are encouraged to come up with a few other testcases yourself to test your code!
|
||||||
|
|
||||||
|
Debriefing (required!): --------------------------
|
||||||
|
|
||||||
|
0. What's your name?
|
||||||
|
1. Approximately how many hours did you spend on this assignment?
|
||||||
|
2. Would you rate it as easy, moderate, or difficult?
|
||||||
|
3. Did you work on it mostly alone, or mostly with other people?
|
||||||
|
4. How deeply do you feel you understand the material it covers (0%-100%)?
|
||||||
|
5. Which part(s) of the course you like the most so far?
|
||||||
|
6. Which part(s) of the course you dislike the most so far?
|
||||||
|
|
||||||
|
This section is intended to help us calibrate the homework assignments.
|
||||||
|
Your answers to this section will *not* affect your grade; however, skipping it
|
||||||
|
will certainly do so.
|
||||||
147
code/cs325-langs/hws/hw8.txt
Normal file
147
code/cs325-langs/hws/hw8.txt
Normal file
@@ -0,0 +1,147 @@
|
|||||||
|
CS 325-001, Algorithms, Fall 2019
|
||||||
|
HW8 - Graphs (part I); DP (part III)
|
||||||
|
|
||||||
|
Due on Monday November 18, 11:59pm.
|
||||||
|
No late submission will be accepted.
|
||||||
|
|
||||||
|
Include in your submission: report.txt, topol.py, viterbi.py.
|
||||||
|
viterbi.py will be graded for correctness (1%).
|
||||||
|
|
||||||
|
To submit:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw8 report.txt {topol,viterbi}.py
|
||||||
|
(You can submit each file separately, or submit them together.)
|
||||||
|
|
||||||
|
To see your best results so far:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/query hw8
|
||||||
|
|
||||||
|
Textbooks for References:
|
||||||
|
[1] CLRS Ch. 22 (Elementary Graph Algorithms)
|
||||||
|
[2] KT Ch. 3 (graphs), or Ch. 2 in this earlier version:
|
||||||
|
http://cs.furman.edu/~chealy/cs361/kleinbergbook.pdf
|
||||||
|
[3] KT slides (highly recommended!):
|
||||||
|
https://www.cs.princeton.edu/~wayne/kleinberg-tardos/pdf/03Graphs.pdf
|
||||||
|
[4] Jeff Erickson: Ch. 5 (Basic Graph Algorithms):
|
||||||
|
http://jeffe.cs.illinois.edu/teaching/algorithms/book/05-graphs.pdf
|
||||||
|
[5] DPV Ch. 3, 4.2, 4.4, 4.7 (Dasgupta, Papadimitriou, Vazirani)
|
||||||
|
https://www.cs.berkeley.edu/~vazirani/algorithms/chap3.pdf (decomposition of graphs)
|
||||||
|
https://www.cs.berkeley.edu/~vazirani/algorithms/chap4.pdf (paths, shortest paths)
|
||||||
|
[6] my advanced DP tutorial (up to page 16):
|
||||||
|
http://web.engr.oregonstate.edu/~huanlian/slides/COLING-tutorial-anim.pdf
|
||||||
|
|
||||||
|
Please answer non-coding questions in report.txt.
|
||||||
|
|
||||||
|
0. For the following graphs, decide whether they are
|
||||||
|
(1) directed or undirected, (2) dense or sparse, and (3) cyclic or acyclic:
|
||||||
|
|
||||||
|
(a) Facebook
|
||||||
|
(b) Twitter
|
||||||
|
(c) a family
|
||||||
|
(d) V=airports, E=direct_flights
|
||||||
|
(e) a mesh
|
||||||
|
(f) V=courses, E=prerequisites
|
||||||
|
(g) a tree
|
||||||
|
(h) V=linux_software_packages, E=dependencies
|
||||||
|
(i) DP subproblems for 0-1 knapsack
|
||||||
|
|
||||||
|
Can you name a very big dense graph?
|
||||||
|
|
||||||
|
1. Topological Sort
|
||||||
|
|
||||||
|
For a given directed graph, output a topological order if it exists.
|
||||||
|
|
||||||
|
Tie-breaking: ARBITRARY tie-breaking. This will make the code
|
||||||
|
and time complexity analysis a lot easier.
|
||||||
|
|
||||||
|
e.g., for the following example:
|
||||||
|
|
||||||
|
0 --> 2 --> 3 --> 5 --> 6
|
||||||
|
/ \ | / \
|
||||||
|
/ \ v / \
|
||||||
|
1 > 4 > 7
|
||||||
|
|
||||||
|
>>> order(8, [(0,2), (1,2), (2,3), (2,4), (3,4), (3,5), (4,5), (5,6), (5,7)])
|
||||||
|
[0, 1, 2, 3, 4, 5, 6, 7]
|
||||||
|
|
||||||
|
Note that order() takes two arguments, n and list_of_edges,
|
||||||
|
where n specifies that the nodes are named 0..(n-1).
|
||||||
|
|
||||||
|
If we flip the (3,4) edge:
|
||||||
|
|
||||||
|
>>> order(8, [(0,2), (1,2), (2,3), (2,4), (4,3), (3,5), (4,5), (5,6), (5,7)])
|
||||||
|
[0, 1, 2, 4, 3, 5, 6, 7]
|
||||||
|
|
||||||
|
If there is a cycle, return None
|
||||||
|
|
||||||
|
>>> order(4, [(0,1), (1,2), (2,1), (2,3)])
|
||||||
|
None
|
||||||
|
|
||||||
|
Other cases:
|
||||||
|
|
||||||
|
>>> order(5, [(0,1), (1,2), (2,3), (3,4)])
|
||||||
|
[0, 1, 2, 3, 4]
|
||||||
|
|
||||||
|
>>> order(5, [])
|
||||||
|
[0, 1, 2, 3, 4] # could be any order
|
||||||
|
|
||||||
|
>>> order(3, [(1,2), (2,1)])
|
||||||
|
None
|
||||||
|
|
||||||
|
>>> order(1, [(0,0)]) # self-loop
|
||||||
|
None
|
||||||
|
|
||||||
|
Tie-breaking: arbitrary (any valid topological order is fine).
|
||||||
|
|
||||||
|
filename: topol.py
|
||||||
|
|
||||||
|
questions:
|
||||||
|
(a) did you realize that bottom-up implementations of DP use (implicit) topological orderings?
|
||||||
|
e.g., what is the topological ordering in your (or my) bottom-up bounded knapsack code?
|
||||||
|
(b) what about top-down implementations? what order do they use to traverse the graph?
|
||||||
|
(c) does that suggest there is a top-down solution for topological sort as well?
|
||||||
|
|
||||||
|
2. [WILL BE GRADED]
|
||||||
|
Viterbi Algorithm For Longest Path in DAG (see DPV 4.7, [2], CLRS problem 15-1)
|
||||||
|
|
||||||
|
Recall that the Viterbi algorithm has just two steps:
|
||||||
|
a) get a topological order (use problem 1 above)
|
||||||
|
b) follow that order, and do either forward or backward updates
|
||||||
|
|
||||||
|
This algorithm captures all DP problems on DAGs, for example,
|
||||||
|
longest path, shortest path, number of paths, etc.
|
||||||
|
|
||||||
|
In this problem, given a DAG (guaranteed acyclic!), output a pair (l, p)
|
||||||
|
where l is the length of the longest path (number of edges), and p is the path. (you can think of each edge being unit cost)
|
||||||
|
|
||||||
|
e.g., for the above example:
|
||||||
|
|
||||||
|
>>> longest(8, [(0,2), (1,2), (2,3), (2,4), (3,4), (3,5), (4,5), (5,6), (5,7)])
|
||||||
|
(5, [0, 2, 3, 4, 5, 6])
|
||||||
|
|
||||||
|
>>> longest(8, [(0,2), (1,2), (2,3), (2,4), (4,3), (3,5), (4,5), (5,6), (5,7)])
|
||||||
|
(5, [0, 2, 4, 3, 5, 6])
|
||||||
|
|
||||||
|
>>> longest(8, [(0,1), (0,2), (1,2), (2,3), (2,4), (4,3), (3,5), (4,5), (5,6), (5,7), (6,7)])
|
||||||
|
(7, [0, 1, 2, 4, 3, 5, 6, 7]) # unique answer
|
||||||
|
|
||||||
|
Note that longest() takes two arguments, n and list_of_edges,
|
||||||
|
where n specifies that the nodes are named 0..(n-1).
|
||||||
|
|
||||||
|
Tie-breaking: arbitrary. any longest path is fine.
|
||||||
|
|
||||||
|
Filename: viterbi.py
|
||||||
|
|
||||||
|
Note: you can use this program to solve MIS, knapsacks, coins, etc.
|
||||||
|
|
||||||
|
|
||||||
|
Debriefing (required!): --------------------------
|
||||||
|
|
||||||
|
0. What's your name?
|
||||||
|
1. Approximately how many hours did you spend on this assignment?
|
||||||
|
2. Would you rate it as easy, moderate, or difficult?
|
||||||
|
3. Did you work on it mostly alone, or mostly with other people?
|
||||||
|
4. How deeply do you feel you understand the material it covers (0%-100%)?
|
||||||
|
5. Any other comments?
|
||||||
|
|
||||||
|
This section is intended to help us calibrate the homework assignments.
|
||||||
|
Your answers to this section will *not* affect your grade; however, skipping it
|
||||||
|
will certainly do so.
|
||||||
166
code/cs325-langs/hws/hw9.txt
Normal file
166
code/cs325-langs/hws/hw9.txt
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
CS 325, Algorithms, Fall 2019
|
||||||
|
HW9 - Graphs (part 2), DP (part 4)
|
||||||
|
|
||||||
|
Due Monday Nov 25, 11:59pm.
|
||||||
|
No late submission will be accepted.
|
||||||
|
|
||||||
|
Include in your submission: report.txt, dijkstra.py, nbest.py.
|
||||||
|
dijkstra.py will be graded for correctness (1%).
|
||||||
|
|
||||||
|
Textbooks for References:
|
||||||
|
[1] CLRS Ch. 22 (graph)
|
||||||
|
[2] my DP tutorial (up to page 16):
|
||||||
|
http://web.engr.oregonstate.edu/~huanlian/slides/COLING-tutorial-anim.pdf
|
||||||
|
[3] DPV Ch. 3, 4.2, 4.4, 4.7, 6 (Dasgupta, Papadimitriou, Vazirani)
|
||||||
|
https://www.cs.berkeley.edu/~vazirani/algorithms/chap3.pdf
|
||||||
|
https://www.cs.berkeley.edu/~vazirani/algorithms/chap4.pdf
|
||||||
|
https://www.cs.berkeley.edu/~vazirani/algorithms/chap6.pdf
|
||||||
|
[4] KT Ch. 6 (DP)
|
||||||
|
http://www.aw-bc.com/info/kleinberg/assets/downloads/ch6.pdf
|
||||||
|
[5] KT slides: Greedy II (Dijkstra)
|
||||||
|
http://www.cs.princeton.edu/~wayne/kleinberg-tardos/
|
||||||
|
|
||||||
|
***Please answer time/space complexities for each problem in report.txt.
|
||||||
|
|
||||||
|
1. [WILL BE GRADED]
|
||||||
|
Dijkstra (see CLRS 24.3 and DPV 4.4)
|
||||||
|
|
||||||
|
Given an undirected graph, find the shortest path from source (node 0)
|
||||||
|
to target (node n-1).
|
||||||
|
|
||||||
|
Edge weights are guaranteed to be non-negative, since Dijkstra doesn't work
|
||||||
|
with negative weights, e.g.
|
||||||
|
|
||||||
|
3
|
||||||
|
0 ------ 1
|
||||||
|
\ /
|
||||||
|
2 \ / -2
|
||||||
|
\/
|
||||||
|
2
|
||||||
|
|
||||||
|
in this example, Dijkstra would return length 2 (path 0-2),
|
||||||
|
but path 0-1-2 is better (length 1).
|
||||||
|
|
||||||
|
For example (return a pair of shortest-distance and shortest-path):
|
||||||
|
|
||||||
|
1
|
||||||
|
0 ------ 1
|
||||||
|
\ / \
|
||||||
|
5 \ /1 \6
|
||||||
|
\/ 2 \
|
||||||
|
2 ------ 3
|
||||||
|
|
||||||
|
>>> shortest(4, [(0,1,1), (0,2,5), (1,2,1), (2,3,2), (1,3,6)])
|
||||||
|
(4, [0,1,2,3])
|
||||||
|
|
||||||
|
If the target node (n-1) is unreachable from the source (0),
|
||||||
|
return None:
|
||||||
|
|
||||||
|
>>> shortest(5, [(0,1,1), (0,2,5), (1,2,1), (2,3,2), (1,3,6)])
|
||||||
|
None
|
||||||
|
|
||||||
|
Another example:
|
||||||
|
|
||||||
|
1 1
|
||||||
|
0-----1 2-----3
|
||||||
|
|
||||||
|
>>> shortest(4, [(0,1,1), (2,3,1)])
|
||||||
|
None
|
||||||
|
|
||||||
|
Tiebreaking: arbitrary. Any shortest path would do.
|
||||||
|
|
||||||
|
Filename: dijkstra.py
|
||||||
|
|
||||||
|
Hint: please use heapdict from here:
|
||||||
|
https://raw.githubusercontent.com/DanielStutzbach/heapdict/master/heapdict.py
|
||||||
|
|
||||||
|
>>> from heapdict import heapdict
|
||||||
|
>>> h = heapdict()
|
||||||
|
>>> h['a'] = 3
|
||||||
|
>>> h['b'] = 1
|
||||||
|
>>> h.peekitem()
|
||||||
|
('b', 1)
|
||||||
|
>>> h['a'] = 0
|
||||||
|
>>> h.peekitem()
|
||||||
|
('a', 0)
|
||||||
|
>>> h.popitem()
|
||||||
|
('a', 0)
|
||||||
|
>>> len(h)
|
||||||
|
1
|
||||||
|
>>> 'a' in h
|
||||||
|
False
|
||||||
|
>>> 'b' in h
|
||||||
|
True
|
||||||
|
|
||||||
|
You don't need to submit heapdict.py; we have it in our grader.
|
||||||
|
|
||||||
|
|
||||||
|
2. [Redo the nbest question from Midterm, preparing for HW10 part 3]
|
||||||
|
|
||||||
|
Given k pairs of lists A_i and B_i (0 <= i < k), each with n sorted numbers,
|
||||||
|
find the n smallest pairs in all the (k n^2) pairs.
|
||||||
|
We say (x,y) < (x', y') if and only if x+y < x'+y'.
|
||||||
|
Tie-breaking: lexicographical (i.e., prefer smaller x).
|
||||||
|
|
||||||
|
You can base your code on the skeleton from the Midterm:
|
||||||
|
|
||||||
|
from heapq import heappush, heappop
|
||||||
|
def nbest(ABs): # no need to pass in k or n
|
||||||
|
k = len(ABs)
|
||||||
|
n = len(ABs[0][0])
|
||||||
|
def trypush(i, p, q): # push pair (A_i,p, B_i,q) if possible
|
||||||
|
A, B = ABs[i] # A_i, B_i
|
||||||
|
if p < n and q < n and ______________________________:
|
||||||
|
heappush(h, (________________, i, p, q, (A[p],B[q])))
|
||||||
|
used.add((i, p, q))
|
||||||
|
h, used = ___________________ # initialize
|
||||||
|
for i in range(k): # NEED TO OPTIMIZE
|
||||||
|
trypush(______________)
|
||||||
|
for _ in range(n):
|
||||||
|
_, i, p, q, pair = ________________
|
||||||
|
yield pair # return the next pair (in a lazy list)
|
||||||
|
_______________________
|
||||||
|
_______________________
|
||||||
|
|
||||||
|
|
||||||
|
But recall we had two optimizations to speed up the first for-loop (queue initialization):
|
||||||
|
|
||||||
|
(1) using heapify instead of k initial pushes. You need to implement this (very easy).
|
||||||
|
|
||||||
|
(2) using qselect to choose top n out of the k bests. This one is OPTIONAL.
|
||||||
|
|
||||||
|
Analyze the time complexity for the version you implemented.
|
||||||
|
|
||||||
|
>>> list(nbest([([1,2,4], [2,3,5]), ([0,2,4], [3,4,5])]))
|
||||||
|
|
||||||
|
[(0, 3), (1, 2), (0, 4)]
|
||||||
|
|
||||||
|
>>> list(nbest([([-1,2],[1,4]), ([0,2],[3,4]), ([0,1],[4,6]), ([-1,2],[1,5])]))
|
||||||
|
[(-1, 1), (-1, 1)]
|
||||||
|
|
||||||
|
>>> list(nbest([([5,6,10,14],[3,5,10,14]),([2,7,9,11],[3,8,12,16]),([1,3,8,10],[5,9,10,11]),([1,2,3,5],[3,4,9,10]),([4,5,9,10],[2,4,6,11]),([4,6,10,13],[2,3,5,9]),([3,7,10,12],[1,2,5,10]),([5,9,14,15],[4,8,13,14])]))
|
||||||
|
|
||||||
|
[(1, 3), (3, 1), (1, 4), (2, 3)]
|
||||||
|
|
||||||
|
>>> list(nbest([([1,6,8,13],[5,8,11,12]),([1,2,3,5],[5,9,11,13]),([3,5,7,10],[4,6,7,11]),([1,4,7,8],[4,9,11,15]),([4,8,10,13],[4,6,10,11]),([4,8,12,15],[5,10,11,13]),([2,3,4,8],[4,7,11,15]),([4,5,10,15],[5,6,7,8])]))
|
||||||
|
|
||||||
|
[(1, 4), (1, 5), (1, 5), (2, 4)]
|
||||||
|
|
||||||
|
This problem prepares you for the hardest question in HW10 (part 3).
|
||||||
|
|
||||||
|
Filename: nbest.py
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Debriefing (required!): --------------------------
|
||||||
|
|
||||||
|
0. What's your name?
|
||||||
|
1. Approximately how many hours did you spend on this assignment?
|
||||||
|
2. Would you rate it as easy, moderate, or difficult?
|
||||||
|
3. Did you work on it mostly alone, or mostly with other people?
|
||||||
|
4. How deeply do you feel you understand the material it covers (0%-100%)?
|
||||||
|
5. Any other comments?
|
||||||
|
|
||||||
|
This section is intended to help us calibrate the homework assignments.
|
||||||
|
Your answers to this section will *not* affect your grade; however, skipping it
|
||||||
|
will certainly do so.
|
||||||
19
code/cs325-langs/sols/hw1.lang
Normal file
19
code/cs325-langs/sols/hw1.lang
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
qselect(xs,k) =
|
||||||
|
~xs -> {
|
||||||
|
pivot <- xs[0]!
|
||||||
|
left <- xs[#0 <= pivot]
|
||||||
|
right <- xs[#0 > pivot]
|
||||||
|
} ->
|
||||||
|
if k > |left| + 1 then qselect(right, k - |left| - 1)
|
||||||
|
else if k == |left| + 1 then [pivot]
|
||||||
|
else qselect(left, k);
|
||||||
|
|
||||||
|
_search(xs, k) =
|
||||||
|
if xs[1] == k then xs
|
||||||
|
else if xs[1] > k then _search(xs[0], k)
|
||||||
|
else _search(xs[2], k);
|
||||||
|
|
||||||
|
sorted(xs) = sorted(xs[0]) ++ [xs[1]] ++ sorted(xs[2]);
|
||||||
|
search(xs, k) = |_search(xs, k)| != 0;
|
||||||
|
insert(xs, k) = _insert(k, _search(xs, k));
|
||||||
|
_insert(k, xs) = if |xs| == 0 then xs << [] << k << [] else xs
|
||||||
11
code/cs325-langs/sols/hw2.lang
Normal file
11
code/cs325-langs/sols/hw2.lang
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
state 0;
|
||||||
|
|
||||||
|
effect {
|
||||||
|
if(SOURCE == R) {
|
||||||
|
STATE = STATE + |LEFT|;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
combine {
|
||||||
|
STATE = STATE + LSTATE + RSTATE;
|
||||||
|
}
|
||||||
95
code/cs325-langs/sols/hw3.lang
Normal file
95
code/cs325-langs/sols/hw3.lang
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
function qselect(xs, k, c) {
|
||||||
|
if xs == [] {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
traverser bisector(list: xs, span: (0,len(xs)));
|
||||||
|
traverser pivot(list: xs, random: true);
|
||||||
|
|
||||||
|
let pivotE = pop!(pivot);
|
||||||
|
let (leftList, rightList) = bisect!(bisector, (x) -> c(x) < c(pivotE));
|
||||||
|
|
||||||
|
if k > len(leftList) + 1 {
|
||||||
|
return qselect(rightList, k - len(leftList) - 1, c);
|
||||||
|
} elsif k == len(leftList) + 1 {
|
||||||
|
return pivotE;
|
||||||
|
} else {
|
||||||
|
return qselect(leftList, k, c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function closestUnsorted(xs, k, n) {
|
||||||
|
let min = qselect(list(xs), k, (x) -> abs(x - n));
|
||||||
|
let out = [];
|
||||||
|
let countEqual = k;
|
||||||
|
|
||||||
|
traverser iter(list: xs, span: (0, len(xs)));
|
||||||
|
while valid!(iter) {
|
||||||
|
if abs(at!(iter)-n) < abs(min-n) {
|
||||||
|
let countEqual = countEqual - 1;
|
||||||
|
}
|
||||||
|
step!(iter);
|
||||||
|
}
|
||||||
|
|
||||||
|
traverser iter(list: xs, span: (0, len(xs)));
|
||||||
|
while valid!(iter) {
|
||||||
|
if abs(at!(iter)-n) == abs(min-n) and countEqual > 0 {
|
||||||
|
let countEqual = countEqual - 1;
|
||||||
|
let out = out + [at!(iter)];
|
||||||
|
} elsif abs(at!(iter)-n) < abs(min-n) {
|
||||||
|
let out = out + [at!(iter)];
|
||||||
|
}
|
||||||
|
step!(iter);
|
||||||
|
}
|
||||||
|
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
function closestSorted(xs, k, n) {
|
||||||
|
let start = bisect(xs, n);
|
||||||
|
let counter = 0;
|
||||||
|
traverser left(list: xs, span: (0, start), reverse: true);
|
||||||
|
traverser right(list: xs, span: (start, len(xs)));
|
||||||
|
|
||||||
|
while counter != k and canstep!(left) and valid!(right) {
|
||||||
|
if abs(at!(left, 1) - n) < abs(at!(right) - n) {
|
||||||
|
step!(left);
|
||||||
|
} else {
|
||||||
|
step!(right);
|
||||||
|
}
|
||||||
|
let counter = counter + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
while counter != k and (canstep!(left) or valid!(right)) {
|
||||||
|
if canstep!(left) { step!(left); }
|
||||||
|
else { step!(right); }
|
||||||
|
let counter = counter + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return subset!(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
sorted function xyz(xs, k) {
|
||||||
|
traverser x(list: xs, span: (0,len(xs)));
|
||||||
|
let dest = [];
|
||||||
|
|
||||||
|
while valid!(x) {
|
||||||
|
traverser z(list: xs, span: (pos!(x)+2,len(xs)));
|
||||||
|
traverser y(list: xs, span: (pos!(x)+1,pos!(z)));
|
||||||
|
|
||||||
|
while valid!(y) and valid!(z) {
|
||||||
|
if at!(x) + at!(y) == at!(z) {
|
||||||
|
let dest = dest + [(at!(x), at!(y), at!(z))];
|
||||||
|
step!(z);
|
||||||
|
} elsif at!(x) + at!(y) > at!(z) {
|
||||||
|
step!(z);
|
||||||
|
} else {
|
||||||
|
step!(y);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
step!(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
return dest;
|
||||||
|
}
|
||||||
15
code/cs325-langs/src/Common.hs
Normal file
15
code/cs325-langs/src/Common.hs
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
module Common where
|
||||||
|
import PythonAst
|
||||||
|
import PythonGen
|
||||||
|
import Text.Parsec
|
||||||
|
|
||||||
|
-- | Compile the file @f.lang@ to @f.py@.
--
-- The source is read from @f ++ ".lang"@ and handed to the parser @p@
-- (together with its file name).  On success the program is converted to
-- Python statements by @t@, rendered with 'translate' and written to
-- @f ++ ".py"@; on failure the parse error is printed instead.
compile :: (String -> String -> Either ParseError p) -> (p -> [PyStmt]) -> String -> IO ()
compile p t f = readFile source >>= either print emit . p source
  where
    source = f ++ ".lang"
    target = f ++ ".py"
    emit = writeFile target . translate . t
|
||||||
|
|
||||||
90
code/cs325-langs/src/CommonParsing.hs
Normal file
90
code/cs325-langs/src/CommonParsing.hs
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
module CommonParsing where
|
||||||
|
import Data.Char
|
||||||
|
import Data.Functor
|
||||||
|
import Text.Parsec
|
||||||
|
import Text.Parsec.Char
|
||||||
|
import Text.Parsec.Combinator
|
||||||
|
|
||||||
|
type Parser a b = Parsec String a b
|
||||||
|
|
||||||
|
-- | Parse the keyword @s@ and skip any whitespace that follows it.
--
-- The keyword must not be immediately followed by an identifier character
-- (letter, digit or underscore); otherwise an identifier such as @iffy@ or
-- @returnValue@ would be split into a keyword and a leftover suffix.  The
-- match is wrapped in 'try', so a failed keyword consumes no input.
kw :: String -> Parser a ()
kw s = try (string s *> notFollowedBy identChar) <* spaces
  where
    identChar = satisfy (\c -> isAlphaNum c || c == '_')
|
||||||
|
|
||||||
|
-- Keyword parsers.  Each one is 'kw' applied to the literal keyword text.
kwIf, kwThen, kwElse, kwElsif, kwWhile, kwState, kwEffect, kwCombine,
    kwRand, kwFunction, kwSorted, kwLet, kwTraverser, kwReturn :: Parser a ()
kwIf        = kw "if"
kwThen      = kw "then"
kwElse      = kw "else"
kwElsif     = kw "elsif"
kwWhile     = kw "while"
kwState     = kw "state"
kwEffect    = kw "effect"
kwCombine   = kw "combine"
kwRand      = kw "rand"
kwFunction  = kw "function"
kwSorted    = kw "sorted"
kwLet       = kw "let"
kwTraverser = kw "traverser"
kwReturn    = kw "return"
|
||||||
|
|
||||||
|
-- | Parse the operator text @s@ and yield the operator value @o@.
-- No whitespace is consumed and no backtracking is added.
op :: String -> op -> Parser a op
op s o = o <$ string s
|
||||||
|
|
||||||
|
-- | Parse one or more decimal digits as an 'Int', then skip whitespace.
int :: Parser a Int
int = do
    digits <- many1 digit
    spaces
    return (read digits)
|
||||||
|
|
||||||
|
-- | Parse an identifier and skip trailing whitespace.
--
-- An identifier starts with a letter or underscore and continues with
-- letters, digits or underscores (underscores were previously accepted
-- only in the first position).  Names listed in @reserved@ are rejected
-- with a parse failure; note that the failure happens after input has been
-- consumed, so callers that need to backtrack must wrap this in 'try'.
var :: [String] -> Parser a String
var reserved = do
    c <- satisfy isIdentStart
    cs <- many (satisfy isIdentRest) <* spaces
    let name = c : cs
    if name `elem` reserved
        then fail "Can't use reserved keyword as identifier"
        else return name
  where
    isIdentStart ch = isLetter ch || ch == '_'
    isIdentRest ch = isIdentStart ch || isDigit ch
|
||||||
|
|
||||||
|
-- | Parse zero or more @pe@ items separated by the character @cd@ (plus
-- optional whitespace) and enclosed between @co@ and @cc@.
list :: Char -> Char -> Char -> Parser a b -> Parser a [b]
list co cc cd pe = surround co cc (pe `sepBy` separator)
  where
    separator = char cd *> spaces
|
||||||
|
|
||||||
|
-- | Run @pe@ between the delimiter characters @c1@ and @c2@.  Whitespace
-- is skipped after the opening delimiter and on both sides of the closing
-- one.
surround :: Char -> Char -> Parser a b -> Parser a b
surround c1 c2 pe = between open close pe
  where
    open = char c1 *> spaces
    close = spaces *> char c2 *> spaces
|
||||||
|
|
||||||
|
-- | One left-associative precedence level.
--
-- Parses an operand with @pe@, then any number of "operator operand"
-- pairs (operator via @po@), combining them left to right with @c@.
-- Each pair is attempted under 'try', so a pair that fails part-way leaves
-- the input untouched and simply ends the level.
level :: (o -> e -> e -> e) -> Parser a o -> Parser a e -> Parser a e
level c po pe = do
    first <- pe <* spaces
    rest <- many (try step)
    return (foldl (\acc apply -> apply acc) first rest)
  where
    -- Parse "op operand" and return a function awaiting the left operand.
    step = do
        o <- po <* spaces
        r <- pe <* spaces
        return (\l -> c o l r)
|
||||||
|
|
||||||
|
-- | Build an expression parser from a list of operator parsers, one per
-- precedence level.  Each operator wraps the parser built so far in a new
-- 'level', so operators earlier in the list bind more tightly.
precedence :: (o -> e -> e -> e) -> Parser a e -> [ Parser a o ] -> Parser a e
precedence c pe pos = foldl (\inner po -> level c po inner) pe pos
|
||||||
393
code/cs325-langs/src/LanguageOne.hs
Normal file
393
code/cs325-langs/src/LanguageOne.hs
Normal file
@@ -0,0 +1,393 @@
|
|||||||
|
module LanguageOne where
|
||||||
|
import qualified PythonAst as Py
|
||||||
|
import qualified CommonParsing as P
|
||||||
|
import Data.Bifunctor
|
||||||
|
import Data.Char
|
||||||
|
import Data.Functor
|
||||||
|
import qualified Data.Map as Map
|
||||||
|
import Data.Maybe
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
import Text.Parsec
|
||||||
|
import Text.Parsec.Char
|
||||||
|
import Text.Parsec.Combinator
|
||||||
|
import Control.Monad.State
|
||||||
|
|
||||||
|
{- Data Types -}
|
||||||
|
data PossibleType = List | Any deriving Eq
|
||||||
|
|
||||||
|
data SelectorMarker = None | Remove
|
||||||
|
|
||||||
|
data Op
|
||||||
|
= Add
|
||||||
|
| Subtract
|
||||||
|
| Multiply
|
||||||
|
| Divide
|
||||||
|
| Insert
|
||||||
|
| Concat
|
||||||
|
| LessThan
|
||||||
|
| LessThanEq
|
||||||
|
| GreaterThan
|
||||||
|
| GreaterThanEq
|
||||||
|
| Equal
|
||||||
|
| NotEqual
|
||||||
|
| And
|
||||||
|
| Or
|
||||||
|
|
||||||
|
data Selector = Selector String Expr
|
||||||
|
|
||||||
|
data Expr
|
||||||
|
= Var String
|
||||||
|
| IntLiteral Int
|
||||||
|
| ListLiteral [Expr]
|
||||||
|
| Split Expr [Selector] Expr
|
||||||
|
| IfElse Expr Expr Expr
|
||||||
|
| BinOp Op Expr Expr
|
||||||
|
| FunctionCall Expr [Expr]
|
||||||
|
| LengthOf Expr
|
||||||
|
| Random
|
||||||
|
| Access Expr Expr SelectorMarker
|
||||||
|
| Parameter Int
|
||||||
|
|
||||||
|
data Function = Function String [String] Expr
|
||||||
|
|
||||||
|
data Prog = Prog [Function]
|
||||||
|
|
||||||
|
{- Parser -}
|
||||||
|
type Parser = Parsec String (Maybe Int)
|
||||||
|
|
||||||
|
-- | Parse a variable name, rejecting the language's keywords.
--
-- @rand@ is reserved because 'parseBasicExpr' tries variables before the
-- @rand@ keyword; if @rand@ were accepted as a variable name the 'Random'
-- expression could never be produced.
parseVar :: Parser String
parseVar = P.var ["if", "then", "else", "var", "rand"]
|
||||||
|
|
||||||
|
-- | Parse @&@, a reference to the current split context.
--
-- The parser state holds the index of the enclosing split (set by
-- 'parseSplit'); the result is the variable @context_N@ for that index.
-- The index is unwrapped before being shown, since showing the 'Maybe'
-- directly would give names such as @context_Just 0@.  Outside of any
-- split the state is 'Nothing' and the parse fails.
parseThis :: Parser Expr
parseThis = do
    _ <- char '&'
    contextNum <- getState
    spaces
    case contextNum of
        Just n -> return (Var $ "context_" ++ show n)
        Nothing -> fail "'&' can only be used inside a split"
|
||||||
|
|
||||||
|
-- | Parse a list literal: comma-separated expressions in square brackets.
parseList :: Parser Expr
parseList = ListLiteral <$> P.list '[' ']' ',' parseExpr
|
||||||
|
|
||||||
|
-- | Parse a split expression: @~ expr -> { selectors } -> expr@.
--
-- While the selectors are parsed the parser state (the context index) is
-- set to 0 if there was no enclosing split, or to one more than the
-- enclosing index; the previous state is restored afterwards.
parseSplit :: Parser Expr
parseSplit = do
    _ <- char '~' *> spaces
    subject <- parseExpr
    _ <- spaces *> string "->" *> spaces *> char '{'
    outer <- getState
    putState (Just (maybe 0 (+ 1) outer))
    selectors <- many1 (spaces *> parseSelector)
    putState outer
    _ <- spaces *> char '}' *> spaces *> string "->" *> spaces
    result <- parseExpr <* spaces
    return (Split subject selectors result)
|
||||||
|
|
||||||
|
-- | Parse an optional @!@ marker: 'Remove' when present, 'None' otherwise.
parseSelectorMarker :: Parser SelectorMarker
parseSelectorMarker = option None (Remove <$ char '!')
|
||||||
|
|
||||||
|
-- | Parse a selector of the form @name <- expr@.
parseSelector :: Parser Selector
parseSelector = Selector
    <$> (parseVar <* spaces <* string "<-" <* spaces)
    <*> (parseExpr <* spaces)
|
||||||
|
|
||||||
|
-- | Parse @if cond then a else b@ into an 'IfElse' expression.
parseIfElse :: Parser Expr
parseIfElse = IfElse
    <$> (P.kwIf *> spaces *> parseExpr)
    <*> (spaces *> P.kwThen *> spaces *> parseExpr)
    <*> (spaces *> P.kwElse *> spaces *> parseExpr <* spaces)
|
||||||
|
|
||||||
|
-- | Parse a length expression: an expression between two @|@ characters.
parseLength :: Parser Expr
parseLength = LengthOf <$> P.surround '|' '|' parseExpr
|
||||||
|
|
||||||
|
-- | Parse a lambda parameter reference: @#@ followed by a single digit.
parseParameter :: Parser Expr
parseParameter = Parameter . digitToInt <$> (char '#' *> digit <* spaces)
|
||||||
|
|
||||||
|
-- | Parse an expression wrapped in parentheses; the parentheses leave no
-- trace in the resulting 'Expr'.
parseParenthesized :: Parser Expr
parseParenthesized = P.surround '(' ')' parseExpr
|
||||||
|
|
||||||
|
-- | Parse an expression that is not a binary operation or postfix form.
-- Alternatives are tried in the order written.
--
-- NOTE(review): variables are tried before the @rand@ keyword, so 'Random'
-- is only reachable if 'parseVar' rejects the name @rand@.
parseBasicExpr :: Parser Expr
parseBasicExpr =
    (IntLiteral <$> P.int)
    <|> parseThis
    <|> parseList
    <|> parseSplit
    <|> parseLength
    <|> parseParameter
    <|> parseParenthesized
    <|> (Var <$> try parseVar)
    <|> (Random <$ P.kwRand)
    <|> parseIfElse
|
||||||
|
|
||||||
|
-- | Parse one postfix operation: an index access or a call.
parsePostfix :: Parser (Expr -> Expr)
parsePostfix = choice [parsePostfixAccess, parsePostfixCall]
|
||||||
|
|
||||||
|
-- | Parse @[expr]@ with an optional trailing @!@ marker, returning a
-- function that wraps the accessed expression in an 'Access'.
parsePostfixAccess :: Parser (Expr -> Expr)
parsePostfixAccess = do
    index <- P.surround '[' ']' parseExpr
    marker <- parseSelectorMarker <* spaces
    return (\target -> Access target index marker)
|
||||||
|
|
||||||
|
-- | Parse a parenthesised, comma-separated argument list, returning a
-- function that wraps the callee expression in a 'FunctionCall'.
parsePostfixCall :: Parser (Expr -> Expr)
parsePostfixCall = do
    args <- between open close (parseExpr `sepBy` comma)
    return (\callee -> FunctionCall callee args)
  where
    open = char '(' *> spaces
    close = char ')' *> spaces
    comma = char ',' *> spaces
|
||||||
|
|
||||||
|
-- | Parse a basic expression followed by any number of postfix
-- operations, applied left to right.
parsePostfixedExpr :: Parser Expr
parsePostfixedExpr = do
    base <- parseBasicExpr <* spaces
    postfixes <- many parsePostfix
    return (foldl (\acc wrap -> wrap acc) base postfixes)
|
||||||
|
|
||||||
|
-- | Parse a full expression with binary operators.
--
-- Each list entry is one precedence level, tightest first.  @*@ and @/@
-- share a level, as do @+@ and @-@, so that chains such as @a - b + c@
-- associate to the left as @(a - b) + c@ (with separate levels the same
-- input parsed as @a - (b + c)@).
parseExpr :: Parser Expr
parseExpr = P.precedence BinOp parsePostfixedExpr
    [ P.op "*" Multiply <|> P.op "/" Divide
    , P.op "+" Add <|> P.op "-" Subtract
    , P.op "<<" Insert
    , P.op "++" Concat
    , try (P.op "<=" LessThanEq) <|> try (P.op ">=" GreaterThanEq) <|>
      P.op "<" LessThan <|> P.op ">" GreaterThan <|>
      P.op "==" Equal <|> P.op "!=" NotEqual
    , P.op "&&" And <|> P.op "||" Or
    ]
|
||||||
|
|
||||||
|
-- | Parse a function definition: @name(p1, p2, ...) = body@.
parseFunction :: Parser Function
parseFunction = do
    name <- parseVar
    params <- between open close (parseVar `sepBy` (char ',' *> spaces))
    _ <- char '=' *> spaces
    body <- parseExpr <* spaces
    return (Function name params body)
  where
    open = spaces *> char '(' *> spaces
    close = spaces *> char ')' *> spaces
|
||||||
|
|
||||||
|
-- | Parse a program: one or more function definitions separated by @;@.
parseProg :: Parser Prog
parseProg = fmap Prog (parseFunction `sepBy1` (char ';' *> spaces))

-- | Run 'parseProg' on the given input, starting with no split context.
parse :: SourceName -> String -> Either ParseError Prog
parse sourceName input = runParser parseProg Nothing sourceName input
|
||||||
|
|
||||||
|
{- "Type" checker -}
|
||||||
|
-- | Combine two possible types: the result is 'List' if either side is
-- 'List', and 'Any' otherwise.
mergePossibleType :: PossibleType -> PossibleType -> PossibleType
mergePossibleType a b
    | a == List || b == List = List
    | otherwise = Any
|
||||||
|
|
||||||
|
-- | Classify an expression as 'List' or 'Any' with respect to variable @s@.
--
-- A reference to @s@ itself, a list literal, and the insert/concat
-- operators count as 'List'; a split is classified by its result
-- expression; an if/else merges the classifications of all three parts.
getPossibleType :: String -> Expr -> PossibleType
getPossibleType s expr = case expr of
    Var s'
        | s == s' -> List
        | otherwise -> Any
    ListLiteral _ -> List
    Split _ _ e -> getPossibleType s e
    IfElse i t e -> foldl1 mergePossibleType (map (getPossibleType s) [i, t, e])
    BinOp Insert _ _ -> List
    BinOp Concat _ _ -> List
    _ -> Any
|
||||||
|
|
||||||
|
{- Translator -}
|
||||||
|
type Translator = Control.Monad.State.State (Map.Map String [String], Int)
|
||||||
|
|
||||||
|
-- | Name of the temporary for the current counter value (@tempN@).
currentTemp :: Translator String
currentTemp = gets (("temp" ++) . show . snd)

-- | Advance the temporary counter by one and return the new name.
incrementTemp :: Translator String
incrementTemp = modify (\(vars, n) -> (vars, n + 1)) >> currentTemp
|
||||||
|
|
||||||
|
-- | Whether an expression contains a 'Parameter' reference.
--
-- For an 'Access' only the accessed expression is inspected, not the
-- index expression.
hasLambda :: Expr -> Bool
hasLambda expr = case expr of
    ListLiteral es -> any hasLambda es
    Split e ss r -> hasLambda e || any selectorHasLambda ss || hasLambda r
    IfElse i t e -> hasLambda i || hasLambda t || hasLambda e
    BinOp _ l r -> hasLambda l || hasLambda r
    FunctionCall e es -> any hasLambda (e : es)
    LengthOf e -> hasLambda e
    Access e _ _ -> hasLambda e
    Parameter{} -> True
    _ -> False
  where
    selectorHasLambda (Selector _ e) = hasLambda e
|
||||||
|
|
||||||
|
-- | Translate a program to Python statements, starting from an empty
-- selector map and temporary counter 0.
translate :: Prog -> [Py.PyStmt]
translate p = evalState (translateProg p) (Map.empty, 0)

-- | Translate every function in order and concatenate the results.
translateProg :: Prog -> Translator [Py.PyStmt]
translateProg (Prog fs) = concat <$> mapM translateFunction fs
|
||||||
|
|
||||||
|
-- | Translate a function definition into a single Python @def@.
--
-- If the first parameter is classified as a list by 'getPossibleType',
-- the body starts with a guard that returns the parameter unchanged when
-- it equals the empty list.  The translated body statements follow, and
-- the translated body expression is returned last.
translateFunction :: Function -> Translator [Py.PyStmt]
translateFunction (Function n ps ex) = do
    (bodyStmts, result) <- translateExpr ex
    let stmts = concatMap guardFor (take 1 ps) ++ bodyStmts ++ [Py.Return result]
    return [Py.FunctionDef n ps stmts]
  where
    guardFor p
        | getPossibleType p ex == List =
            [Py.IfElse (isEmpty p) [Py.Return (Py.Var p)] [] Nothing]
        | otherwise = []
    isEmpty p = Py.BinOp Py.Equal (Py.Var p) (Py.ListLiteral [])
|
||||||
|
|
||||||
|
-- | Translate a selector into a zero-argument Python function named after
-- the selector.
--
-- The generated body contains, in order: the statements needed by the
-- selector's expression, calls to every prerequisite recorded for this
-- selector in the translator state, a store of the value into
-- @cache[name]@ if that key is not yet present, and a return of
-- @cache[name]@.
translateSelector :: Selector -> Translator Py.PyStmt
translateSelector (Selector n e) = do
    (setup, value) <- translateExpr e
    deps <- gets (Map.findWithDefault [] n . fst)
    let prereqCalls = [Py.Standalone (Py.FunctionCall (Py.Var dep) []) | dep <- deps]
    return $ Py.FunctionDef n [] (setup ++ prereqCalls ++ memoised value)
  where
    key = Py.StrLiteral n
    cache = Py.Var "cache"
    memoised value =
        [ Py.IfElse (Py.NotIn key cache) [Py.Assign (Py.AccessPat cache [key]) value] [] Nothing
        , Py.Return (Py.Access cache [key])
        ]
|
||||||
|
|
||||||
|
-- | Translate an expression into the Python statements that must run
-- first, paired with the Python expression holding the result.
translateExpr :: Expr -> Translator ([Py.PyStmt], Py.PyExpr)
-- A name registered as a selector is a generated function and is called;
-- any other name is a plain variable.
translateExpr (Var s) = do
    vs <- gets fst
    let sVar = Py.Var s
    let expr = if Map.member s vs then Py.FunctionCall sVar [] else sVar
    return ([], expr)
translateExpr (IntLiteral i) = return ([], Py.IntLiteral i)
translateExpr (ListLiteral l) = do
    tl <- mapM translateExpr l
    return (concatMap fst tl, Py.ListLiteral $ map snd tl)
-- The split's subject expression is not translated by this clause.
translateExpr (Split _ ss e') = do
    vs <- gets fst
    let cacheAssign = Py.Assign (Py.VarPat "cache") (Py.DictLiteral [])
    -- Only the outermost split (no selectors registered yet) creates the cache.
    let cacheStmt = [ cacheAssign | Map.size vs == 0 ]
    -- Each selector's prerequisites are all selectors declared before it.
    let prereqs = snd $ foldl (\(ds, m) (Selector n _) -> (n:ds, Map.insert n ds m)) ([], Map.empty) ss
    modify $ first $ Map.union prereqs
    fs <- mapM translateSelector ss
    (sts, te) <- translateExpr e'
    -- Restore the selector map that was active before this split.
    modify $ first $ const vs
    return (cacheStmt ++ fs ++ sts, te)
translateExpr (IfElse i t e) = do
    temp <- incrementTemp
    let tempPat = Py.VarPat temp
    (ists, ie) <- translateExpr i
    (tsts, te) <- translateExpr t
    (ests, ee) <- translateExpr e
    let thenSts = tsts ++ [Py.Assign tempPat te]
    let elseSts = ests ++ [Py.Assign tempPat ee]
    let newIf = Py.IfElse ie thenSts [] $ Just elseSts
    return (ists ++ [newIf], Py.Var temp)
translateExpr (BinOp o l r) = do
    (lsts, le) <- translateExpr l
    (rsts, re) <- translateExpr r
    (opsts, oe) <- translateOp o le re
    return (lsts ++ rsts ++ opsts, oe)
translateExpr (FunctionCall f ps) = do
    (fsts, fe) <- translateExpr f
    tps <- mapM translateExpr ps
    return (fsts ++ concatMap fst tps, Py.FunctionCall fe $ map snd tps)
translateExpr (LengthOf e) =
    second (Py.FunctionCall (Py.Var "len") . return) <$> translateExpr e
-- Random element access.  The container is first bound to a fresh
-- temporary so that the length and the access refer to the same value
-- (previously the temporary was read without ever being assigned).
translateExpr (Access e Random m) = do
    temp <- incrementTemp
    (sts, ce) <- translateExpr e
    let bindTemp = Py.Assign (Py.VarPat temp) ce
    let lenExpr = Py.FunctionCall (Py.Var "len") [Py.Var temp]
    -- NOTE(review): assumes the generated `randint` is Python's
    -- random.randint, whose upper bound is inclusive; hence len - 1.
    let lastIndex = Py.BinOp Py.Subtract lenExpr (Py.IntLiteral 1)
    let randExpr = Py.FunctionCall (Py.Var "randint") [ Py.IntLiteral 0, lastIndex ]
    return (sts ++ [bindTemp], singleAccess (Py.Var temp) randExpr m)
-- An index that mentions a parameter is a filter predicate: a helper
-- function is generated and applied to the container.  Otherwise this is
-- a plain single-element access.
translateExpr (Access c i m) = do
    (csts, ce) <- translateExpr c
    (ists, ie) <- translateExpr i
    temp <- incrementTemp
    if hasLambda i
        then return (csts ++ ists ++ [createFilterLambda temp ie m], Py.FunctionCall (Py.Var temp) [ce])
        else return (csts ++ ists, singleAccess ce ie m)
translateExpr (Parameter i) = return $ ([], Py.Var $ "arg" ++ show i)
translateExpr _ = fail "Invalid expression"
|
||||||
|
|
||||||
|
-- | Access one element of container @c@ at index @i@: a subscript for
-- 'None', or a call to the container's @pop@ method for 'Remove'.
singleAccess :: Py.PyExpr -> Py.PyExpr -> SelectorMarker -> Py.PyExpr
singleAccess c i marker = case marker of
    None -> Py.Access c [i]
    Remove -> Py.FunctionCall (Py.Member c "pop") [i]
|
||||||
|
|
||||||
|
-- | Generate a Python function named @s@ that filters its list argument
-- @arg@ by the predicate expression @e@ and returns the matching elements.
--
-- In both variants the current element is bound to @arg0@ before @e@ is
-- evaluated.  With 'None' the input list is left untouched.  With 'Remove'
-- matching elements are popped from the input list; the index is advanced
-- only when nothing was popped, because a pop shifts the following element
-- into the current position.
createFilterLambda :: String -> Py.PyExpr -> SelectorMarker -> Py.PyStmt
createFilterLambda s e None = Py.FunctionDef s ["arg"]
    [ Py.Assign (Py.VarPat "out") (Py.ListLiteral [])
    , Py.For (Py.VarPat "arg0") (Py.Var "arg")
        [ Py.IfElse e
            [ Py.Standalone $ Py.FunctionCall (Py.Member (Py.Var "out") "append")
                [ Py.Var "arg0" ]
            ]
            []
            Nothing
        ]
    , Py.Return $ Py.Var "out"
    ]
createFilterLambda s e Remove = Py.FunctionDef s ["arg"]
    [ Py.Assign (Py.VarPat "i") $ Py.IntLiteral 0
    , Py.Assign (Py.VarPat "out") (Py.ListLiteral [])
    , Py.While (Py.BinOp Py.LessThan (Py.Var "i") $ Py.FunctionCall (Py.Var "len") [Py.Var "arg"])
        [ -- Bind the element under test; the predicate refers to it as arg0.
          Py.Assign (Py.VarPat "arg0") (Py.Access (Py.Var "arg") [Py.Var "i"])
        , Py.IfElse e
            [ Py.Standalone $ Py.FunctionCall (Py.Member (Py.Var "out") "append")
                [ singleAccess (Py.Var "arg") (Py.Var "i") Remove
                ]
            ]
            []
            (Just [ Py.Assign (Py.VarPat "i") (Py.BinOp Py.Add (Py.Var "i") (Py.IntLiteral 1)) ])
        ]
    , Py.Return $ Py.Var "out"
    ]
|
||||||
|
|
||||||
|
-- | Translate a binary operator applied to two translated operands.
--
-- Every operator except 'Insert' becomes a single Python binary operation
-- with no extra statements ('Concat' uses Python addition).  'Insert'
-- assigns the left operand to a fresh temporary, appends the right operand
-- to it, and yields the temporary.
translateOp :: Op -> Py.PyExpr -> Py.PyExpr -> Translator ([Py.PyStmt], Py.PyExpr)
translateOp operator l r = case operator of
    Add -> simple Py.Add
    Subtract -> simple Py.Subtract
    Multiply -> simple Py.Multiply
    Divide -> simple Py.Divide
    LessThan -> simple Py.LessThan
    LessThanEq -> simple Py.LessThanEq
    GreaterThan -> simple Py.GreaterThan
    GreaterThanEq -> simple Py.GreaterThanEq
    Equal -> simple Py.Equal
    NotEqual -> simple Py.NotEqual
    And -> simple Py.And
    Or -> simple Py.Or
    Concat -> simple Py.Add
    Insert -> do
        temp <- incrementTemp
        let target = Py.Var temp
        return
            ( [ Py.Assign (Py.VarPat temp) l
              , Py.Standalone (Py.FunctionCall (Py.Member target "append") [r])
              ]
            , target
            )
  where
    simple pyOp = return ([], Py.BinOp pyOp l r)
|
||||||
461
code/cs325-langs/src/LanguageThree.hs
Normal file
461
code/cs325-langs/src/LanguageThree.hs
Normal file
@@ -0,0 +1,461 @@
|
|||||||
|
module LanguageThree where
|
||||||
|
import qualified CommonParsing as P
|
||||||
|
import qualified PythonAst as Py
|
||||||
|
import Control.Monad.State
|
||||||
|
import Data.Bifunctor
|
||||||
|
import Data.Foldable
|
||||||
|
import Data.Functor
|
||||||
|
import qualified Data.Map as Map
|
||||||
|
import Data.Maybe
|
||||||
|
import Text.Parsec hiding (State)
|
||||||
|
import Text.Parsec.Char
|
||||||
|
import Text.Parsec.Combinator
|
||||||
|
|
||||||
|
{- Data Types -}
|
||||||
|
data Op
|
||||||
|
= Add
|
||||||
|
| Subtract
|
||||||
|
| Multiply
|
||||||
|
| Divide
|
||||||
|
| LessThan
|
||||||
|
| LessThanEqual
|
||||||
|
| GreaterThan
|
||||||
|
| GreaterThanEqual
|
||||||
|
| Equal
|
||||||
|
| NotEqual
|
||||||
|
| And
|
||||||
|
| Or
|
||||||
|
|
||||||
|
data Expr
|
||||||
|
= TraverserCall String [Expr]
|
||||||
|
| FunctionCall String [Expr]
|
||||||
|
| BinOp Op Expr Expr
|
||||||
|
| Lambda [String] Expr
|
||||||
|
| Var String
|
||||||
|
| IntLiteral Int
|
||||||
|
| BoolLiteral Bool
|
||||||
|
| ListLiteral [Expr]
|
||||||
|
| TupleLiteral [Expr]
|
||||||
|
|
||||||
|
type Branch = (Expr, [Stmt])
|
||||||
|
|
||||||
|
data Stmt
|
||||||
|
= IfElse Branch [Branch] [Stmt]
|
||||||
|
| While Branch
|
||||||
|
| Traverser String [(String, Expr)]
|
||||||
|
| Let Pat Expr
|
||||||
|
| Return Expr
|
||||||
|
| Standalone Expr
|
||||||
|
|
||||||
|
data Pat
|
||||||
|
= VarPat String
|
||||||
|
| TuplePat [Pat]
|
||||||
|
|
||||||
|
data SortedMarker = Sorted | Unsorted deriving Eq
|
||||||
|
|
||||||
|
data Function = Function SortedMarker String [String] [Stmt]
|
||||||
|
|
||||||
|
data Prog = Prog [Function]
|
||||||
|
|
||||||
|
{- Parser -}
|
||||||
|
type Parser = Parsec String ()
|
||||||
|
|
||||||
|
-- | Parse a variable name, rejecting reserved words.
--
-- @elsif@ and @return@ are keywords of this language (see the keyword
-- parsers used by 'parseIf' and 'parseReturn') and are therefore reserved
-- alongside the existing entries.
parseVar :: Parser String
parseVar = P.var
    [ "if", "elif", "elsif", "else"
    , "while", "let", "traverser"
    , "function", "sort"
    , "return"
    , "true", "false"
    ]
|
||||||
|
|
||||||
|
-- | Parse the literal @true@ or @false@.
--
-- Each literal is wrapped in 'try' so that an identifier merely sharing a
-- prefix with it (for example @temp@ or @first@) fails here without
-- consuming input and can still be parsed by a later alternative.  The
-- literal must also not run straight into another identifier character,
-- so @truest@ is not read as @true@ followed by @st@.
parseBool :: Parser Bool
parseBool = literal "true" True <|> literal "false" False
  where
    literal text value =
        try (value <$ (string text *> notFollowedBy (alphaNum <|> char '_')))
|
||||||
|
|
||||||
|
-- | Parse a list literal: comma-separated expressions in square brackets.
parseList :: Parser Expr
parseList = fmap ListLiteral (P.list '[' ']' ',' parseExpr)

-- | Parse comma-separated expressions in parentheses.
parseTupleElems :: Parser [Expr]
parseTupleElems = P.list '(' ')' ',' parseExpr
|
||||||
|
|
||||||
|
-- | Parse a parenthesised expression list.  Exactly one element yields
-- that element itself; any other count yields a 'TupleLiteral'.
parseTuple :: Parser Expr
parseTuple = unwrap <$> parseTupleElems
  where
    unwrap [single] = single
    unwrap elems = TupleLiteral elems
|
||||||
|
|
||||||
|
-- | Parse a lambda: @(p1, p2, ...) -> expr@.  The whole parser is wrapped
-- in 'try', so a failure consumes no input.
parseLambda :: Parser Expr
parseLambda = try (Lambda <$> params <*> (arrow *> parseExpr))
  where
    params = P.list '(' ')' ',' parseVar
    arrow = string "->" *> spaces
|
||||||
|
|
||||||
|
-- | Parse a call: @name!(args)@ is a traverser call, @name(args)@ a
-- function call.  The whole parser is wrapped in 'try'.
parseCall :: Parser Expr
parseCall = try $ do
    callee <- parseVar
    traverserCall callee <|> functionCall callee
  where
    traverserCall name = TraverserCall name <$> (char '!' *> parseTupleElems)
    functionCall name = FunctionCall name <$> parseTupleElems
|
||||||
|
|
||||||
|
-- | Parse an expression that is not a binary operation.  Alternatives are
-- tried in the order written.
parseBasic :: Parser Expr
parseBasic =
    (IntLiteral <$> P.int)
    <|> (BoolLiteral <$> parseBool)
    <|> try parseCall
    <|> (Var <$> parseVar)
    <|> parseList
    <|> parseLambda
    <|> parseTuple
|
||||||
|
|
||||||
|
-- | Parse a full expression with binary operators.  The levels are listed
-- tightest first: multiplicative, additive, comparison, @and@, @or@.
parseExpr :: Parser Expr
parseExpr = P.precedence BinOp parseBasic
    [ multiplicative
    , additive
    , comparison
    , P.op "and" And
    , P.op "or" Or
    ]
  where
    multiplicative = P.op "*" Multiply <|> P.op "/" Divide
    additive = P.op "+" Add <|> P.op "-" Subtract
    -- The two-character forms are tried (with backtracking) before the
    -- one-character forms they start with.
    comparison =
        P.op "==" Equal <|> P.op "!=" NotEqual
        <|> try (P.op "<=" LessThanEqual) <|> P.op "<" LessThan
        <|> try (P.op ">=" GreaterThanEqual) <|> P.op ">" GreaterThan
|
||||||
|
|
||||||
|
-- | Parse zero or more statements enclosed in braces, skipping whitespace
-- after each brace.
parseBlock :: Parser [Stmt]
parseBlock = between (char '{' *> spaces) (char '}' *> spaces) (many parseStmt)
|
||||||
|
|
||||||
|
-- | Parse a condition expression followed by a block.
parseBranch :: Parser Branch
parseBranch = do
    condition <- parseExpr <* spaces
    body <- parseBlock
    return (condition, body)
|
||||||
|
|
||||||
|
-- | Parse an @if@ statement with any number of @elsif@ branches and an
-- optional @else@ block (an absent @else@ gives an empty statement list).
parseIf :: Parser Stmt
parseIf = IfElse
    <$> (P.kwIf *> parseBranch)
    <*> many (P.kwElsif *> parseBranch)
    <*> option [] (try (P.kwElse *> parseBlock))
|
||||||
|
|
||||||
|
-- | Parse a @while@ loop: the keyword followed by a condition and block.
parseWhile :: Parser Stmt
parseWhile = do
    P.kwWhile
    While <$> parseBranch
|
||||||
|
|
||||||
|
-- | Parse a traverser declaration: @traverser name(key: expr, ...);@.
parseTraverser :: Parser Stmt
parseTraverser = do
    name <- P.kwTraverser *> parseVar
    options <- P.list '(' ')' ',' parseKey
    _ <- char ';' *> spaces
    return (Traverser name options)
|
||||||
|
|
||||||
|
-- | Parse one @key: expr@ option of a traverser declaration.
parseKey :: Parser (String, Expr)
parseKey = do
    key <- parseVar
    _ <- spaces *> char ':' *> spaces
    value <- parseExpr
    return (key, value)
|
||||||
|
|
||||||
|
-- | Parse a binding statement: @let pattern = expr;@.
parseLet :: Parser Stmt
parseLet = do
    pat <- P.kwLet *> parsePat
    _ <- char '=' *> spaces
    value <- parseExpr
    _ <- char ';' *> spaces
    return (Let pat value)
|
||||||
|
|
||||||
|
-- | Parse a return statement: @return expr;@.
parseReturn :: Parser Stmt
parseReturn = do
    value <- P.kwReturn *> parseExpr
    _ <- char ';' *> spaces
    return (Return value)
|
||||||
|
|
||||||
|
-- | Parse a pattern: a variable, or a parenthesised comma-separated list
-- of nested patterns.
parsePat :: Parser Pat
parsePat = choice
    [ VarPat <$> parseVar
    , TuplePat <$> P.list '(' ')' ',' parsePat
    ]
|
||||||
|
|
||||||
|
-- | Parse one statement.  Keyword-introduced statements are tried first;
-- anything else is parsed as an expression terminated by @;@.
parseStmt :: Parser Stmt
parseStmt =
    parseTraverser
    <|> parseLet
    <|> parseIf
    <|> parseWhile
    <|> parseReturn
    <|> standalone
  where
    standalone = Standalone <$> (parseExpr <* char ';' <* spaces)
|
||||||
|
|
||||||
|
-- | Parse a function definition: an optional @sorted@ marker, the
-- @function@ keyword, a name, a parenthesised parameter list and a block.
parseFunction :: Parser Function
parseFunction = do
    marker <- option Unsorted (Sorted <$ P.kwSorted)
    name <- P.kwFunction *> parseVar
    params <- P.list '(' ')' ',' parseVar
    body <- parseBlock
    return (Function marker name params body)
|
||||||
|
|
||||||
|
-- | Parse a program: zero or more function definitions.
parseProg :: Parser Prog
parseProg = fmap Prog (many parseFunction)

-- | Run 'parseProg' on the given input under the given source name.
parse :: String -> String -> Either ParseError Prog
parse sourceName input = runParser parseProg () sourceName input
|
||||||
|
|
||||||
|
{- Translation -}
|
||||||
|
data TraverserBounds = Range Py.PyExpr Py.PyExpr | Random
|
||||||
|
|
||||||
|
data TraverserData = TraverserData
|
||||||
|
{ list :: Maybe String
|
||||||
|
, bounds :: Maybe TraverserBounds
|
||||||
|
, rev :: Bool
|
||||||
|
}
|
||||||
|
|
||||||
|
data ValidTraverserData = ValidTraverserData
|
||||||
|
{ validList :: String
|
||||||
|
, validBounds :: TraverserBounds
|
||||||
|
, validRev :: Bool
|
||||||
|
}
|
||||||
|
|
||||||
|
type Translator = State (Map.Map String ValidTraverserData, [Py.PyStmt], Int)
|
||||||
|
|
||||||
|
-- | Read the map of traversers currently in scope.
getScoped :: Translator (Map.Map String ValidTraverserData)
getScoped = do
    (traversers, _, _) <- get
    return traversers

-- | Replace the map of traversers in scope, leaving the pending
-- statements and the temporary counter untouched.
setScoped :: Map.Map String ValidTraverserData -> Translator ()
setScoped m = do
    (_, pending, counter) <- get
    put (m, pending, counter)
|
||||||
|
|
||||||
|
-- | Run an action, then restore the traverser map to what it was before
-- the action ran.  The action's result is returned.
scope :: Translator a -> Translator a
scope m = getScoped >>= \saved -> m <* setScoped saved
|
||||||
|
|
||||||
|
-- | Remove the traverser named @s@ from the traverser map.
clearTraverser :: String -> Translator ()
clearTraverser s = modify dropIt
  where
    dropIt (traversers, pending, counter) = (Map.delete s traversers, pending, counter)

-- | Record (or overwrite) the traverser named @s@ in the traverser map.
putTraverser :: String -> ValidTraverserData -> Translator ()
putTraverser s vtd = modify store
  where
    store (traversers, pending, counter) = (Map.insert s vtd traversers, pending, counter)
|
||||||
|
|
||||||
|
-- | Name of the temporary for the current counter value (@tempN@).
getTemp :: Translator String
getTemp = do
    (_, _, counter) <- get
    return ("temp" ++ show counter)

-- | Advance the temporary counter (third state component) by one and
-- return the new temporary name.
freshTemp :: Translator String
freshTemp = do
    modify (\(traversers, pending, counter) -> (traversers, pending, counter + 1))
    getTemp
|
||||||
|
|
||||||
|
-- | Add a statement to the pending-statement list (second state
-- component).  The statement is placed at the front of the list.
emitStatement :: Py.PyStmt -> Translator ()
emitStatement stmt =
    modify (\(traversers, pending, counter) -> (traversers, stmt : pending, counter))
|
||||||
|
|
||||||
|
-- | Run a translator action and return the statements it emitted together
-- with its result.
--
-- The pending-statement list is cleared before and after the action, so
-- anything pending beforehand is discarded.  'emitStatement' pushes onto
-- the front of the list, so the list is reversed here to hand the
-- statements back in the order in which they were emitted.
collectStatements :: Translator a -> Translator ([Py.PyStmt], a)
collectStatements t = do
    modify (first $ const [])
    a <- t
    ss <- gets $ \(_, ss, _) -> ss
    modify (first $ const [])
    return (reverse ss, a)
|
||||||
|
|
||||||
|
-- | Run an action that produces a statement and return the statements it
-- emitted along the way followed by the produced statement.
withdrawStatements :: Translator (Py.PyStmt) -> Translator [Py.PyStmt]
withdrawStatements ts = do
    (emitted, stmt) <- collectStatements ts
    return (emitted ++ [stmt])
|
||||||
|
|
||||||
|
-- | Look up the traverser named @s@, failing with "Invalid traverser"
-- when it is not in the traverser map.
requireTraverser :: String -> Translator ValidTraverserData
requireTraverser s = do
    found <- gets (\(traversers, _, _) -> Map.lookup s traversers)
    maybe (fail "Invalid traverser") return found
|
||||||
|
|
||||||
|
-- | Build the expression for position @e@ moved by @by@ steps: the offset
-- (@by * 1@) is subtracted for a reversed traverser and added otherwise.
traverserIncrement :: Bool -> Py.PyExpr -> Py.PyExpr -> Py.PyExpr
traverserIncrement rev by e
    | rev = Py.BinOp Py.Subtract e offset
    | otherwise = Py.BinOp Py.Add e offset
  where
    offset = Py.BinOp Py.Multiply by (Py.IntLiteral 1)
|
||||||
|
|
||||||
|
-- | Build the validity test for position @e@ of a traverser.
--
-- For a ranged traverser the position must be at least the lower bound
-- when reversed, or strictly below the upper bound otherwise.  A random
-- traverser is always valid.
traverserValid :: Py.PyExpr -> ValidTraverserData -> Py.PyExpr
traverserValid e vtd = check (validBounds vtd)
  where
    check Random = Py.BoolLiteral True
    check (Range from to)
        | validRev vtd = Py.BinOp Py.GreaterThanEq e from
        | otherwise = Py.BinOp Py.LessThan e to
|
||||||
|
|
||||||
|
-- | Build the statement that advances the traverser variable @s@: a
-- ranged traverser moves by one (down when reversed, up otherwise), a
-- random traverser is re-randomised over its list.
traverserStep :: String -> ValidTraverserData -> Py.PyStmt
traverserStep s vtd = advance (validBounds vtd)
  where
    advance Random = traverserRandom s (validList vtd)
    advance (Range _ _) =
        Py.Assign (Py.VarPat s) (Py.BinOp direction (Py.Var s) (Py.IntLiteral 1))
    direction
        | validRev vtd = Py.Subtract
        | otherwise = Py.Add
|
||||||
|
|
||||||
|
-- | Build the statement @s = random.randrange(len(l))@.
traverserRandom :: String -> String -> Py.PyStmt
traverserRandom s l = Py.Assign (Py.VarPat s) pick
  where
    size = Py.FunctionCall (Py.Var "len") [Py.Var l]
    pick = Py.FunctionCall (Py.Var "random.randrange") [size]
|
||||||
|
|
||||||
|
-- | Whether the pattern binds the variable @s@, looking inside tuple
-- patterns.  Any other kind of pattern counts as not binding it.
hasVar :: String -> Py.PyPat -> Bool
hasVar s pat = case pat of
    Py.VarPat name -> name == s
    Py.TuplePat pats -> any (hasVar s) pats
    _ -> False
|
||||||
|
|
||||||
|
-- | Replace every free occurrence of the variable @s@ with the expression
-- @e@ inside a Python expression.
--
-- A lambda whose parameters bind @s@ shadows it, so its body is left
-- untouched; the body of any other lambda is substituted (the original
-- test was the wrong way round).  Expressions with no sub-expressions
-- handled here, such as literals, are returned unchanged instead of
-- falling through to a pattern-match failure.
substituteVariable :: String -> Py.PyExpr -> Py.PyExpr -> Py.PyExpr
substituteVariable s e = go
  where
    go (Py.BinOp o l r) = Py.BinOp o (go l) (go r)
    go (Py.ListLiteral es) = Py.ListLiteral (map go es)
    go (Py.DictLiteral es) = Py.DictLiteral (map (bimap go go) es)
    go (Py.Lambda ps body)
        | any (hasVar s) ps = Py.Lambda ps body
        | otherwise = Py.Lambda ps (go body)
    go (Py.Var s')
        | s == s' = e
        | otherwise = Py.Var s'
    go (Py.TupleLiteral es) = Py.TupleLiteral (map go es)
    go (Py.FunctionCall f es) = Py.FunctionCall (go f) (map go es)
    go (Py.Access c es) = Py.Access (go c) (map go es)
    go (Py.Ternary i t el) = Py.Ternary (go i) (go t) (go el)
    go (Py.Member c m) = Py.Member (go c) m
    go (Py.In e1 e2) = Py.In (go e1) (go e2)
    go (Py.NotIn e1 e2) = Py.NotIn (go e1) (go e2)
    go (Py.Slice f t) = Py.Slice (go <$> f) (go <$> t)
    go other = other
|
||||||
|
|
||||||
|
-- | Translate an expression to a Python expression.  Traverser operations
-- (@name!(...)@) are expanded inline; @step@ and @bisect@ additionally
-- emit statements through 'emitStatement'.
translateExpr :: Expr -> Translator Py.PyExpr
-- pop!(t): pop the element at the traverser's position from its list.
translateExpr (TraverserCall "pop" [Var s]) = do
    vtd <- requireTraverser s
    return (Py.FunctionCall (Py.Member (Py.Var (validList vtd)) "pop") [Py.Var s])
-- pos!(t): the traverser variable itself.
translateExpr (TraverserCall "pos" [Var s]) =
    Py.Var s <$ requireTraverser s
-- at!(t): the list element at the traverser's position.
translateExpr (TraverserCall "at" [Var s]) = do
    vtd <- requireTraverser s
    return (Py.Access (Py.Var (validList vtd)) [Py.Var s])
-- at!(t, n): the list element n steps away from the traverser's position.
translateExpr (TraverserCall "at" [Var s, IntLiteral i]) = do
    vtd <- requireTraverser s
    let index = traverserIncrement (validRev vtd) (Py.IntLiteral i) (Py.Var s)
    return (Py.Access (Py.Var (validList vtd)) [index])
-- step!(t): emits the advancing statement; the expression itself is 0.
translateExpr (TraverserCall "step" [Var s]) = do
    vtd <- requireTraverser s
    emitStatement (traverserStep s vtd)
    return (Py.IntLiteral 0)
-- canstep!(t): validity test applied to the position one step ahead.
translateExpr (TraverserCall "canstep" [Var s]) = do
    vtd <- requireTraverser s
    let next = traverserIncrement (validRev vtd) (Py.IntLiteral 1) (Py.Var s)
    return (traverserValid next vtd)
-- valid!(t): validity test applied to the current position.
translateExpr (TraverserCall "valid" [Var s]) =
    traverserValid (Py.Var s) <$> requireTraverser s
-- subset!(a, b): slice of the shared list between the two positions.
translateExpr (TraverserCall "subset" [Var s1, Var s2]) = do
    l1 <- validList <$> requireTraverser s1
    l2 <- validList <$> requireTraverser s2
    if l1 /= l2
        then fail "Incompatible traversers!"
        else return $ Py.Access (Py.Var l1) [Py.Slice (Just $ Py.Var s1) (Just $ Py.Var s2)]
-- bisect!(t, (x) -> pred): emits a helper function that walks the
-- traverser to the end, collecting elements for which the predicate holds
-- into one list and the rest into another, and calls it.
translateExpr (TraverserCall "bisect" [Var s, Lambda [x] e]) = do
    vtd <- requireTraverser s
    helperName <- freshTemp
    predicate <- translateExpr e
    let current = Py.Access (Py.Var $ validList vtd) [Py.Var s]
    let test = substituteVariable x current predicate
    let appendTo target =
            Py.Standalone $ Py.FunctionCall (Py.Member (Py.Var target) "append") [ current ]
    emitStatement $ Py.FunctionDef helperName []
        [ Py.Nonlocal [s]
        , Py.Assign (Py.VarPat "l") (Py.ListLiteral [])
        , Py.Assign (Py.VarPat "r") (Py.ListLiteral [])
        , Py.While (traverserValid (Py.Var s) vtd)
            [ Py.IfElse test [ appendTo "l" ] [] (Just [ appendTo "r" ])
            , traverserStep s vtd
            ]
        , Py.Return $ Py.TupleLiteral [Py.Var "l", Py.Var "r"]
        ]
    return $ Py.FunctionCall (Py.Var helperName) []
translateExpr (TraverserCall _ _) = fail "Invalid traverser operation"
translateExpr (FunctionCall f ps) =
    Py.FunctionCall (Py.Var f) <$> mapM translateExpr ps
translateExpr (BinOp o l r) = do
    left <- translateExpr l
    right <- translateExpr r
    return (Py.BinOp (translateOp o) left right)
translateExpr (Lambda ps e) = do
    body <- translateExpr e
    return (Py.Lambda (map Py.VarPat ps) body)
translateExpr (Var s) = return (Py.Var s)
translateExpr (IntLiteral i) = return (Py.IntLiteral i)
translateExpr (BoolLiteral b) = return (Py.BoolLiteral b)
translateExpr (ListLiteral es) = fmap Py.ListLiteral (mapM translateExpr es)
translateExpr (TupleLiteral es) = fmap Py.TupleLiteral (mapM translateExpr es)
|
||||||
|
|
||||||
|
applyOption :: TraverserData -> (String, Py.PyExpr) -> Maybe TraverserData
|
||||||
|
applyOption td ("list", Py.Var s) =
|
||||||
|
return $ td { list = Just s }
|
||||||
|
applyOption td ("span", Py.TupleLiteral [f, t]) =
|
||||||
|
return $ td { bounds = Just $ Range f t }
|
||||||
|
applyOption td ("random", Py.BoolLiteral True) =
|
||||||
|
return $ td { bounds = Just Random }
|
||||||
|
applyOption td ("reverse", Py.BoolLiteral b) =
|
||||||
|
return $ td { rev = b }
|
||||||
|
applyOption td _ = Nothing
|
||||||
|
|
||||||
|
translateOption :: (String, Expr) -> Translator (String, Py.PyExpr)
|
||||||
|
translateOption (s, e) = (,) s <$> translateExpr e
|
||||||
|
|
||||||
|
defaultTraverser :: TraverserData
|
||||||
|
defaultTraverser =
|
||||||
|
TraverserData { list = Nothing, bounds = Nothing, rev = False }
|
||||||
|
|
||||||
|
translateBranch :: Branch -> Translator (Py.PyExpr, [Py.PyStmt])
|
||||||
|
translateBranch (e, s) = (,) <$> translateExpr e <*>
|
||||||
|
(concat <$> mapM (withdrawStatements . translateStmt) s)
|
||||||
|
|
||||||
|
-- | Translate one source-language statement into a single Python statement
-- inside the 'Translator' monad.
--
-- Nested statement lists (branch bodies, else bodies) are translated with
-- @withdrawStatements . translateStmt@ and flattened with 'concat'.
-- NOTE(review): 'withdrawStatements' is defined outside this view; it
-- presumably returns the translated statement together with any statements
-- queued through 'emitStatement' -- confirm against its definition.
translateStmt :: Stmt -> Translator Py.PyStmt
translateStmt (IfElse i els e) = uncurry Py.IfElse
    <$> (translateBranch i) <*> (mapM translateBranch els) <*> convertElse e
    where
        -- An empty else-body means "no else clause" in the Python AST.
        convertElse [] = return Nothing
        convertElse es = Just . concat <$>
            mapM (withdrawStatements . translateStmt) es
translateStmt (While b) = uncurry Py.While <$> translateBranch b
translateStmt (Traverser s os) =
    foldlM applyOption defaultTraverser <$> mapM translateOption os >>= saveTraverser
    where
        -- Register the traverser and produce the statement that initialises
        -- its position variable. Both a list and bounds must have been
        -- supplied by the options; anything else is rejected.
        saveTraverser :: Maybe TraverserData -> Translator Py.PyStmt
        saveTraverser (Just (td@TraverserData { list = Just l, bounds = Just bs})) =
            putTraverser s vtd $> translateInitialBounds s vtd
            where
                vtd = ValidTraverserData
                    { validList = l
                    , validBounds = bs
                    , validRev = rev td
                    }
        -- Catch-all: covers both an option that failed to apply (Nothing)
        -- and a traverser that is missing its list or its bounds. Matching
        -- only Nothing here left the latter case as a non-exhaustive
        -- pattern crash instead of a reported translation failure.
        saveTraverser _ = fail "Invalid traverser (!)"
translateStmt (Let p e) = Py.Assign <$> translatePat p <*> translateExpr e
translateStmt (Return e) = Py.Return <$> translateExpr e
translateStmt (Standalone e) = Py.Standalone <$> translateExpr e
|
||||||
|
|
||||||
|
translateInitialBounds :: String -> ValidTraverserData -> Py.PyStmt
|
||||||
|
translateInitialBounds s vtd =
|
||||||
|
case (validBounds vtd, validRev vtd) of
|
||||||
|
(Random, _) -> traverserRandom s $ validList vtd
|
||||||
|
(Range l _, False) -> Py.Assign (Py.VarPat s) l
|
||||||
|
(Range _ r, True) -> Py.Assign (Py.VarPat s) r
|
||||||
|
|
||||||
|
translatePat :: Pat -> Translator Py.PyPat
|
||||||
|
translatePat (VarPat s) = clearTraverser s $> Py.VarPat s
|
||||||
|
translatePat (TuplePat ts) = Py.TuplePat <$> mapM translatePat ts
|
||||||
|
|
||||||
|
translateOp :: Op -> Py.PyBinOp
|
||||||
|
translateOp Add = Py.Add
|
||||||
|
translateOp Subtract = Py.Subtract
|
||||||
|
translateOp Multiply = Py.Multiply
|
||||||
|
translateOp Divide = Py.Divide
|
||||||
|
translateOp LessThan = Py.LessThan
|
||||||
|
translateOp LessThanEqual = Py.LessThanEq
|
||||||
|
translateOp GreaterThan = Py.GreaterThan
|
||||||
|
translateOp GreaterThanEqual = Py.GreaterThanEq
|
||||||
|
translateOp Equal = Py.Equal
|
||||||
|
translateOp NotEqual = Py.NotEqual
|
||||||
|
translateOp And = Py.And
|
||||||
|
translateOp Or = Py.Or
|
||||||
|
|
||||||
|
translateFunction :: Function -> [Py.PyStmt]
|
||||||
|
translateFunction (Function m s ps ss) = return $ Py.FunctionDef s ps $
|
||||||
|
[ Py.Standalone $ Py.FunctionCall (Py.Member (Py.Var p) "sort") []
|
||||||
|
| p <- take 1 ps, m == Sorted ] ++ stmts
|
||||||
|
where
|
||||||
|
stmts = concat $ evalState
|
||||||
|
(mapM (withdrawStatements . translateStmt) ss) (Map.empty, [], 0)
|
||||||
|
|
||||||
|
translate :: Prog -> [Py.PyStmt]
|
||||||
|
translate (Prog fs) =
|
||||||
|
(Py.FromImport "bisect" ["bisect"]) :
|
||||||
|
(Py.Import "random") : concatMap translateFunction fs
|
||||||
198
code/cs325-langs/src/LanguageTwo.hs
Normal file
198
code/cs325-langs/src/LanguageTwo.hs
Normal file
@@ -0,0 +1,198 @@
|
|||||||
|
module LanguageTwo where
|
||||||
|
import qualified PythonAst as Py
|
||||||
|
import qualified CommonParsing as P
|
||||||
|
import Data.Char
|
||||||
|
import Data.Functor
|
||||||
|
import Text.Parsec
|
||||||
|
import Text.Parsec.Char
|
||||||
|
import Text.Parsec.Combinator
|
||||||
|
|
||||||
|
{- Data Types -}
|
||||||
|
data Op
|
||||||
|
= Add
|
||||||
|
| Subtract
|
||||||
|
| Multiply
|
||||||
|
| Divide
|
||||||
|
| Equal
|
||||||
|
| NotEqual
|
||||||
|
| And
|
||||||
|
| Or
|
||||||
|
|
||||||
|
data Expr
|
||||||
|
= IntLiteral Int
|
||||||
|
| BinOp Op Expr Expr
|
||||||
|
| Var String
|
||||||
|
| Length Expr
|
||||||
|
|
||||||
|
data Stmt
|
||||||
|
= IfElse Expr Stmt (Maybe Stmt)
|
||||||
|
| Assign String Expr
|
||||||
|
| Block [Stmt]
|
||||||
|
|
||||||
|
data Prog = Prog Expr [Stmt] [Stmt]
|
||||||
|
|
||||||
|
{- Parser -}
|
||||||
|
type Parser = Parsec String ()
|
||||||
|
|
||||||
|
parseVar :: Parser String
|
||||||
|
parseVar = P.var [ "if", "else", "state", "effect", "combine" ]
|
||||||
|
|
||||||
|
parseLength :: Parser Expr
|
||||||
|
parseLength = Length <$> P.surround '|' '|' parseExpr
|
||||||
|
|
||||||
|
parseParenthesized :: Parser Expr
|
||||||
|
parseParenthesized = P.surround '(' ')' parseExpr
|
||||||
|
|
||||||
|
parseBasic :: Parser Expr
|
||||||
|
parseBasic = choice
|
||||||
|
[ IntLiteral <$> P.int
|
||||||
|
, Var <$> parseVar
|
||||||
|
, parseLength
|
||||||
|
, parseParenthesized
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
parseExpr :: Parser Expr
|
||||||
|
parseExpr = P.precedence BinOp parseBasic
|
||||||
|
[ P.op "*" Multiply <|> P.op "/" Divide
|
||||||
|
, P.op "+" Add <|> P.op "-" Subtract
|
||||||
|
, P.op "==" Equal <|> P.op "!=" NotEqual
|
||||||
|
, P.op "&&" And
|
||||||
|
, try $ P.op "||" Or
|
||||||
|
]
|
||||||
|
|
||||||
|
parseIf :: Parser Stmt
|
||||||
|
parseIf = do
|
||||||
|
P.kwIf >> spaces
|
||||||
|
c <- parseParenthesized
|
||||||
|
t <- parseStmt <* spaces
|
||||||
|
e <- (Just <$> (P.kwElse >> spaces *> parseStmt)) <|> return Nothing
|
||||||
|
return $ IfElse c t e
|
||||||
|
|
||||||
|
parseBlockStmts :: Parser [Stmt]
|
||||||
|
parseBlockStmts = P.surround '{' '}' (many parseStmt)
|
||||||
|
|
||||||
|
parseBlock :: Parser Stmt
|
||||||
|
parseBlock = Block <$> parseBlockStmts
|
||||||
|
|
||||||
|
parseAssign :: Parser Stmt
|
||||||
|
parseAssign = Assign <$>
|
||||||
|
(parseVar <* char '=' <* spaces) <*>
|
||||||
|
parseExpr <* (char ';' >> spaces)
|
||||||
|
|
||||||
|
parseStmt :: Parser Stmt
|
||||||
|
parseStmt = choice
|
||||||
|
[ parseIf
|
||||||
|
, parseAssign
|
||||||
|
, parseBlock
|
||||||
|
]
|
||||||
|
|
||||||
|
parseProgram :: Parser Prog
|
||||||
|
parseProgram = do
|
||||||
|
state <- P.kwState >> spaces *> parseExpr <* char ';' <* spaces
|
||||||
|
effect <- P.kwEffect >> spaces *> parseBlockStmts <* spaces
|
||||||
|
combined <- P.kwCombine >> spaces *> parseBlockStmts <* spaces
|
||||||
|
return $ Prog state effect combined
|
||||||
|
|
||||||
|
parse :: String -> String -> Either ParseError Prog
|
||||||
|
parse = runParser parseProgram ()
|
||||||
|
|
||||||
|
{- Translation -}
|
||||||
|
-- | Build the Python function @prog(xs)@ that every LanguageTwo program
-- compiles to.
--
-- Arguments:
--
--   * @s@ -- expression for the initial state
--   * @e@ -- "effect" statements, appended to the body of the merge loop
--   * @c@ -- "combine" statements, run once after the merge loop
--
-- Shape of the generated function:
--
--   1. If @len(xs) < 2@, return @(s, xs)@ immediately.
--   2. Split @xs@ at @len(xs) // 2@ into @left@ and @right@, and call @prog@
--      recursively on each half, binding @(ls, left)@ and @(rs, right)@.
--   3. Reverse both halves so the loop can compare and pop at index @-1@.
--   4. Initialise @state@, @source@ and @total@, then loop while both halves
--      are non-empty: pop from whichever half compares @<=@ at @[-1]@ onto
--      @total@, record which side was used in @source@ (1 = left, 2 = right),
--      then run @e@.
--   5. Run @c@, reverse the halves back, and return
--      @(state, total + left + right)@.
--
-- Tuples are built with 'Py.TupleLiteral', the constructor declared in
-- PythonAst; there is no @Py.Tuple@ constructor.
baseFunction :: Py.PyExpr -> [Py.PyStmt] -> [Py.PyStmt] -> Py.PyStmt
baseFunction s e c = Py.FunctionDef "prog" ["xs"] $
    [Py.IfElse
        (Py.BinOp Py.LessThan
            (Py.FunctionCall (Py.Var "len") [Py.Var "xs"])
            (Py.IntLiteral 2))
        [Py.Return $ Py.TupleLiteral [s, Py.Var "xs"]]
        []
        Nothing
    , Py.Assign (Py.VarPat "leng")
        (Py.BinOp Py.FloorDiv
            (Py.FunctionCall (Py.Var "len") [Py.Var "xs"])
            (Py.IntLiteral 2))
    , Py.Assign (Py.VarPat "left")
        (Py.Access
            (Py.Var "xs")
            [Py.Slice Nothing $ Just (Py.Var "leng")])
    , Py.Assign (Py.VarPat "right")
        (Py.Access
            (Py.Var "xs")
            [Py.Slice (Just (Py.Var "leng")) Nothing])
    , Py.Assign (Py.TuplePat [Py.VarPat "ls", Py.VarPat "left"])
        (Py.FunctionCall (Py.Var "prog") [Py.Var "left"])
    , Py.Assign (Py.TuplePat [Py.VarPat "rs", Py.VarPat "right"])
        (Py.FunctionCall (Py.Var "prog") [Py.Var "right"])
    -- Reverse so that the element to compare next sits at index -1.
    , Py.Standalone $
        Py.FunctionCall (Py.Member (Py.Var "left") "reverse") []
    , Py.Standalone $
        Py.FunctionCall (Py.Member (Py.Var "right") "reverse") []
    , Py.Assign (Py.VarPat "state") s
    , Py.Assign (Py.VarPat "source") (Py.IntLiteral 0)
    , Py.Assign (Py.VarPat "total") (Py.ListLiteral [])
    , Py.While
        (Py.BinOp Py.And
            (Py.BinOp Py.NotEqual (Py.Var "left") (Py.ListLiteral []))
            (Py.BinOp Py.NotEqual (Py.Var "right") (Py.ListLiteral []))) $
        [ Py.IfElse
            (Py.BinOp Py.LessThanEq
                (Py.Access (Py.Var "left") [Py.IntLiteral $ -1])
                (Py.Access (Py.Var "right") [Py.IntLiteral $ -1]))
            [ Py.Standalone $
                Py.FunctionCall (Py.Member (Py.Var "total") "append")
                    [Py.FunctionCall (Py.Member (Py.Var "left") "pop") []]
            , Py.Assign (Py.VarPat "source") (Py.IntLiteral 1)
            ]
            [] $
            Just
                [ Py.Standalone $
                    Py.FunctionCall (Py.Member (Py.Var "total") "append")
                        [Py.FunctionCall (Py.Member (Py.Var "right") "pop") []]
                , Py.Assign (Py.VarPat "source") (Py.IntLiteral 2)
                ]
        ] ++ e
    ] ++ c ++
    -- Undo the earlier reversal before concatenating the leftovers.
    [ Py.Standalone $ Py.FunctionCall (Py.Member (Py.Var "left") "reverse") []
    , Py.Standalone $ Py.FunctionCall (Py.Member (Py.Var "right") "reverse") []
    , Py.Return $ Py.TupleLiteral
        [ Py.Var "state"
        , foldl (Py.BinOp Py.Add) (Py.Var "total") [Py.Var "left", Py.Var "right"]
        ]
    ]
|
||||||
|
|
||||||
|
translateExpr :: Expr -> Py.PyExpr
|
||||||
|
translateExpr (IntLiteral i) = Py.IntLiteral i
|
||||||
|
translateExpr (BinOp op l r) =
|
||||||
|
Py.BinOp (translateOp op) (translateExpr l) (translateExpr r)
|
||||||
|
translateExpr (Var s)
|
||||||
|
| s == "SOURCE" = Py.Var "source"
|
||||||
|
| s == "LEFT" = Py.Var "left"
|
||||||
|
| s == "RIGHT" = Py.Var "right"
|
||||||
|
| s == "STATE" = Py.Var "state"
|
||||||
|
| s == "LSTATE" = Py.Var "ls"
|
||||||
|
| s == "RSTATE" = Py.Var "rs"
|
||||||
|
| s == "L" = Py.IntLiteral 1
|
||||||
|
| s == "R" = Py.IntLiteral 2
|
||||||
|
| otherwise = Py.Var s
|
||||||
|
translateExpr (Length e) = Py.FunctionCall (Py.Var "len") [translateExpr e]
|
||||||
|
|
||||||
|
translateOp :: Op -> Py.PyBinOp
|
||||||
|
translateOp Add = Py.Add
|
||||||
|
translateOp Subtract = Py.Subtract
|
||||||
|
translateOp Multiply = Py.Multiply
|
||||||
|
translateOp Divide = Py.Divide
|
||||||
|
translateOp Equal = Py.Equal
|
||||||
|
translateOp NotEqual = Py.NotEqual
|
||||||
|
translateOp And = Py.And
|
||||||
|
translateOp Or = Py.Or
|
||||||
|
|
||||||
|
translateStmt :: Stmt -> [Py.PyStmt]
|
||||||
|
translateStmt (IfElse c t e) =
|
||||||
|
[Py.IfElse (translateExpr c) (translateStmt t) [] (translateStmt <$> e)]
|
||||||
|
translateStmt (Assign "STATE" e) = [Py.Assign (Py.VarPat "state") (translateExpr e)]
|
||||||
|
translateStmt (Assign v e) = [Py.Assign (Py.VarPat v) (translateExpr e)]
|
||||||
|
translateStmt (Block s) = concatMap translateStmt s
|
||||||
|
|
||||||
|
translate :: Prog -> [Py.PyStmt]
|
||||||
|
translate (Prog s e c) =
|
||||||
|
[baseFunction (translateExpr s) (concatMap translateStmt e) (concatMap translateStmt c)]
|
||||||
52
code/cs325-langs/src/PythonAst.hs
Normal file
52
code/cs325-langs/src/PythonAst.hs
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
module PythonAst where
|
||||||
|
|
||||||
|
data PyBinOp
|
||||||
|
= Add
|
||||||
|
| Subtract
|
||||||
|
| Multiply
|
||||||
|
| Divide
|
||||||
|
| FloorDiv
|
||||||
|
| LessThan
|
||||||
|
| LessThanEq
|
||||||
|
| GreaterThan
|
||||||
|
| GreaterThanEq
|
||||||
|
| Equal
|
||||||
|
| NotEqual
|
||||||
|
| And
|
||||||
|
| Or
|
||||||
|
|
||||||
|
data PyExpr
|
||||||
|
= BinOp PyBinOp PyExpr PyExpr
|
||||||
|
| IntLiteral Int
|
||||||
|
| StrLiteral String
|
||||||
|
| BoolLiteral Bool
|
||||||
|
| ListLiteral [PyExpr]
|
||||||
|
| DictLiteral [(PyExpr, PyExpr)]
|
||||||
|
| Lambda [PyPat] PyExpr
|
||||||
|
| Var String
|
||||||
|
| TupleLiteral [PyExpr]
|
||||||
|
| FunctionCall PyExpr [PyExpr]
|
||||||
|
| Access PyExpr [PyExpr]
|
||||||
|
| Ternary PyExpr PyExpr PyExpr
|
||||||
|
| Member PyExpr String
|
||||||
|
| In PyExpr PyExpr
|
||||||
|
| NotIn PyExpr PyExpr
|
||||||
|
| Slice (Maybe PyExpr) (Maybe PyExpr)
|
||||||
|
|
||||||
|
data PyPat
|
||||||
|
= VarPat String
|
||||||
|
| IgnorePat
|
||||||
|
| TuplePat [PyPat]
|
||||||
|
| AccessPat PyExpr [PyExpr]
|
||||||
|
|
||||||
|
data PyStmt
|
||||||
|
= Assign PyPat PyExpr
|
||||||
|
| IfElse PyExpr [PyStmt] [(PyExpr, [PyStmt])] (Maybe [PyStmt])
|
||||||
|
| While PyExpr [PyStmt]
|
||||||
|
| For PyPat PyExpr [PyStmt]
|
||||||
|
| FunctionDef String [String] [PyStmt]
|
||||||
|
| Return PyExpr
|
||||||
|
| Standalone PyExpr
|
||||||
|
| Import String
|
||||||
|
| FromImport String [String]
|
||||||
|
| Nonlocal [String]
|
||||||
142
code/cs325-langs/src/PythonGen.hs
Normal file
142
code/cs325-langs/src/PythonGen.hs
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
module PythonGen where
|
||||||
|
import PythonAst
|
||||||
|
import Data.List
|
||||||
|
import Data.Bifunctor
|
||||||
|
import Data.Maybe
|
||||||
|
|
||||||
|
indent :: String -> String
|
||||||
|
indent = (" " ++)
|
||||||
|
|
||||||
|
stmtBlock :: [PyStmt] -> [String]
|
||||||
|
stmtBlock = concatMap translateStmt
|
||||||
|
|
||||||
|
-- | Render a Python compound statement: the header @s@ followed by a colon,
-- then the already-rendered body lines @ss@, each indented one level.
--
-- Python requires a non-empty suite after the colon, so an empty body is
-- rendered as a single @pass@ instead of producing a syntax error.
block :: String -> [String] -> [String]
block s [] = [s ++ ":", indent "pass"]
block s ss = (s ++ ":") : map indent ss
|
||||||
|
|
||||||
|
prefix :: String -> PyExpr -> [PyStmt] -> [String]
|
||||||
|
prefix s e sts = block (s ++ " " ++ translateExpr e) $ stmtBlock sts
|
||||||
|
|
||||||
|
if_ :: PyExpr -> [PyStmt] -> [String]
|
||||||
|
if_ = prefix "if"
|
||||||
|
|
||||||
|
elif :: PyExpr -> [PyStmt] -> [String]
|
||||||
|
elif = prefix "elif"
|
||||||
|
|
||||||
|
else_ :: [PyStmt] -> [String]
|
||||||
|
else_ = block "else" . stmtBlock
|
||||||
|
|
||||||
|
while :: PyExpr -> [PyStmt] -> [String]
|
||||||
|
while = prefix "while"
|
||||||
|
|
||||||
|
parenth :: String -> String
|
||||||
|
parenth s = "(" ++ s ++ ")"
|
||||||
|
|
||||||
|
translateStmt :: PyStmt -> [String]
|
||||||
|
translateStmt (Assign p e) = [translatePat p ++ " = " ++ translateExpr e]
|
||||||
|
translateStmt (IfElse i t es e) =
|
||||||
|
if_ i t ++ concatMap (uncurry elif) es ++ maybe [] else_ e
|
||||||
|
translateStmt (While c t) = while c t
|
||||||
|
translateStmt (For x in_ b) = block head body
|
||||||
|
where
|
||||||
|
head = "for " ++ translatePat x ++ " in " ++ translateExpr in_
|
||||||
|
body = stmtBlock b
|
||||||
|
translateStmt (FunctionDef s ps b) = block head body
|
||||||
|
where
|
||||||
|
head = "def " ++ s ++ "(" ++ intercalate "," ps ++ ")"
|
||||||
|
body = stmtBlock b
|
||||||
|
translateStmt (Return e) = ["return " ++ translateExpr e]
|
||||||
|
translateStmt (Standalone e) = [translateExpr e]
|
||||||
|
translateStmt (Import s) = ["import " ++ s]
|
||||||
|
translateStmt (FromImport s ss) =
|
||||||
|
["from " ++ s ++ " import " ++ intercalate "," ss]
|
||||||
|
translateStmt (Nonlocal vs) =
|
||||||
|
["nonlocal " ++ intercalate "," vs]
|
||||||
|
|
||||||
|
precedence :: PyBinOp -> Int
|
||||||
|
precedence Add = 3
|
||||||
|
precedence Subtract = 3
|
||||||
|
precedence Multiply = 4
|
||||||
|
precedence Divide = 4
|
||||||
|
precedence FloorDiv = 4
|
||||||
|
precedence LessThan = 2
|
||||||
|
precedence LessThanEq = 2
|
||||||
|
precedence GreaterThan = 2
|
||||||
|
precedence GreaterThanEq = 2
|
||||||
|
precedence Equal = 2
|
||||||
|
precedence NotEqual = 2
|
||||||
|
precedence And = 1
|
||||||
|
precedence Or = 0
|
||||||
|
|
||||||
|
opString :: PyBinOp -> String
|
||||||
|
opString Add = "+"
|
||||||
|
opString Subtract = "-"
|
||||||
|
opString Multiply = "*"
|
||||||
|
opString Divide = "/"
|
||||||
|
opString FloorDiv = "//"
|
||||||
|
opString LessThan = "<"
|
||||||
|
opString LessThanEq = "<="
|
||||||
|
opString GreaterThan = ">"
|
||||||
|
opString GreaterThanEq = ">="
|
||||||
|
opString Equal = "=="
|
||||||
|
opString NotEqual = "!="
|
||||||
|
opString And = " and "
|
||||||
|
opString Or = " or "
|
||||||
|
|
||||||
|
translateOp :: PyBinOp -> PyBinOp -> PyExpr -> String
|
||||||
|
translateOp o o' =
|
||||||
|
if precedence o > precedence o'
|
||||||
|
then parenth . translateExpr
|
||||||
|
else translateExpr
|
||||||
|
|
||||||
|
dictMapping :: PyExpr -> PyExpr -> String
|
||||||
|
dictMapping f t = translateExpr f ++ ": " ++ translateExpr t
|
||||||
|
|
||||||
|
list :: String -> String -> [PyExpr] -> String
|
||||||
|
list o c es = o ++ intercalate ", " (map translateExpr es) ++ c
|
||||||
|
|
||||||
|
-- | Render a Python expression AST as Python source text.
--
-- Binary operators are printed without surrounding spaces (apart from the
-- spaces built into 'opString' for @and@ / @or@); nested binary operations
-- are parenthesised only where Python would otherwise parse them
-- differently from the AST.
translateExpr :: PyExpr -> String
translateExpr (BinOp o l r) =
    operand leftNeedsParens l ++ opString o ++ operand rightNeedsParens r
    where
        -- Only nested binary operations are ever wrapped; every other
        -- operand is printed as-is. When the guard fails, matching falls
        -- through to the second equation.
        operand needsParens e@(BinOp o' _ _)
            | needsParens o' = parenth (translateExpr e)
        operand _ e = translateExpr e
        -- All comparison operators share one precedence level.
        isComparison = precedence o == precedence Equal
        -- Python binary operators group to the left, so a left child of
        -- equal precedence is safe without parentheses -- except under a
        -- comparison, where "a<b==c" would be read as a comparison chain.
        leftNeedsParens o' =
            precedence o > precedence o'
                || (isComparison && precedence o' == precedence o)
        -- A right child of equal precedence must be wrapped, otherwise
        -- "a-(b-c)" would be printed as "a-b-c".
        rightNeedsParens o' = precedence o >= precedence o'
translateExpr (IntLiteral i) = show i
translateExpr (StrLiteral s) = "\"" ++ s ++ "\""
-- Python spells its boolean constants with a capital letter.
translateExpr (BoolLiteral b) = if b then "True" else "False"
translateExpr (ListLiteral l) = list "[" "]" l
translateExpr (DictLiteral l) =
    "{" ++ intercalate ", " (map (uncurry dictMapping) l) ++ "}"
translateExpr (Lambda ps e) = parenth (head ++ ": " ++ body)
    where
        head = "lambda " ++ intercalate ", " (map translatePat ps)
        body = translateExpr e
translateExpr (Var s) = s
-- A one-element tuple needs a trailing comma; "(x)" is just "x".
translateExpr (TupleLiteral [e]) = "(" ++ translateExpr e ++ ",)"
translateExpr (TupleLiteral es) = list "(" ")" es
translateExpr (FunctionCall f ps) = translateExpr f ++ list "(" ")" ps
-- Subscripts on a plain variable or on another subscript need no
-- parentheses; any other base expression is wrapped.
translateExpr (Access (Var s) e) = s ++ list "[" "]" e
translateExpr (Access e@Access{} i) = translateExpr e ++ list "[" "]" i
translateExpr (Access e i) = "(" ++ translateExpr e ++ ")" ++ list "[" "]" i
translateExpr (Ternary c t e) =
    translateExpr t ++ " if " ++ translateExpr c ++ " else " ++ translateExpr e
-- Same parenthesisation rule as for subscripts, applied to member access.
translateExpr (Member (Var s) m) = s ++ "." ++ m
translateExpr (Member e@Member{} m) = translateExpr e ++ "." ++ m
translateExpr (Member e m) = "(" ++ translateExpr e ++ ")." ++ m
translateExpr (In m c) =
    "(" ++ translateExpr m ++ ") in (" ++ translateExpr c ++ ")"
translateExpr (NotIn m c) =
    "(" ++ translateExpr m ++ ") not in (" ++ translateExpr c ++ ")"
-- Either bound may be omitted; present bounds are always parenthesised.
translateExpr (Slice l r) =
    maybe [] (parenth . translateExpr) l ++ ":" ++ maybe [] (parenth . translateExpr) r
|
||||||
|
|
||||||
|
translatePat :: PyPat -> String
|
||||||
|
translatePat (VarPat s) = s
|
||||||
|
translatePat IgnorePat = "_"
|
||||||
|
translatePat (TuplePat ps) =
|
||||||
|
"(" ++ intercalate "," (map translatePat ps) ++ ")"
|
||||||
|
translatePat (AccessPat e es) = translateExpr (Access e es)
|
||||||
|
|
||||||
|
translate :: [PyStmt] -> String
|
||||||
|
translate = intercalate "\n" . concatMap translateStmt
|
||||||
64
code/typesafe-interpreter/TypesafeIntr.idr
Normal file
64
code/typesafe-interpreter/TypesafeIntr.idr
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
data ExprType
|
||||||
|
= IntType
|
||||||
|
| BoolType
|
||||||
|
| StringType
|
||||||
|
|
||||||
|
repr : ExprType -> Type
|
||||||
|
repr IntType = Int
|
||||||
|
repr BoolType = Bool
|
||||||
|
repr StringType = String
|
||||||
|
|
||||||
|
data Op
|
||||||
|
= Add
|
||||||
|
| Subtract
|
||||||
|
| Multiply
|
||||||
|
| Divide
|
||||||
|
|
||||||
|
data Expr
|
||||||
|
= IntLit Int
|
||||||
|
| BoolLit Bool
|
||||||
|
| StringLit String
|
||||||
|
| BinOp Op Expr Expr
|
||||||
|
|
||||||
|
data SafeExpr : ExprType -> Type where
|
||||||
|
IntLiteral : Int -> SafeExpr IntType
|
||||||
|
BoolLiteral : Bool -> SafeExpr BoolType
|
||||||
|
StringLiteral : String -> SafeExpr StringType
|
||||||
|
BinOperation : (repr a -> repr b -> repr c) -> SafeExpr a -> SafeExpr b -> SafeExpr c
|
||||||
|
|
||||||
|
typecheckOp : Op -> (a : ExprType) -> (b : ExprType) -> Either String (c : ExprType ** repr a -> repr b -> repr c)
|
||||||
|
typecheckOp Add IntType IntType = Right (IntType ** (+))
|
||||||
|
typecheckOp Subtract IntType IntType = Right (IntType ** (-))
|
||||||
|
typecheckOp Multiply IntType IntType = Right (IntType ** (*))
|
||||||
|
typecheckOp Divide IntType IntType = Right (IntType ** div)
|
||||||
|
typecheckOp _ _ _ = Left "Invalid binary operator application"
|
||||||
|
|
||||||
|
typecheck : Expr -> Either String (n : ExprType ** SafeExpr n)
|
||||||
|
typecheck (IntLit i) = Right (_ ** IntLiteral i)
|
||||||
|
typecheck (BoolLit b) = Right (_ ** BoolLiteral b)
|
||||||
|
typecheck (StringLit s) = Right (_ ** StringLiteral s)
|
||||||
|
typecheck (BinOp o l r) = do
|
||||||
|
(lt ** le) <- typecheck l
|
||||||
|
(rt ** re) <- typecheck r
|
||||||
|
(ot ** f) <- typecheckOp o lt rt
|
||||||
|
pure (_ ** BinOperation f le re)
|
||||||
|
|
||||||
|
eval : SafeExpr t -> repr t
|
||||||
|
eval (IntLiteral i) = i
|
||||||
|
eval (BoolLiteral b) = b
|
||||||
|
eval (StringLiteral s) = s
|
||||||
|
eval (BinOperation f l r) = f (eval l) (eval r)
|
||||||
|
|
||||||
|
resultStr : {t : ExprType} -> repr t -> String
|
||||||
|
resultStr {t=IntType} i = show i
|
||||||
|
resultStr {t=BoolType} b = show b
|
||||||
|
resultStr {t=StringType} s = show s
|
||||||
|
|
||||||
|
tryEval : Expr -> String
|
||||||
|
tryEval ex =
|
||||||
|
case typecheck ex of
|
||||||
|
Left err => "Type error: " ++ err
|
||||||
|
Right (t ** e) => resultStr $ eval {t} e
|
||||||
|
|
||||||
|
main : IO ()
|
||||||
|
main = putStrLn $ tryEval $ BinOp Add (IntLit 6) (BinOp Multiply (IntLit 160) (IntLit 2))
|
||||||
@@ -2,5 +2,5 @@
|
|||||||
title: Daniel's Blog
|
title: Daniel's Blog
|
||||||
---
|
---
|
||||||
## Hello!
|
## Hello!
|
||||||
Welcome to my blog. Here, I write abour various subjects, including (but not limited to)
|
Welcome to my blog. Here, I write about various subjects, including (but not limited to)
|
||||||
functional programming, compiler development, programming language theory, and occasionally video games. I hope you find something useful here!
|
functional programming, compiler development, programming language theory, and occasionally video games. I hope you find something useful here!
|
||||||
|
|||||||
@@ -140,3 +140,4 @@ Here are the posts that I've written so far for this series:
|
|||||||
* [Compilation]({{< relref "06_compiler_compilation.md" >}})
|
* [Compilation]({{< relref "06_compiler_compilation.md" >}})
|
||||||
* [Runtime]({{< relref "07_compiler_runtime.md" >}})
|
* [Runtime]({{< relref "07_compiler_runtime.md" >}})
|
||||||
* [LLVM]({{< relref "08_compiler_llvm.md" >}})
|
* [LLVM]({{< relref "08_compiler_llvm.md" >}})
|
||||||
|
* [Garbage Collection]({{< relref "09_compiler_garbage_collection.md" >}})
|
||||||
|
|||||||
511
content/blog/00_cs325_languages_hw1.md
Normal file
511
content/blog/00_cs325_languages_hw1.md
Normal file
@@ -0,0 +1,511 @@
|
|||||||
|
---
|
||||||
|
title: A Language for an Assignment - Homework 1
|
||||||
|
date: 2019-12-27T23:27:09-08:00
|
||||||
|
tags: ["Haskell", "Python", "Algorithms"]
|
||||||
|
---
|
||||||
|
|
||||||
|
On a rainy Oregon day, I was walking between classes with a group of friends.
|
||||||
|
We were discussing the various ways to obfuscate solutions to the weekly
|
||||||
|
homework assignments in our Algorithms course: replace every `if` with
|
||||||
|
a ternary expression, use single variable names, put everything on one line.
|
||||||
|
I said:
|
||||||
|
|
||||||
|
> The
|
||||||
|
{{< sidenote "right" "chad-note" "chad" >}}
|
||||||
|
This is in reference to a meme, <a href="https://knowyourmeme.com/memes/virgin-vs-chad">Virgin vs Chad</a>.
|
||||||
|
A "chad" characteristic is masculine or "alpha" to the point of absurdity.
|
||||||
|
{{< /sidenote >}} move would be to make your own, different language for every homework assignment.
|
||||||
|
|
||||||
|
It was required of us to use
|
||||||
|
{{< sidenote "left" "python-note" "Python" >}}
|
||||||
|
A friend suggested making a Haskell program
|
||||||
|
that generates Python-based interpreters for languages. While that would be truly
|
||||||
|
absurd, I'll leave <em>this</em> challenge for another day.
|
||||||
|
{{< /sidenote >}} for our solutions, so that was the first limitation on this challenge.
|
||||||
|
Someone suggested writing the languages in Haskell, since that's what we used
|
||||||
|
in our Programming Languages class. So the final goal ended up:
|
||||||
|
|
||||||
|
* For each of the 10 homework assignments in CS325 - Analysis of Algorithms,
|
||||||
|
* Create a Haskell program that translates a language into,
|
||||||
|
* A valid Python program that works (nearly) out of the box and passes all the test cases.
|
||||||
|
|
||||||
|
It may not be worth it to create a whole
|
||||||
|
{{< sidenote "right" "general-purpose-note" "general-purpose" >}}
|
||||||
|
A general purpose language is one that's designed to be used in various
|
||||||
|
domains. For instance, C++ is a general-purpose language because it can
|
||||||
|
be used for embedded systems, GUI programs, and pretty much anything else.
|
||||||
|
This is in contrast to a domain-specific language, such as Game Maker Language,
|
||||||
|
which is aimed at a much narrower set of uses.
|
||||||
|
{{< /sidenote >}} language for each problem,
|
||||||
|
but nowhere in the challenge did we say that it had to be general-purpose. In
|
||||||
|
fact, some interesting design thinking can go into designing a domain-specific
|
||||||
|
language for a particular assignment. So let's jump right into it, and make
|
||||||
|
a language for the first homework assignment.
|
||||||
|
|
||||||
|
### Homework 1
|
||||||
|
There are two problems in Homework 1. Here they are, verbatim:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/hws/hw1.txt" 32 38 >}}
|
||||||
|
|
||||||
|
And the second:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/hws/hw1.txt" 47 68 >}}
|
||||||
|
|
||||||
|
We want to make a language __specifically__ for these two tasks (one of which
|
||||||
|
is split into many tasks). What common things can we isolate? I see two:
|
||||||
|
|
||||||
|
First, __all the problems deal with lists__. This may seem like a trivial observation,
|
||||||
|
but these two problems are the __only__ thing we use our language for. We have
|
||||||
|
list access,
|
||||||
|
{{< sidenote "right" "filterting-note" "list filtering" >}}
|
||||||
|
Quickselect is a variation on quicksort, which itself
|
||||||
|
finds all the "lesser" and "greater" elements in the input array.
|
||||||
|
{{< /sidenote >}} and list creation. That should serve as a good base!
|
||||||
|
|
||||||
|
If you squint a little bit, __all the problems are recursive with the same base case__.
|
||||||
|
Consider the first few lines of `search`, implemented naively:
|
||||||
|
|
||||||
|
```Python
|
||||||
|
def search(xs, k):
|
||||||
|
if xs == []:
|
||||||
|
return False
|
||||||
|
```
|
||||||
|
|
||||||
|
How about `sorted`? Take a look:
|
||||||
|
|
||||||
|
```Python
|
||||||
|
def sorted(xs):
|
||||||
|
if xs == []:
|
||||||
|
return []
|
||||||
|
```
|
||||||
|
|
||||||
|
I'm sure you see the picture. But it will take some real mental gymnastics to twist the
|
||||||
|
rest of the problems into this shape. What about `qselect`, for instance? There are two
|
||||||
|
cases for what it may return:
|
||||||
|
|
||||||
|
* `None` or equivalent if the index is out of bounds (we give it `4` and a list `[1, 2]`).
|
||||||
|
* A number if `qselect` worked.
|
||||||
|
|
||||||
|
The test cases never provide a concrete example of what should be returned from
|
||||||
|
`qselect` in the first case, so we'll interpret it like
|
||||||
|
{{< sidenote "right" "undefined-note" "undefined behavior" >}}
|
||||||
|
For a quick sidenote about undefined behavior, check out how
|
||||||
|
C++ optimizes the <a href="https://godbolt.org/z/3skK9j">Collatz Conjecture function</a>.
|
||||||
|
Clang doesn't know whether or not the function will terminate (whether the Collatz Conjecture
|
||||||
|
function terminates is an <a href="https://en.wikipedia.org/wiki/Collatz_conjecture">unsolved problem</a>),
|
||||||
|
but functions that don't terminate are undefined behavior. There's only one other way the function
|
||||||
|
returns, and that's with "1". Thus, clang optimizes the entire function to a single "return 1" call.
|
||||||
|
{{< /sidenote >}} in C++:
|
||||||
|
we can do whatever we want. So, let's allow it to return `[]` in the `None` case.
|
||||||
|
This makes this base case valid:
|
||||||
|
|
||||||
|
```Python
|
||||||
|
def qselect(xs, k):
|
||||||
|
if xs == []:
|
||||||
|
return []
|
||||||
|
```
|
||||||
|
|
||||||
|
"Oh yeah, now it's all coming together." With one more observation (which will come
|
||||||
|
from a piece I haven't yet shown you!), we'll be able to generalize this base case.
|
||||||
|
|
||||||
|
The observation is this section in the assignment:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/hws/hw1.txt" 83 98 >}}
|
||||||
|
|
||||||
|
The real key is the part about "returning the `[]` where x should be inserted". It so
|
||||||
|
happens that when the list given to the function is empty, the number should be inserted
|
||||||
|
precisely into that list. Thus:
|
||||||
|
|
||||||
|
```Python
|
||||||
|
def _search(xs, k):
|
||||||
|
if xs == []:
|
||||||
|
return xs
|
||||||
|
```
|
||||||
|
|
||||||
|
The same works for `qselect`:
|
||||||
|
|
||||||
|
```Python
|
||||||
|
def qselect(xs, k):
|
||||||
|
if xs == []:
|
||||||
|
return xs
|
||||||
|
```
|
||||||
|
|
||||||
|
And for sorted, too:
|
||||||
|
|
||||||
|
```Python
|
||||||
|
def sorted(xs):
|
||||||
|
if xs == []:
|
||||||
|
return xs
|
||||||
|
```
|
||||||
|
|
||||||
|
There are some functions that are exceptions, though:
|
||||||
|
|
||||||
|
```Python
|
||||||
|
def insert(xs, k):
|
||||||
|
# We can't return early here!
|
||||||
|
# If we do, we'll never insert anything.
|
||||||
|
```
|
||||||
|
|
||||||
|
Also:
|
||||||
|
|
||||||
|
```Python
|
||||||
|
def search(xs, k):
|
||||||
|
# We have to return true or false, never
|
||||||
|
# an empty list.
|
||||||
|
```
|
||||||
|
|
||||||
|
So, whenever we __don't__ return a list, we don't want to add a special case.
|
||||||
|
We arrive at the following common base case: __whenever a function returns a list, if its first argument
|
||||||
|
is the empty list, the first argument is immediately returned__.
|
||||||
|
|
||||||
|
We've largely exhausted the conclusions we can draw from these problems. Let's get to designing a language.
|
||||||
|
|
||||||
|
### A Silly Language
|
||||||
|
Let's start by visualizing our goals. Without base cases, the solution to `_search`
|
||||||
|
would be something like this:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/sols/hw1.lang" 11 14 >}}
|
||||||
|
|
||||||
|
Here we have an __`if`-expression__. It has to have an `else`, and evaluates to the value
|
||||||
|
of the chosen branch. That is, `if true then 0 else 1` evaluates to `0`, while
|
||||||
|
`if false then 0 else 1` evaluates to `1`. Otherwise, we follow the binary tree search
|
||||||
|
algorithm faithfully.
|
||||||
|
|
||||||
|
Using this definition of `_search`, we can define `search` pretty easily:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/sols/hw1.lang" 17 17 >}}
|
||||||
|
|
||||||
|
Let's use Haskell's `(++)` operator for concatenation. This will help us understand
|
||||||
|
when the user is operating on lists, and when they're not. With this, `sorted` becomes:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/sols/hw1.lang" 16 16 >}}
|
||||||
|
|
||||||
|
Let's go for `qselect` now. We'll introduce a very silly language feature for this
|
||||||
|
problem:
|
||||||
|
{{< sidenote "right" "selector-note" "list selectors" >}}
|
||||||
|
You've probably never heard of list selectors, and for a good reason:
|
||||||
|
this is a <em>terrible</em> language feature. I'll go into more detail
|
||||||
|
later, but I wanted to make this clear right away.
|
||||||
|
{{< /sidenote >}}. We observe that `qselect` aims to partition the list into
|
||||||
|
other lists. We thus add the following pieces of syntax:
|
||||||
|
|
||||||
|
```
|
||||||
|
~xs -> {
|
||||||
|
pivot <- xs[rand]!
|
||||||
|
left <- xs[#0 <= pivot]
|
||||||
|
...
|
||||||
|
} -> ...
|
||||||
|
```
|
||||||
|
|
||||||
|
There are three new things here.
|
||||||
|
|
||||||
|
1. The actual "list selector": `~xs -> { .. } -> ...`. Between the curly braces
|
||||||
|
are branches which select parts of the list and assign them to new variables.
|
||||||
|
Thus, `pivot <- xs[rand]!` assigns the element at a random index to the variable `pivot`.
|
||||||
|
The `!` at the end means "after taking this out of `xs`, delete it from `xs`". The
|
||||||
|
syntax {{< sidenote "right" "curly-note" "starts with \"~\"" >}}
|
||||||
|
An observant reader will note that there's no need for the "xs" after the "~".
|
||||||
|
The idea was to add a special case syntax to reference the "selected list", but
|
||||||
|
I ended up not bothering. So in fact, this part of the syntax is useless.
|
||||||
|
{{< /sidenote >}} to make it easier to parse.
|
||||||
|
2. The `rand` list access syntax. `xs[rand]` is a special case that picks a random
|
||||||
|
element from `xs`.
|
||||||
|
3. The `xs[#0 <= pivot]` syntax. This is another special case that selects all elements
|
||||||
|
from `xs` that match the given predicate (where `#0` is replaced with each element in `xs`).
|
||||||
|
|
||||||
|
The big part of qselect is to not evaluate `right` unless you have to. So, we shouldn't
|
||||||
|
eagerly evaluate the list selector. We also don't want something like `right[|right|-1]` to evaluate
|
||||||
|
`right` twice. So we settle on
|
||||||
|
{{< sidenote "right" "lazy-note" "lazy evaluation" >}}
|
||||||
|
Lazy evaluation means only evaluating an expression when we need to. Thus,
|
||||||
|
although we might encounter the expression for <code>right</code>, we
|
||||||
|
only evaluate it when the time comes. Lazy evaluation, at least
|
||||||
|
the way that Haskell has it, is more specific: an expression is evaluated only
|
||||||
|
once, or not at all.
|
||||||
|
{{</ sidenote >}}.
|
||||||
|
Ah, but the `!` marker introduces
|
||||||
|
{{< sidenote "left" "side-effect-note" "side effects" >}}
|
||||||
|
A side effect is a term frequently used when talking about functional programming.
|
||||||
|
Evaluating the expression <code>xs[rand]!</code> doesn't just get a random element,
|
||||||
|
it also changes <em>something else</em>. In this case, that something else is
|
||||||
|
the <code>xs</code> list.
|
||||||
|
{{< /sidenote >}}. So we can't just evaluate these things all willy-nilly.
|
||||||
|
So, let's make it so that each expression in the selector list requires the ones above it. Thus,
|
||||||
|
`left` will require `pivot`, and `right` will require `left` and `pivot`. So,
|
||||||
|
lazily evaluated, ordered expressions. The whole `qselect` becomes:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/sols/hw1.lang" 1 9 >}}
|
||||||
|
|
||||||
|
We've now figured out all the language constructs. Let's start working on
|
||||||
|
some implementation!
|
||||||
|
|
||||||
|
#### Implementation
|
||||||
|
It would be silly of me to explain every detail of creating a language in Haskell
|
||||||
|
in this post; this is neither the purpose of the post, nor is it plausible
|
||||||
|
to do this without covering monads, parser combinators, grammars, abstract syntax
|
||||||
|
trees, and more. So, instead, I'll discuss the _interesting_ parts of the
|
||||||
|
implementation.
|
||||||
|
|
||||||
|
##### Temporary Variables
|
||||||
|
Our language is expression-based, yes. A function is a single,
|
||||||
|
arbitrarily complex expression (involving `if/else`, list
|
||||||
|
selectors, and more). So it would make sense to translate
|
||||||
|
a function to a single, arbitrarily complex Python expression.
|
||||||
|
However, the way we've designed our language makes it
|
||||||
|
not-so-suitable for converting to a single expression! For
|
||||||
|
instance, consider `xs[rand]`. We need to compute the list,
|
||||||
|
get its length, generate a random number, and then access
|
||||||
|
the corresponding element in the list. We use the list
|
||||||
|
here twice, and simply repeating the expression would not
|
||||||
|
be very smart: we'd be evaluating twice. So instead,
|
||||||
|
we'll use a variable, assign the list to that variable,
|
||||||
|
and then access that variable multiple times.
|
||||||
|
|
||||||
|
To be extra safe, let's use a fresh temporary variable
|
||||||
|
every time we need to store something. The simplest
|
||||||
|
way is to simply maintain a counter of how many temporary
|
||||||
|
variables we've already used, and generate a new variable
|
||||||
|
by prepending the word "temp" to that number. We start
|
||||||
|
with `temp0`, then `temp1`, and so on. To keep a counter,
|
||||||
|
we can use a state monad:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 230 230 >}}
|
||||||
|
|
||||||
|
Don't worry about the `Map.Map String [String]`, we'll get to that in a bit.
|
||||||
|
For now, all we have to worry about is the second element of the tuple,
|
||||||
|
the integer counting how many temporary variables we've used. We can
|
||||||
|
get the current temporary variable as follows:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 232 235 >}}
|
||||||
|
|
||||||
|
We can also get a fresh temporary variable like this:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 237 240 >}}
|
||||||
|
|
||||||
|
Now, the
|
||||||
|
{{< sidenote "left" "code-note" "code" >}}
|
||||||
|
Since we are translating an expression, we must have the result of
|
||||||
|
the translation yield a Python expression we can use in generating
|
||||||
|
larger Python expressions. However, as we've seen, we occasionally
|
||||||
|
have to use statements. Thus, the <code>translateExpr</code> function
|
||||||
|
returns a <code>Translator ([Py.PyStmt], Py.PyExpr)</code>.
|
||||||
|
{{< /sidenote >}} for generating a random list access looks like
|
||||||
|
{{< sidenote "right" "ast-note" "this:" >}}
|
||||||
|
The <code>Py.*</code> constructors are a part of a Python AST module I quickly
|
||||||
|
threw together. I won't showcase it here, but you can always look at the
|
||||||
|
source code for the blog (which includes this project)
|
||||||
|
<a href="https://dev.danilafe.com/Web-Projects/blog-static">here</a>.
|
||||||
|
{{< /sidenote >}}
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 325 330 >}}
|
||||||
|
|
||||||
|
##### Implementing "lazy evaluation"
|
||||||
|
Lazy evaluation in functional programs usually arises from
|
||||||
|
{{< sidenote "right" "graph-note" "graph reduction" >}}
|
||||||
|
Graph reduction, more specifically the <em>Spineless,
|
||||||
|
Tagless G-machine</em> is at the core of the Glasgow Haskell
|
||||||
|
Compiler (GHC). Simon Peyton Jones' earlier book,
|
||||||
|
<em>Implementing Functional Languages: a tutorial</em>
|
||||||
|
details an earlier version of the G-machine.
|
||||||
|
{{< /sidenote >}}. However, Python is neither
|
||||||
|
functional nor graph-based, and we only lazily
|
||||||
|
evaluate list selectors. Thus, we'll have to do
|
||||||
|
some work to get our lazy evaluation to work as we desire.
|
||||||
|
Here's what I came up with:
|
||||||
|
|
||||||
|
1. It's difficult to insert Python statements where they are
|
||||||
|
needed: we'd have to figure out in which scope each variable
|
||||||
|
has already been declared, and in which scope it's yet
|
||||||
|
to be assigned.
|
||||||
|
2. Instead, we can use a Python dictionary, called `cache`,
|
||||||
|
and store computed versions of each variable in the cache.
|
||||||
|
3. It's pretty difficult to check if a variable
|
||||||
|
is in the cache, compute it if not, and then return the
|
||||||
|
result of the computation, in one expression. This is
|
||||||
|
true, unless that single expression is a function call, and we have a dedicated
|
||||||
|
function that takes no arguments, computes the expression if needed,
|
||||||
|
and uses the cache otherwise. We choose this route.
|
||||||
|
4. We have already promised that we'd evaluate all the selected
|
||||||
|
variables above a given variable before evaluating the variable
|
||||||
|
itself. So, each function will first call (and therefore
|
||||||
|
{{< sidenote "right" "force-note" "force" >}}
|
||||||
|
Forcing, in this case, comes from the context of lazy evaluation. To
|
||||||
|
force a variable or an expression is to tell the program to compute its
|
||||||
|
value, even though it may have been putting it off.
|
||||||
|
{{< /sidenote >}}) the functions
|
||||||
|
generated for variables declared above the function's own variable.
|
||||||
|
5. To keep track of all of this, we use the already-existing state monad
|
||||||
|
as a reader monad (that is, we clear the changes we make to the monad
|
||||||
|
after we're done translating the list selector). This is where the `Map.Map String [String]`
|
||||||
|
comes from.
|
||||||
|
|
||||||
|
The `Map.Map String [String]` keeps track of variables that will be lazily computed,
|
||||||
|
and also of the dependencies of each variable (the variables that need
|
||||||
|
to be accessed before the variable itself). We compute such a map for
|
||||||
|
each selector as follows:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 298 298 >}}
|
||||||
|
|
||||||
|
We update the existing map using `Map.union`:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 299 299 >}}
|
||||||
|
|
||||||
|
And, after we're done generating expressions in the body of this selector,
|
||||||
|
we clear it to its previous value `vs`:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 302 302 >}}
|
||||||
|
|
||||||
|
We generate a single selector as follows:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 268 281 >}}
|
||||||
|
|
||||||
|
This generates a function definition statement, which we will examine in
|
||||||
|
generated Python code later on.
|
||||||
|
|
||||||
|
Solving the problem this way also introduces another gotcha: sometimes,
|
||||||
|
a variable is produced by a function call, and other times the variable
|
||||||
|
is just a Python variable. We write this as follows:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 283 288 >}}
|
||||||
|
|
||||||
|
##### Special Case Insertion
|
||||||
|
This is a silly language for a single homework assignment. I'm not
|
||||||
|
planning to implement Hindley-Milner type inference, or anything
|
||||||
|
of that sort. For the purpose of this language, things will be
|
||||||
|
either a list, or not a list. And as long as a function __can__ return
|
||||||
|
a list, it can also return the list from its base case. Thus,
|
||||||
|
that's all we will try to figure out. The checking code is so
|
||||||
|
short that we can include the whole snippet at once:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 219 227 >}}
|
||||||
|
|
||||||
|
`mergePossibleType`
|
||||||
|
{{< sidenote "right" "bool-identity-note" "figures out" >}}
|
||||||
|
An observant reader will note that this is just a logical
|
||||||
|
OR function. It's not, however, good practice to use
|
||||||
|
booleans for types that have two constructors with no arguments.
|
||||||
|
Check out this <a href="https://programming-elm.com/blog/2019-05-20-solving-the-boolean-identity-crisis-part-1/">
|
||||||
|
Elm-based article</a> about this, which the author calls the
|
||||||
|
Boolean Identity Crisis.
|
||||||
|
{{< /sidenote >}}, given two possible types for an
|
||||||
|
expression, the final type for the expression.
|
||||||
|
|
||||||
|
There's only one real trick to this. Sometimes, like in
|
||||||
|
`_search`, the only time we return something _known_ to be a list, that
|
||||||
|
something is `xs`. Since we're making a list manipulation language,
|
||||||
|
let's __assume the first argument to the function is a list__, and
|
||||||
|
__use this information to determine expression types__. We guess
|
||||||
|
types in a very basic manner otherwise: If you use the concatenation
|
||||||
|
operator, or a list literal, then obviously we're working on a list.
|
||||||
|
If you're returning the first argument of the function, that's also
|
||||||
|
a list. Otherwise, it could be anything.
|
||||||
|
|
||||||
|
My Haskell linter actually suggested a pretty clever way of writing
|
||||||
|
the whole "add a base case if this function returns a list" code.
|
||||||
|
Check it out:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 260 266 >}}
|
||||||
|
|
||||||
|
Specifically, look at the line with `let fastReturn = ...`. It
|
||||||
|
uses a list comprehension: we take a parameter `p` from the list of
|
||||||
|
parameters `ps`, but only produce the statements for the base case
|
||||||
|
if the possible type computed using `p` is `List`.
|
||||||
|
|
||||||
|
### The Output
|
||||||
|
What kind of beast have we created? Take a look for yourself:
|
||||||
|
```Python
|
||||||
|
def qselect(xs,k):
|
||||||
|
if xs==[]:
|
||||||
|
return xs
|
||||||
|
cache = {}
|
||||||
|
def pivot():
|
||||||
|
if ("pivot") not in (cache):
|
||||||
|
cache["pivot"] = xs.pop(0)
|
||||||
|
return cache["pivot"]
|
||||||
|
def left():
|
||||||
|
def temp2(arg):
|
||||||
|
out = []
|
||||||
|
for arg0 in arg:
|
||||||
|
if arg0<=pivot():
|
||||||
|
out.append(arg0)
|
||||||
|
return out
|
||||||
|
pivot()
|
||||||
|
if ("left") not in (cache):
|
||||||
|
cache["left"] = temp2(xs)
|
||||||
|
return cache["left"]
|
||||||
|
def right():
|
||||||
|
def temp3(arg):
|
||||||
|
out = []
|
||||||
|
for arg0 in arg:
|
||||||
|
if arg0>pivot():
|
||||||
|
out.append(arg0)
|
||||||
|
return out
|
||||||
|
left()
|
||||||
|
pivot()
|
||||||
|
if ("right") not in (cache):
|
||||||
|
cache["right"] = temp3(xs)
|
||||||
|
return cache["right"]
|
||||||
|
if k>(len(left())+1):
|
||||||
|
temp4 = qselect(right(), k-len(left())-1)
|
||||||
|
else:
|
||||||
|
if k==(len(left())+1):
|
||||||
|
temp5 = [pivot()]
|
||||||
|
else:
|
||||||
|
temp5 = qselect(left(), k)
|
||||||
|
temp4 = temp5
|
||||||
|
return temp4
|
||||||
|
def _search(xs,k):
|
||||||
|
if xs==[]:
|
||||||
|
return xs
|
||||||
|
if xs[1]==k:
|
||||||
|
temp6 = xs
|
||||||
|
else:
|
||||||
|
if xs[1]>k:
|
||||||
|
temp8 = _search(xs[0], k)
|
||||||
|
else:
|
||||||
|
temp8 = _search(xs[2], k)
|
||||||
|
temp6 = temp8
|
||||||
|
return temp6
|
||||||
|
def sorted(xs):
|
||||||
|
if xs==[]:
|
||||||
|
return xs
|
||||||
|
return sorted(xs[0])+[xs[1]]+sorted(xs[2])
|
||||||
|
def search(xs,k):
|
||||||
|
return len(_search(xs, k))!=0
|
||||||
|
def insert(xs,k):
|
||||||
|
return _insert(k, _search(xs, k))
|
||||||
|
def _insert(k,xs):
|
||||||
|
if k==[]:
|
||||||
|
return k
|
||||||
|
if len(xs)==0:
|
||||||
|
temp16 = xs
|
||||||
|
temp16.append([])
|
||||||
|
temp17 = temp16
|
||||||
|
temp17.append(k)
|
||||||
|
temp18 = temp17
|
||||||
|
temp18.append([])
|
||||||
|
temp15 = temp18
|
||||||
|
else:
|
||||||
|
temp15 = xs
|
||||||
|
return temp15
|
||||||
|
```
|
||||||
|
It's...horrible! All the `tempX` variables, __three layers of nested function declarations__, hardcoded cache access. This is not something you'd ever want to write.
|
||||||
|
Even to get this code, I had to come up with hacks __in a language I created__.
|
||||||
|
The first hack is to make the `qselect` function use the `xs == []` base
|
||||||
|
case. This doesn't happen by default, because `qselect` doesn't return a list!
|
||||||
|
To "fix" this, I made `qselect` return the number it found, wrapped in a
|
||||||
|
list literal. This is not up to spec, and would require another function
|
||||||
|
to unwrap this list.
|
||||||
|
|
||||||
|
While `qselect` was struggling with not having the base case, `insert` had
|
||||||
|
a base case it didn't need: `insert` shouldn't return the list itself
|
||||||
|
when it's empty, it should insert into it! However, when we use the `<<`
|
||||||
|
list insertion operator, the language infers `insert` to be a list-returning
|
||||||
|
function itself, so inserting into an empty list will always fail. So, we
|
||||||
|
make a function `_insert`, which __takes the arguments in reverse__.
|
||||||
|
The base case will still be generated, but the first argument (against
|
||||||
|
which the base case is checked) will be a number, so the `k == []` check
|
||||||
|
will always fail.
|
||||||
|
|
||||||
|
That concludes this post. I'll be working on more solutions to homework
|
||||||
|
assignments in self-made languages, so keep an eye out!
|
||||||
218
content/blog/01_cs325_languages_hw2.md
Normal file
218
content/blog/01_cs325_languages_hw2.md
Normal file
@@ -0,0 +1,218 @@
|
|||||||
|
---
|
||||||
|
title: A Language for an Assignment - Homework 2
|
||||||
|
date: 2019-12-30T20:05:10-08:00
|
||||||
|
tags: ["Haskell", "Python", "Algorithms"]
|
||||||
|
---
|
||||||
|
|
||||||
|
After the madness of the
|
||||||
|
[language for homework 1]({{< relref "00_cs325_languages_hw1.md" >}}),
|
||||||
|
the solution to the second homework offers a moment of respite.
|
||||||
|
Let's get right into the problems, shall we?
|
||||||
|
|
||||||
|
### Homework 2
|
||||||
|
Besides some free-response questions, the homework contains
|
||||||
|
two problems. The first:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/hws/hw2.txt" 29 34 >}}
|
||||||
|
|
||||||
|
And the second:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/hws/hw2.txt" 36 44 >}}
|
||||||
|
|
||||||
|
At first glance, it's not obvious why these problems are good for
|
||||||
|
us. However, there's one key observation: __`num_inversions` can be implemented
|
||||||
|
using a slightly-modified `mergesort`__. The trick is to maintain a counter
|
||||||
|
of inversions in every recursive call to `mergesort`, updating
|
||||||
|
it every time we take an element from the
|
||||||
|
{{< sidenote "right" "right-note" "right list" >}}
|
||||||
|
If this nomenclature is not clear to you, recall that
|
||||||
|
mergesort divides a list into two smaller lists. The
|
||||||
|
"right list" refers to the second of the two, because
|
||||||
|
if you visualize the original list as a rectangle, and cut
|
||||||
|
it in half (vertically, down the middle), then the second list
|
||||||
|
(from the left) is on the right.
|
||||||
|
{{< /sidenote >}} while there are still elements in the
|
||||||
|
{{< sidenote "left" "left-note" "left list" >}}
|
||||||
|
Why this is the case is left as an exercise to the reader.
|
||||||
|
{{< /sidenote >}}.
|
||||||
|
When we return from the call,
|
||||||
|
we add up the number of inversions from running `num_inversions`
|
||||||
|
on the smaller lists, and the number of inversions that we counted
|
||||||
|
as I described. We then return both the total number
|
||||||
|
of inversions and the sorted list.
|
||||||
|
|
||||||
|
So, we either perform the standard mergesort, or we perform mergesort
|
||||||
|
with additional steps added on. The additional steps can be divided into
|
||||||
|
three general categories:
|
||||||
|
|
||||||
|
1. __Initialization__: We create / set some initial state. This state
|
||||||
|
doesn't depend on the lists or anything else.
|
||||||
|
2. __Effect__: Each time that an element is moved from one of the two smaller
|
||||||
|
lists into the output list, we may change the state in some way (create
|
||||||
|
an effect).
|
||||||
|
3. __Combination__: The final state, and the results of the two
|
||||||
|
sub-problem states, are combined into the output of the function.
|
||||||
|
|
||||||
|
This is all very abstract. In the concrete case of inversions,
|
||||||
|
these steps are as follows:
|
||||||
|
|
||||||
|
1. __Initialization__: The initial state, which is just the counter, is set to 0.
|
||||||
|
2. __Effect__: Each time an element is moved, if it comes from the right list,
|
||||||
|
the number of inversions is updated.
|
||||||
|
3. __Combination__: We update the state, simply adding the left and right
|
||||||
|
inversion counts.
|
||||||
|
|
||||||
|
We can make a language out of this!
|
||||||
|
|
||||||
|
### A Language
|
||||||
|
Again, let's start by visualizing what the solution will look like. How about this:
|
||||||
|
|
||||||
|
{{< rawblock "cs325-langs/sols/hw2.lang" >}}
|
||||||
|
|
||||||
|
We divide the code into the same three steps that we described above. The first
|
||||||
|
section is the initial state. Since it doesn't depend on anything, we expect
|
||||||
|
it to be some kind of literal, like an integer. Next, we have the effect section,
|
||||||
|
which has access to the variables below:
|
||||||
|
|
||||||
|
* `STATE`, to manipulate or check the current state.
|
||||||
|
* `LEFT` and `RIGHT`, to access the two lists being merged.
|
||||||
|
* `L` and `R`, constants that are used to compare against the `SOURCE` variable.
|
||||||
|
* `SOURCE`, to denote which list a number came from.
|
||||||
|
* `LSTATE` and `RSTATE`, to denote the final states from the two subproblems.
|
||||||
|
|
||||||
|
We use an `if`-statement to check if the element that was popped came
|
||||||
|
from the right list (by checking `SOURCE == R`). If it did, we increment the counter
|
||||||
|
(state) by the proper amount. In the combine step, which has access to the
|
||||||
|
same variables, we simply increment the state by the counters from the left
|
||||||
|
and right solutions, stored in `LSTATE` and `RSTATE`. That's it!
|
||||||
|
|
||||||
|
#### Implementation
|
||||||
|
The implementation is not tricky at all. We don't need to use monads like we did last
|
||||||
|
time, and nor do we have to perform any fancy Python nested function declarations.
|
||||||
|
|
||||||
|
To keep with the Python convention of lowercase variables, we'll translate the
|
||||||
|
uppercase "global" variables to lowercase. We'll do it like so:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 167 176 >}}
|
||||||
|
|
||||||
|
Note that we translated `L` and `R` to integer literals. We'll indicate the source of
|
||||||
|
each element with an integer, since there's no real point to representing it with
|
||||||
|
a string or a variable. We'll need to be aware of this when we implement the actual, generic
|
||||||
|
mergesort code. Let's do that now:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 101 161 >}}
|
||||||
|
|
||||||
|
This is probably the ugliest part of this assignment: we handwrote a Python
|
||||||
|
AST in Haskell that implements mergesort with our augmentations. Note that
|
||||||
|
this is a function, which takes a `Py.PyExpr` (the initial state expression),
|
||||||
|
and two lists of `Py.PyStmt`, which are the "effect" and "combination" code,
|
||||||
|
respectively. We simply splice them into our regular mergesort function.
|
||||||
|
The translation is otherwise pretty trivial, so there's no real reason
|
||||||
|
to show it here.
|
||||||
|
|
||||||
|
### The Output
|
||||||
|
What's the output of our solution to `num_inversions`? Take a look for yourself:
|
||||||
|
|
||||||
|
```Python
|
||||||
|
def prog(xs):
|
||||||
|
if len(xs)<2:
|
||||||
|
return (0, xs)
|
||||||
|
leng = len(xs)//2
|
||||||
|
left = xs[:(leng)]
|
||||||
|
right = xs[(leng):]
|
||||||
|
(ls,left) = prog(left)
|
||||||
|
(rs,right) = prog(right)
|
||||||
|
left.reverse()
|
||||||
|
right.reverse()
|
||||||
|
state = 0
|
||||||
|
source = 0
|
||||||
|
total = []
|
||||||
|
while (left!=[])and(right!=[]):
|
||||||
|
if left[-1]<=right[-1]:
|
||||||
|
total.append(left.pop())
|
||||||
|
source = 1
|
||||||
|
else:
|
||||||
|
total.append(right.pop())
|
||||||
|
source = 2
|
||||||
|
if source==2:
|
||||||
|
state = state+len(left)
|
||||||
|
state = state+ls+rs
|
||||||
|
left.reverse()
|
||||||
|
right.reverse()
|
||||||
|
return (state, total+left+right)
|
||||||
|
```
|
||||||
|
|
||||||
|
Honestly, that's pretty clean. As clean as `left.reverse()` to allow for \\(O(1)\\) pop is.
|
||||||
|
What's really clean, however, is the implementation of mergesort in our language.
|
||||||
|
It goes as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
state 0;
|
||||||
|
effect {}
|
||||||
|
combine {}
|
||||||
|
```
|
||||||
|
|
||||||
|
To implement mergesort in our language, which describes mergesort variants, all
|
||||||
|
we have to do is not specify any additional behavior. Cool, huh?
|
||||||
|
|
||||||
|
That's the end of this post. If you liked this one (and the previous one!),
|
||||||
|
keep an eye out for more!
|
||||||
|
|
||||||
|
### Appendix (Missing Homework Question)
|
||||||
|
I should not view homework assignments on a small-screen device. There __was__ a third problem
|
||||||
|
on homework 2:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/hws/hw2.txt" 46 65 >}}
|
||||||
|
|
||||||
|
This is not a mergesort variant, and adding support for it into our second language
|
||||||
|
will prevent us from making it the neat specialized
|
||||||
|
{{< sidenote "right" "dsl-note" "DSL" >}}
|
||||||
|
DSL is a shortened form of "domain specific language", which was briefly
|
||||||
|
described in another sidenote while solving homework 1.
|
||||||
|
{{< /sidenote >}} that we just saw. We'll do something else, instead:
|
||||||
|
we'll use the language we defined in homework 1 to solve this
|
||||||
|
problem:
|
||||||
|
|
||||||
|
```
|
||||||
|
empty() = [0, 0];
|
||||||
|
longest(xs) =
|
||||||
|
if |xs| != 0
|
||||||
|
then _longest(longest(xs[0]), longest(xs[2]))
|
||||||
|
else empty();
|
||||||
|
_longest(l, r) = [max(l[0], r[0]) + 1, max(l[0]+r[0], max(l[1], r[1]))];
|
||||||
|
```
|
||||||
|
|
||||||
|
{{< sidenote "right" "terrible-note" "This is quite terrible." >}}
|
||||||
|
This is probably true with any program written in our first
|
||||||
|
language.
|
||||||
|
{{< /sidenote >}} In these 6 lines of code, there are two hacks
|
||||||
|
to work around the peculiarities of the language.
|
||||||
|
|
||||||
|
At each recursive call, we want to keep track of both the depth
|
||||||
|
of the tree and the existing longest path. This is because
|
||||||
|
the longest path could be found either somewhere down
|
||||||
|
a subtree, or from combining the largest depths of
|
||||||
|
two subtrees. To return two values from a function in Python,
|
||||||
|
we'd use a tuple. Here, we use a list.
|
||||||
|
|
||||||
|
Alarm bells should be going off here. There's no reason why we should
|
||||||
|
ever return an empty list from the recursive call: at the very least, we
|
||||||
|
want to return `[0,0]`. But placing such a list literal in a function
|
||||||
|
will trigger the special case insertion. So, we have to hide this literal
|
||||||
|
from the compiler. Fortunately, that's not too hard to do - the compiler
|
||||||
|
is pretty halfhearted in its inference of types. Simply putting
|
||||||
|
the literal behind a constant function (`empty`) does the trick.
|
||||||
|
|
||||||
|
The program uses the subproblem depths multiple times in the
|
||||||
|
final computation. We thus probably want to assign these values
|
||||||
|
to names so we don't have to perform any repeated work. Since
|
||||||
|
the only two mechanisms for
|
||||||
|
{{< sidenote "right" "binding-note" "binding variables" >}}
|
||||||
|
To bind a variable means to assign a value to it.
|
||||||
|
{{< /sidenote >}} in this language are function calls
|
||||||
|
and list selectors, we use a helper function `_longest`,
|
||||||
|
which takes two subproblem solutions and combines them
|
||||||
|
into a new solution. It's pretty obvious that `_longest`
|
||||||
|
returns a list, so the compiler will try to insert a base
|
||||||
|
case. Fortunately, subproblem solutions are always
|
||||||
|
lists of two numbers, so this doesn't affect us too much.
|
||||||
@@ -18,33 +18,35 @@ expand into other things; for us, this is a token).
|
|||||||
|
|
||||||
Let's write a context free grammar (CFG from now on) to match our parenthesis language:
|
Let's write a context free grammar (CFG from now on) to match our parenthesis language:
|
||||||
|
|
||||||
$$
|
{{< latex >}}
|
||||||
\\begin{align}
|
\begin{aligned}
|
||||||
S & \\rightarrow ( S ) \\\\\\
|
S & \rightarrow ( S ) \\
|
||||||
S & \\rightarrow ()
|
S & \rightarrow ()
|
||||||
\\end{align}
|
\end{aligned}
|
||||||
$$
|
{{< /latex >}}
|
||||||
|
|
||||||
So, how does this work? We start with a "start symbol" nonterminal, which we usually denote as \\(S\\). Then, to get a desired string,
|
So, how does this work? We start with a "start symbol" nonterminal, which we usually denote as \\(S\\). Then, to get a desired string,
|
||||||
we replace a nonterminal with the sequence of terminals and nonterminals on the right of one of its rules. For instance, to get `()`,
|
we replace a nonterminal with the sequence of terminals and nonterminals on the right of one of its rules. For instance, to get `()`,
|
||||||
we start with \\(S\\) and replace it with the body of the second one of its rules. This gives us `()` right away. To get `((()))`, we
|
we start with \\(S\\) and replace it with the body of the second one of its rules. This gives us `()` right away. To get `((()))`, we
|
||||||
have to do a little more work:
|
have to do a little more work:
|
||||||
|
|
||||||
$$
|
{{< latex >}}
|
||||||
S \\rightarrow (S) \\rightarrow ((S)) \\rightarrow ((()))
|
S \rightarrow (S) \rightarrow ((S)) \rightarrow ((()))
|
||||||
$$
|
{{< /latex >}}
|
||||||
|
|
||||||
In practice, there are many ways of using a CFG to parse a programming language. Various parsing algorithms support various subsets
|
In practice, there are many ways of using a CFG to parse a programming language. Various parsing algorithms support various subsets
|
||||||
of context free languages. For instance, top down parsers follow nearly exactly the structure that we had. They try to parse
|
of context free languages. For instance, top down parsers follow nearly exactly the structure that we had. They try to parse
|
||||||
a nonterminal by trying to match each symbol in its body. In the rule \\(S \\rightarrow \\alpha \\beta \\gamma\\), it will
|
a nonterminal by trying to match each symbol in its body. In the rule \\(S \\rightarrow \\alpha \\beta \\gamma\\), it will
|
||||||
first try to match \\(\\alpha\\), then \\(\\beta\\), and so on. If one of the three contains a nonterminal, it will attempt to parse
|
first try to match \\(\\alpha\\), then \\(\\beta\\), and so on. If one of the three contains a nonterminal, it will attempt to parse
|
||||||
that nonterminal following the same strategy. However, this approach has a flaw. For instance, consider the grammar
|
that nonterminal following the same strategy. However, this approach has a flaw. For instance, consider the grammar
|
||||||
$$
|
|
||||||
\\begin{align}
|
{{< latex >}}
|
||||||
S & \\rightarrow Sa \\\\\\
|
\begin{aligned}
|
||||||
S & \\rightarrow a
|
S & \rightarrow Sa \\
|
||||||
\\end{align}
|
S & \rightarrow a
|
||||||
$$
|
\end{aligned}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
A top down parser will start with \\(S\\). It will then try the first rule, which starts with \\(S\\). So, dutifully, it will
|
A top down parser will start with \\(S\\). It will then try the first rule, which starts with \\(S\\). So, dutifully, it will
|
||||||
try to parse __that__ \\(S\\). And to do that, it will once again try the first rule, and find that it starts with another \\(S\\)...
|
try to parse __that__ \\(S\\). And to do that, it will once again try the first rule, and find that it starts with another \\(S\\)...
|
||||||
This will never end, and the parser will get stuck. A grammar in which a nonterminal can appear in the beginning of one of its rules
|
This will never end, and the parser will get stuck. A grammar in which a nonterminal can appear in the beginning of one of its rules
|
||||||
@@ -53,26 +55,36 @@ __left recursive__, and top-down parsers aren't able to handle those grammars.
|
|||||||
We __could__ rewrite our grammar without using left-recursion, but we don't want to. Instead, we'll use a __bottom up__ parser,
|
We __could__ rewrite our grammar without using left-recursion, but we don't want to. Instead, we'll use a __bottom up__ parser,
|
||||||
using specifically the LALR(1) parsing algorithm. Here's an example of how it works, using our left-recursive grammar. We start with our
|
using specifically the LALR(1) parsing algorithm. Here's an example of how it works, using our left-recursive grammar. We start with our
|
||||||
goal string, and a "dot" indicating where we are. At first, the dot is behind all the characters:
|
goal string, and a "dot" indicating where we are. At first, the dot is behind all the characters:
|
||||||
$$
|
|
||||||
|
{{< latex >}}
|
||||||
.aaa
|
.aaa
|
||||||
$$
|
{{< /latex >}}
|
||||||
|
|
||||||
We see nothing interesting on the left side of the dot, so we move (or __shift__) the dot forward by one character:
|
We see nothing interesting on the left side of the dot, so we move (or __shift__) the dot forward by one character:
|
||||||
$$
|
|
||||||
|
{{< latex >}}
|
||||||
a.aa
|
a.aa
|
||||||
$$
|
{{< /latex >}}
|
||||||
|
|
||||||
Now, on the left side of the dot, we see something! In particular, we see the body of one of the rules for \\(S\\) (the second one).
|
Now, on the left side of the dot, we see something! In particular, we see the body of one of the rules for \\(S\\) (the second one).
|
||||||
So we __reduce__ the thing on the left side of the dot, by replacing it with the left hand side of the rule (\\(S\\)):
|
So we __reduce__ the thing on the left side of the dot, by replacing it with the left hand side of the rule (\\(S\\)):
|
||||||
$$
|
|
||||||
|
{{< latex >}}
|
||||||
S.aa
|
S.aa
|
||||||
$$
|
{{< /latex >}}
|
||||||
|
|
||||||
There's nothing else we can do with the left side, so we shift again:
|
There's nothing else we can do with the left side, so we shift again:
|
||||||
$$
|
|
||||||
|
{{< latex >}}
|
||||||
Sa.a
|
Sa.a
|
||||||
$$
|
{{< /latex >}}
|
||||||
|
|
||||||
Great, we see another body on the left of the dot. We reduce it:
|
Great, we see another body on the left of the dot. We reduce it:
|
||||||
$$
|
|
||||||
|
{{< latex >}}
|
||||||
S.a
|
S.a
|
||||||
$$
|
{{< /latex >}}
|
||||||
|
|
||||||
Just like before, we shift over the dot, and again, we reduce. We end up with our
|
Just like before, we shift over the dot, and again, we reduce. We end up with our
|
||||||
start symbol, and nothing on the right of the dot, so we're done!
|
start symbol, and nothing on the right of the dot, so we're done!
|
||||||
|
|
||||||
@@ -97,13 +109,15 @@ a tree representing "the multiplication of the result of adding 3 to 2 and 6", w
|
|||||||
|
|
||||||
So, with this in mind, we want our rule for __addition__ (represented with the nonterminal \\(A\_{add}\\)) to be matched first, and
|
So, with this in mind, we want our rule for __addition__ (represented with the nonterminal \\(A\_{add}\\)) to be matched first, and
|
||||||
for its children to be trees created by the multiplication rule, \\(A\_{mult}\\). So we write the following rules:
|
for its children to be trees created by the multiplication rule, \\(A\_{mult}\\). So we write the following rules:
|
||||||
$$
|
|
||||||
\\begin{align}
|
{{< latex >}}
|
||||||
A\_{add} & \\rightarrow A\_{add}+A\_{mult} \\\\\\
|
\begin{aligned}
|
||||||
A\_{add} & \\rightarrow A\_{add}-A\_{mult} \\\\\\
|
A_{add} & \rightarrow A_{add}+A_{mult} \\
|
||||||
A\_{add} & \\rightarrow A\_{mult}
|
A_{add} & \rightarrow A_{add}-A_{mult} \\
|
||||||
\\end{align}
|
A_{add} & \rightarrow A_{mult}
|
||||||
$$
|
\end{aligned}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
The first rule matches another addition, added to the result of a multiplication. Similarly, the second rule matches another addition, from which the result of a multiplication is then subtracted. We use the \\(A\_{add}\\) on the left side of \\(+\\) and \\(-\\) in the body
|
The first rule matches another addition, added to the result of a multiplication. Similarly, the second rule matches another addition, from which the result of a multiplication is then subtracted. We use the \\(A\_{add}\\) on the left side of \\(+\\) and \\(-\\) in the body
|
||||||
because we want to be able to parse strings like `1+2+3+4`, which we want to view as `((1+2)+3)+4` (mostly because
|
because we want to be able to parse strings like `1+2+3+4`, which we want to view as `((1+2)+3)+4` (mostly because
|
||||||
subtraction is [left-associative](https://en.wikipedia.org/wiki/Operator_associativity)). So, we want the top level
|
subtraction is [left-associative](https://en.wikipedia.org/wiki/Operator_associativity)). So, we want the top level
|
||||||
@@ -113,51 +127,58 @@ of the tree to be the rightmost `+` or `-`, since that means it will be the "las
|
|||||||
|
|
||||||
This is the purpose of the third rule, which serves to say "an addition expression can just be a multiplication,
|
This is the purpose of the third rule, which serves to say "an addition expression can just be a multiplication,
|
||||||
without any plusses or minuses." Our rules for multiplication are very similar:
|
without any plusses or minuses." Our rules for multiplication are very similar:
|
||||||
$$
|
|
||||||
\\begin{align}
|
|
||||||
A\_{mult} & \\rightarrow A\_{mult}*P \\\\\\
|
|
||||||
A\_{mult} & \\rightarrow A\_{mult}/P \\\\\\
|
|
||||||
A\_{mult} & \\rightarrow P
|
|
||||||
\\end{align}
|
|
||||||
$$
|
|
||||||
|
|
||||||
P, in this case, is an a__p__plication (remember, application has higher precedence than any binary operator).
|
{{< latex >}}
|
||||||
|
\begin{aligned}
|
||||||
|
A_{mult} & \rightarrow A_{mult}*P \\
|
||||||
|
A_{mult} & \rightarrow A_{mult}/P \\
|
||||||
|
A_{mult} & \rightarrow P
|
||||||
|
\end{aligned}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
|
P, in this case, is an application (remember, application has higher precedence than any binary operator).
|
||||||
Once again, if there's no `*` or `/`, we simply fall through to a \\(P\\) nonterminal, representing application.
|
Once again, if there's no `*` or `/`, we simply fall through to a \\(P\\) nonterminal, representing application.
|
||||||
|
|
||||||
Application is refreshingly simple:
|
Application is refreshingly simple:
|
||||||
$$
|
|
||||||
\\begin{align}
|
{{< latex >}}
|
||||||
P & \\rightarrow P B \\\\\\
|
\begin{aligned}
|
||||||
P & \\rightarrow B
|
P & \rightarrow P B \\
|
||||||
\\end{align}
|
P & \rightarrow B
|
||||||
$$
|
\end{aligned}
|
||||||
An application is either only one "thing" (represented with \\(B\\), for __b__ase), such as a number or an identifier,
|
{{< /latex >}}
|
||||||
|
|
||||||
|
An application is either only one "thing" (represented with \\(B\\), for base), such as a number or an identifier,
|
||||||
or another application followed by a thing.
|
or another application followed by a thing.
|
||||||
|
|
||||||
We now need to define what a "thing" is. As we said before, it's a number, or an identifier. We also make a parenthesized
|
We now need to define what a "thing" is. As we said before, it's a number, or an identifier. We also make a parenthesized
|
||||||
arithmetic expression a "thing", allowing us to wrap right back around and recognize anything inside parentheses:
|
arithmetic expression a "thing", allowing us to wrap right back around and recognize anything inside parentheses:
|
||||||
$$
|
|
||||||
\\begin{align}
|
{{< latex >}}
|
||||||
B & \\rightarrow \text{num} \\\\\\
|
\begin{aligned}
|
||||||
B & \\rightarrow \text{lowerVar} \\\\\\
|
B & \rightarrow \text{num} \\
|
||||||
B & \\rightarrow \text{upperVar} \\\\\\
|
B & \rightarrow \text{lowerVar} \\
|
||||||
B & \\rightarrow ( A\_{add} ) \\\\\\
|
B & \rightarrow \text{upperVar} \\
|
||||||
B & \\rightarrow C
|
B & \rightarrow ( A_{add} ) \\
|
||||||
\\end{align}
|
B & \rightarrow C
|
||||||
$$
|
\end{aligned}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
What's the last \\(C\\)? We also want a "thing" to be a case expression. Here are the rules for that:
|
What's the last \\(C\\)? We also want a "thing" to be a case expression. Here are the rules for that:
|
||||||
$$
|
|
||||||
\\begin{align}
|
{{< latex >}}
|
||||||
C & \\rightarrow \\text{case} \\; A\_{add} \\; \\text{of} \\; \\{ L\_B\\} \\\\\\
|
\begin{aligned}
|
||||||
L\_B & \\rightarrow R \\; L\_B \\\\\\
|
C & \rightarrow \text{case} \; A_{add} \; \text{of} \; \{ L_B\} \\
|
||||||
L\_B & \\rightarrow R \\\\\\
|
L_B & \rightarrow R \; L_B \\
|
||||||
R & \\rightarrow N \\; \\text{arrow} \\; \\{ A\_{add} \\} \\\\\\
|
L_B & \rightarrow R \\
|
||||||
N & \\rightarrow \\text{lowerVar} \\\\\\
|
R & \rightarrow N \; \text{arrow} \; \{ A_{add} \} \\
|
||||||
N & \\rightarrow \\text{upperVar} \\; L\_L \\\\\\
|
N & \rightarrow \text{lowerVar} \\
|
||||||
L\_L & \\rightarrow \\text{lowerVar} \\; L\_L \\\\\\
|
N & \rightarrow \text{upperVar} \; L_L \\
|
||||||
L\_L & \\rightarrow \\epsilon
|
L_L & \rightarrow \text{lowerVar} \; L_L \\
|
||||||
\\end{align}
|
L_L & \rightarrow \epsilon
|
||||||
$$
|
\end{aligned}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
\\(L\_B\\) is the list of branches in our case expression. \\(R\\) is a single branch, which is in the
|
\\(L\_B\\) is the list of branches in our case expression. \\(R\\) is a single branch, which is in the
|
||||||
form `Pattern -> Expression`. \\(N\\) is a pattern, which we will for now define to be either a variable name
|
form `Pattern -> Expression`. \\(N\\) is a pattern, which we will for now define to be either a variable name
|
||||||
(\\(\\text{lowerVar}\\)), or a constructor with some arguments. The arguments of a constructor will be
|
(\\(\\text{lowerVar}\\)), or a constructor with some arguments. The arguments of a constructor will be
|
||||||
@@ -167,40 +188,43 @@ We use this because a constructor can have no arguments (like Nil).
|
|||||||
|
|
||||||
We can use these grammar rules to represent any expression we want. For instance, let's try `3+(multiply 2 6)`,
|
We can use these grammar rules to represent any expression we want. For instance, let's try `3+(multiply 2 6)`,
|
||||||
where multiply is a function that, well, multiplies. We start with \\(A_{add}\\):
|
where multiply is a function that, well, multiplies. We start with \\(A_{add}\\):
|
||||||
$$
|
|
||||||
\\begin{align}
|
{{< latex >}}
|
||||||
& A\_{add} \\\\\\
|
\begin{aligned}
|
||||||
& \\rightarrow A\_{add} + A\_{mult} \\\\\\
|
& A_{add} \\
|
||||||
& \\rightarrow A\_{mult} + A\_{mult} \\\\\\
|
& \rightarrow A_{add} + A_{mult} \\
|
||||||
& \\rightarrow P + A\_{mult} \\\\\\
|
& \rightarrow A_{mult} + A_{mult} \\
|
||||||
& \\rightarrow B + A\_{mult} \\\\\\
|
& \rightarrow P + A_{mult} \\
|
||||||
& \\rightarrow \\text{num(3)} + A\_{mult} \\\\\\
|
& \rightarrow B + A_{mult} \\
|
||||||
& \\rightarrow \\text{num(3)} + P \\\\\\
|
& \rightarrow \text{num(3)} + A_{mult} \\
|
||||||
& \\rightarrow \\text{num(3)} + B \\\\\\
|
& \rightarrow \text{num(3)} + P \\
|
||||||
& \\rightarrow \\text{num(3)} + (A\_{add}) \\\\\\
|
& \rightarrow \text{num(3)} + B \\
|
||||||
& \\rightarrow \\text{num(3)} + (A\_{mult}) \\\\\\
|
& \rightarrow \text{num(3)} + (A_{add}) \\
|
||||||
& \\rightarrow \\text{num(3)} + (P) \\\\\\
|
& \rightarrow \text{num(3)} + (A_{mult}) \\
|
||||||
& \\rightarrow \\text{num(3)} + (P \\; \\text{num(6)}) \\\\\\
|
& \rightarrow \text{num(3)} + (P) \\
|
||||||
& \\rightarrow \\text{num(3)} + (P \\; \\text{num(2)} \\; \\text{num(6)}) \\\\\\
|
& \rightarrow \text{num(3)} + (P \; \text{num(6)}) \\
|
||||||
& \\rightarrow \\text{num(3)} + (\\text{lowerVar(multiply)} \\; \\text{num(2)} \\; \\text{num(6)}) \\\\\\
|
& \rightarrow \text{num(3)} + (P \; \text{num(2)} \; \text{num(6)}) \\
|
||||||
\\end{align}
|
& \rightarrow \text{num(3)} + (\text{lowerVar(multiply)} \; \text{num(2)} \; \text{num(6)}) \\
|
||||||
$$
|
\end{aligned}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
We're almost there. We now want a rule for a "something that can appear at the top level of a program", like
|
We're almost there. We now want a rule for a "something that can appear at the top level of a program", like
|
||||||
a function or data type declaration. We make a new set of rules:
|
a function or data type declaration. We make a new set of rules:
|
||||||
$$
|
|
||||||
\\begin{align}
|
{{< latex >}}
|
||||||
T & \\rightarrow \\text{defn} \\; \\text{lowerVar} \\; L\_P =\\{ A\_{add} \\} \\\\\\
|
\begin{aligned}
|
||||||
T & \\rightarrow \\text{data} \\; \\text{upperVar} = \\{ L\_D \\} \\\\\\
|
T & \rightarrow \text{defn} \; \text{lowerVar} \; L_P =\{ A_{add} \} \\
|
||||||
L\_D & \\rightarrow D \\; , \\; L\_D \\\\\\
|
T & \rightarrow \text{data} \; \text{upperVar} = \{ L_D \} \\
|
||||||
L\_D & \\rightarrow D \\\\\\
|
L_D & \rightarrow D \; , \; L_D \\
|
||||||
L\_P & \\rightarrow \\text{lowerVar} \\; L\_P \\\\\\
|
L_D & \rightarrow D \\
|
||||||
L\_P & \\rightarrow \\epsilon \\\\\\
|
L_P & \rightarrow \text{lowerVar} \; L_P \\
|
||||||
D & \\rightarrow \\text{upperVar} \\; L\_U \\\\\\
|
L_P & \rightarrow \epsilon \\
|
||||||
L\_U & \\rightarrow \\text{upperVar} \\; L\_U \\\\\\
|
D & \rightarrow \text{upperVar} \; L_U \\
|
||||||
L\_U & \\rightarrow \\epsilon
|
L_U & \rightarrow \text{upperVar} \; L_U \\
|
||||||
\\end{align}
|
L_U & \rightarrow \epsilon
|
||||||
$$
|
\end{aligned}
|
||||||
|
{{< /latex >}}
|
||||||
|
|
||||||
That's a lot of rules! \\(T\\) is the "top-level declaration" rule. It matches either
|
That's a lot of rules! \\(T\\) is the "top-level declaration" rule. It matches either
|
||||||
a function or a data definition. A function definition consists of the keyword "defn",
|
a function or a data definition. A function definition consists of the keyword "defn",
|
||||||
followed by a function name (starting with a lowercase letter), followed by a list of
|
followed by a function name (starting with a lowercase letter), followed by a list of
|
||||||
@@ -213,12 +237,12 @@ a constructor of the data type, followed by a list \\(L\_U\\) of zero or more up
|
|||||||
the types of the arguments of the constructor).
|
the types of the arguments of the constructor).
|
||||||
|
|
||||||
Finally, we want one or more of these declarations in a valid program:
|
Finally, we want one or more of these declarations in a valid program:
|
||||||
$$
|
{{< latex >}}
|
||||||
\\begin{align}
|
\begin{aligned}
|
||||||
G & \\rightarrow T \\; G \\\\\\
|
G & \rightarrow T \; G \\
|
||||||
G & \\rightarrow T
|
G & \rightarrow T
|
||||||
\\end{align}
|
\end{aligned}
|
||||||
$$
|
{{< /latex >}}
|
||||||
|
|
||||||
Just like with tokenizing, there exists a piece of software that will generate a bottom-up parser for us, given our grammar.
|
Just like with tokenizing, there exists a piece of software that will generate a bottom-up parser for us, given our grammar.
|
||||||
It's called Bison, and it is frequently used with Flex. Before we get to bison, though, we need to pay a debt we've already
|
It's called Bison, and it is frequently used with Flex. Before we get to bison, though, we need to pay a debt we've already
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user