Compare commits

57 commits: sidenotes ... ee90351c17

ee90351c17, fbdbf67ce3, a7e32d300a, 56387cb936, df965816ac, c7341c9b15, 00322d7e9f,
ef93632130, 0f744888ef, a5b84bab69, 12725500a8, 1917c08e51, b304057560, e5a39d8dfb,
54ccef9c72, c103c6acbf, d6f53076c0, b07ea8fe9c, 9a7441779f, a6f27e446d, e7f0ccfa16,
e5d01a4e19, b7d72f2fbf, 281dbbd174, 153349f3d5, 8d22acfe78, c1b030ee97, 803f52b2d0,
2f96abeef6, 163fcd2b2e, 9ddcb1b3f2, 133979218a, ef545be03c, c534dc7508, 263ffe2b8c,
67181fb033, a026e67a3b, d9544398b9, 1c4bb29fdd, 765d497724, 80410c9200, 4e918db5cb,
382102f071, 6e88780f8b, e3035b9d66, 8765626898, c38247df9e, baf44f8627, 19aa126025,
a406fb0846, 75664e90bb, f74209c970, c7ce8a3107, b3b906dd90, b8e0e0b4ce, eb02e1e6b0,
b2fc6ea5a8

.drone.yml — new file, 11 lines
@@ -0,0 +1,11 @@
kind: pipeline
type: docker
name: default

steps:
- name: test-compiler
  image: archlinux
  commands:
  - pacman -Sy cmake gcc make llvm bison flex gettext --noconfirm
  - cd code/compiler
  - ./test.sh

Modified SCSS stylesheet (file name missing from the extract)
@@ -6,7 +6,7 @@
 }

 .gmachine-instruction-name {
-    padding: 10px;
+    padding: .8rem;
     border-right: $standard-border;
     flex-grow: 1;
     flex-basis: 20%;
@@ -28,12 +28,12 @@
 }

 .gmachine-inner-label {
-    padding: 10px;
+    padding: .8rem;
     font-weight: bold;
 }

 .gmachine-inner-text {
-    padding: 10px;
+    padding: .8rem;
     text-align: right;
     flex-grow: 1;
 }

code/compiler/09/CMakeLists.txt — new file, 42 lines
@@ -0,0 +1,42 @@
cmake_minimum_required(VERSION 3.1)
project(compiler)

# Find all the required packages
find_package(BISON)
find_package(FLEX)
find_package(LLVM REQUIRED CONFIG)

# Set up the flex and bison targets
bison_target(parser
    ${CMAKE_CURRENT_SOURCE_DIR}/parser.y
    ${CMAKE_CURRENT_BINARY_DIR}/parser.cpp
    COMPILE_FLAGS "-d")
flex_target(scanner
    ${CMAKE_CURRENT_SOURCE_DIR}/scanner.l
    ${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp)
add_flex_bison_dependency(scanner parser)

# Find all the relevant LLVM components
llvm_map_components_to_libnames(LLVM_LIBS core x86asmparser x86codegen)

# Create compiler executable
add_executable(compiler
    ast.cpp ast.hpp definition.cpp
    llvm_context.cpp llvm_context.hpp
    type_env.cpp type_env.hpp
    env.cpp env.hpp
    type.cpp type.hpp
    error.cpp error.hpp
    binop.cpp binop.hpp
    instruction.cpp instruction.hpp
    ${BISON_parser_OUTPUTS}
    ${FLEX_scanner_OUTPUTS}
    main.cpp
)

# Configure compiler executable
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
target_include_directories(compiler PUBLIC ${LLVM_INCLUDE_DIRS})
target_compile_definitions(compiler PUBLIC ${LLVM_DEFINITIONS})
target_link_libraries(compiler ${LLVM_LIBS})

code/compiler/09/ast.cpp — new file, 264 lines
@@ -0,0 +1,264 @@
#include "ast.hpp"
#include <ostream>
#include "binop.hpp"
#include "error.hpp"

static void print_indent(int n, std::ostream& to) {
    while(n--) to << " ";
}

type_ptr ast::typecheck_common(type_mgr& mgr, const type_env& env) {
    node_type = typecheck(mgr, env);
    return node_type;
}

void ast::resolve_common(const type_mgr& mgr) {
    type_var* var;
    type_ptr resolved_type = mgr.resolve(node_type, var);
    if(var) throw type_error("ambiguously typed program");

    resolve(mgr);
    node_type = std::move(resolved_type);
}

void ast_int::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "INT: " << value << std::endl;
}

type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) const {
    return type_ptr(new type_base("Int"));
}

void ast_int::resolve(const type_mgr& mgr) const {

}

void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
    into.push_back(instruction_ptr(new instruction_pushint(value)));
}

void ast_lid::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "LID: " << id << std::endl;
}

type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) const {
    return env.lookup(id);
}

void ast_lid::resolve(const type_mgr& mgr) const {

}

void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
    into.push_back(instruction_ptr(
        env->has_variable(id) ?
            (instruction*) new instruction_push(env->get_offset(id)) :
            (instruction*) new instruction_pushglobal(id)));
}

void ast_uid::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "UID: " << id << std::endl;
}

type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) const {
    return env.lookup(id);
}

void ast_uid::resolve(const type_mgr& mgr) const {

}

void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
    into.push_back(instruction_ptr(new instruction_pushglobal(id)));
}

void ast_binop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "BINOP: " << op_name(op) << std::endl;
    left->print(indent + 1, to);
    right->print(indent + 1, to);
}

type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr ltype = left->typecheck_common(mgr, env);
    type_ptr rtype = right->typecheck_common(mgr, env);
    type_ptr ftype = env.lookup(op_name(op));
    if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op));

    type_ptr return_type = mgr.new_type();
    type_ptr arrow_one = type_ptr(new type_arr(rtype, return_type));
    type_ptr arrow_two = type_ptr(new type_arr(ltype, arrow_one));

    mgr.unify(arrow_two, ftype);
    return return_type;
}

void ast_binop::resolve(const type_mgr& mgr) const {
    left->resolve_common(mgr);
    right->resolve_common(mgr);
}

void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
    right->compile(env, into);
    left->compile(env_ptr(new env_offset(1, env)), into);

    into.push_back(instruction_ptr(new instruction_pushglobal(op_action(op))));
    into.push_back(instruction_ptr(new instruction_mkapp()));
    into.push_back(instruction_ptr(new instruction_mkapp()));
}

void ast_app::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "APP:" << std::endl;
    left->print(indent + 1, to);
    right->print(indent + 1, to);
}

type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
    type_ptr ltype = left->typecheck_common(mgr, env);
    type_ptr rtype = right->typecheck_common(mgr, env);

    type_ptr return_type = mgr.new_type();
    type_ptr arrow = type_ptr(new type_arr(rtype, return_type));
    mgr.unify(arrow, ltype);
    return return_type;
}

void ast_app::resolve(const type_mgr& mgr) const {
    left->resolve_common(mgr);
    right->resolve_common(mgr);
}

void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
    right->compile(env, into);
    left->compile(env_ptr(new env_offset(1, env)), into);
    into.push_back(instruction_ptr(new instruction_mkapp()));
}

void ast_case::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "CASE: " << std::endl;
    for(auto& branch : branches) {
        print_indent(indent + 1, to);
        branch->pat->print(to);
        to << std::endl;
        branch->expr->print(indent + 2, to);
    }
}

type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
    type_var* var;
    type_ptr case_type = mgr.resolve(of->typecheck_common(mgr, env), var);
    type_ptr branch_type = mgr.new_type();

    for(auto& branch : branches) {
        type_env new_env = env.scope();
        branch->pat->match(case_type, mgr, new_env);
        type_ptr curr_branch_type = branch->expr->typecheck_common(mgr, new_env);
        mgr.unify(branch_type, curr_branch_type);
    }

    case_type = mgr.resolve(case_type, var);
    if(!dynamic_cast<type_data*>(case_type.get())) {
        throw type_error("attempting case analysis of non-data type");
    }

    return branch_type;
}

void ast_case::resolve(const type_mgr& mgr) const {
    of->resolve_common(mgr);
    for(auto& branch : branches) {
        branch->expr->resolve_common(mgr);
    }
}

void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
    type_data* type = dynamic_cast<type_data*>(of->node_type.get());

    of->compile(env, into);
    into.push_back(instruction_ptr(new instruction_eval()));

    instruction_jump* jump_instruction = new instruction_jump();
    into.push_back(instruction_ptr(jump_instruction));
    for(auto& branch : branches) {
        std::vector<instruction_ptr> branch_instructions;
        pattern_var* vpat;
        pattern_constr* cpat;

        if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
            branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions);

            for(auto& constr_pair : type->constructors) {
                if(jump_instruction->tag_mappings.find(constr_pair.second.tag) !=
                        jump_instruction->tag_mappings.end())
                    break;

                jump_instruction->tag_mappings[constr_pair.second.tag] =
                    jump_instruction->branches.size();
            }
            jump_instruction->branches.push_back(std::move(branch_instructions));
        } else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
            env_ptr new_env = env;
            for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) {
                new_env = env_ptr(new env_var(*it, new_env));
            }

            branch_instructions.push_back(instruction_ptr(new instruction_split(
                cpat->params.size())));
            branch->expr->compile(new_env, branch_instructions);
            branch_instructions.push_back(instruction_ptr(new instruction_slide(
                cpat->params.size())));

            int new_tag = type->constructors[cpat->constr].tag;
            if(jump_instruction->tag_mappings.find(new_tag) !=
                    jump_instruction->tag_mappings.end())
                throw type_error("technically not a type error: duplicate pattern");

            jump_instruction->tag_mappings[new_tag] =
                jump_instruction->branches.size();
            jump_instruction->branches.push_back(std::move(branch_instructions));
        }
    }

    for(auto& constr_pair : type->constructors) {
        if(jump_instruction->tag_mappings.find(constr_pair.second.tag) ==
                jump_instruction->tag_mappings.end())
            throw type_error("non-total pattern");
    }
}

void pattern_var::print(std::ostream& to) const {
    to << var;
}

void pattern_var::match(type_ptr t, type_mgr& mgr, type_env& env) const {
    env.bind(var, t);
}

void pattern_constr::print(std::ostream& to) const {
    to << constr;
    for(auto& param : params) {
        to << " " << param;
    }
}

void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const {
    type_ptr constructor_type = env.lookup(constr);
    if(!constructor_type) {
        throw type_error(std::string("pattern using unknown constructor ") + constr);
    }

    for(int i = 0; i < params.size(); i++) {
        type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
        if(!arr) throw type_error("too many parameters in constructor pattern");

        env.bind(params[i], arr->left);
        constructor_type = arr->right;
    }

    mgr.unify(t, constructor_type);
}

code/compiler/09/ast.hpp — new file, 141 lines
@@ -0,0 +1,141 @@
#pragma once
#include <memory>
#include <vector>
#include "type.hpp"
#include "type_env.hpp"
#include "binop.hpp"
#include "instruction.hpp"
#include "env.hpp"

struct ast {
    type_ptr node_type;

    virtual ~ast() = default;

    virtual void print(int indent, std::ostream& to) const = 0;
    virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const = 0;
    virtual void resolve(const type_mgr& mgr) const = 0;
    virtual void compile(const env_ptr& env,
        std::vector<instruction_ptr>& into) const = 0;

    type_ptr typecheck_common(type_mgr& mgr, const type_env& env);
    void resolve_common(const type_mgr& mgr);
};

using ast_ptr = std::unique_ptr<ast>;

struct pattern {
    virtual ~pattern() = default;

    virtual void print(std::ostream& to) const = 0;
    virtual void match(type_ptr t, type_mgr& mgr, type_env& env) const = 0;
};

using pattern_ptr = std::unique_ptr<pattern>;

struct branch {
    pattern_ptr pat;
    ast_ptr expr;

    branch(pattern_ptr p, ast_ptr a)
        : pat(std::move(p)), expr(std::move(a)) {}
};

using branch_ptr = std::unique_ptr<branch>;

struct ast_int : public ast {
    int value;

    explicit ast_int(int v)
        : value(v) {}

    void print(int indent, std::ostream& to) const;
    type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
    void resolve(const type_mgr& mgr) const;
    void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};

struct ast_lid : public ast {
    std::string id;

    explicit ast_lid(std::string i)
        : id(std::move(i)) {}

    void print(int indent, std::ostream& to) const;
    type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
    void resolve(const type_mgr& mgr) const;
    void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};

struct ast_uid : public ast {
    std::string id;

    explicit ast_uid(std::string i)
        : id(std::move(i)) {}

    void print(int indent, std::ostream& to) const;
    type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
    void resolve(const type_mgr& mgr) const;
    void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};

struct ast_binop : public ast {
    binop op;
    ast_ptr left;
    ast_ptr right;

    ast_binop(binop o, ast_ptr l, ast_ptr r)
        : op(o), left(std::move(l)), right(std::move(r)) {}

    void print(int indent, std::ostream& to) const;
    type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
    void resolve(const type_mgr& mgr) const;
    void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};

struct ast_app : public ast {
    ast_ptr left;
    ast_ptr right;

    ast_app(ast_ptr l, ast_ptr r)
        : left(std::move(l)), right(std::move(r)) {}

    void print(int indent, std::ostream& to) const;
    type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
    void resolve(const type_mgr& mgr) const;
    void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};

struct ast_case : public ast {
    ast_ptr of;
    std::vector<branch_ptr> branches;

    ast_case(ast_ptr o, std::vector<branch_ptr> b)
        : of(std::move(o)), branches(std::move(b)) {}

    void print(int indent, std::ostream& to) const;
    type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
    void resolve(const type_mgr& mgr) const;
    void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};

struct pattern_var : public pattern {
    std::string var;

    pattern_var(std::string v)
        : var(std::move(v)) {}

    void print(std::ostream &to) const;
    void match(type_ptr t, type_mgr& mgr, type_env& env) const;
};

struct pattern_constr : public pattern {
    std::string constr;
    std::vector<std::string> params;

    pattern_constr(std::string c, std::vector<std::string> p)
        : constr(std::move(c)), params(std::move(p)) {}

    void print(std::ostream &to) const;
    void match(type_ptr t, type_mgr&, type_env& env) const;
};

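A minimal usage sketch (illustrative only, not part of this diff; it assumes it is compiled inside code/compiler/09 against the files above): the AST nodes are plain structs that can be composed by hand, e.g. to build and dump the expression `1 + 2`.

```cpp
// Illustrative sketch, not part of the commit.
#include <iostream>
#include "ast.hpp"   // brings in ast_int, ast_binop, binop, ast_ptr

int main() {
    // Build the tree for `1 + 2` and print it with the nodes' own print().
    ast_ptr expr(new ast_binop(PLUS,
        ast_ptr(new ast_int(1)),
        ast_ptr(new ast_int(2))));
    expr->print(0, std::cout);   // BINOP: +, then two indented INT children
    return 0;
}
```
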
code/compiler/09/binop.cpp — new file, 21 lines
@@ -0,0 +1,21 @@
#include "binop.hpp"

std::string op_name(binop op) {
    switch(op) {
        case PLUS: return "+";
        case MINUS: return "-";
        case TIMES: return "*";
        case DIVIDE: return "/";
    }
    return "??";
}

std::string op_action(binop op) {
    switch(op) {
        case PLUS: return "plus";
        case MINUS: return "minus";
        case TIMES: return "times";
        case DIVIDE: return "divide";
    }
    return "??";
}

code/compiler/09/binop.hpp — new file, 12 lines
@@ -0,0 +1,12 @@
#pragma once
#include <string>

enum binop {
    PLUS,
    MINUS,
    TIMES,
    DIVIDE
};

std::string op_name(binop op);
std::string op_action(binop op);

code/compiler/09/definition.cpp — new file, 121 lines
@@ -0,0 +1,121 @@
#include "definition.hpp"
#include "error.hpp"
#include "ast.hpp"
#include "instruction.hpp"
#include "llvm_context.hpp"
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/Type.h>

void definition_defn::typecheck_first(type_mgr& mgr, type_env& env) {
    return_type = mgr.new_type();
    type_ptr full_type = return_type;

    for(auto it = params.rbegin(); it != params.rend(); it++) {
        type_ptr param_type = mgr.new_type();
        full_type = type_ptr(new type_arr(param_type, full_type));
        param_types.push_back(param_type);
    }

    env.bind(name, full_type);
}

void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const {
    type_env new_env = env.scope();
    auto param_it = params.begin();
    auto type_it = param_types.rbegin();

    while(param_it != params.end() && type_it != param_types.rend()) {
        new_env.bind(*param_it, *type_it);
        param_it++;
        type_it++;
    }

    type_ptr body_type = body->typecheck_common(mgr, new_env);
    mgr.unify(return_type, body_type);
}

void definition_defn::resolve(const type_mgr& mgr) {
    type_var* var;
    body->resolve_common(mgr);

    return_type = mgr.resolve(return_type, var);
    if(var) throw type_error("ambiguously typed program");
    for(auto& param_type : param_types) {
        param_type = mgr.resolve(param_type, var);
        if(var) throw type_error("ambiguously typed program");
    }
}

void definition_defn::compile() {
    env_ptr new_env = env_ptr(new env_offset(0, nullptr));
    for(auto it = params.rbegin(); it != params.rend(); it++) {
        new_env = env_ptr(new env_var(*it, new_env));
    }
    body->compile(new_env, instructions);
    instructions.push_back(instruction_ptr(new instruction_update(params.size())));
    instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
}
void definition_defn::gen_llvm_first(llvm_context& ctx) {
    generated_function = ctx.create_custom_function(name, params.size());
}

void definition_defn::gen_llvm_second(llvm_context& ctx) {
    ctx.builder.SetInsertPoint(&generated_function->getEntryBlock());
    for(auto& instruction : instructions) {
        instruction->gen_llvm(ctx, generated_function);
    }
    ctx.builder.CreateRetVoid();
}

void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
    type_data* this_type = new type_data(name);
    type_ptr return_type = type_ptr(this_type);
    int next_tag = 0;

    for(auto& constructor : constructors) {
        constructor->tag = next_tag;
        this_type->constructors[constructor->name] = { next_tag++ };

        type_ptr full_type = return_type;
        for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) {
            type_ptr type = type_ptr(new type_base(*it));
            full_type = type_ptr(new type_arr(type, full_type));
        }

        env.bind(constructor->name, full_type);
    }
}

void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const {
    // Nothing
}

void definition_data::resolve(const type_mgr& mgr) {
    // Nothing
}

void definition_data::compile() {

}

void definition_data::gen_llvm_first(llvm_context& ctx) {
    for(auto& constructor : constructors) {
        auto new_function =
            ctx.create_custom_function(constructor->name, constructor->types.size());
        std::vector<instruction_ptr> instructions;
        instructions.push_back(instruction_ptr(
            new instruction_pack(constructor->tag, constructor->types.size())
        ));
        instructions.push_back(instruction_ptr(new instruction_update(0)));
        ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
        for (auto& instruction : instructions) {
            instruction->gen_llvm(ctx, new_function);
        }
        ctx.builder.CreateRetVoid();
    }
}

void definition_data::gen_llvm_second(llvm_context& ctx) {
    // Nothing
}

code/compiler/09/definition.hpp — new file, 73 lines
@@ -0,0 +1,73 @@
#pragma once
#include <memory>
#include <vector>
#include "instruction.hpp"
#include "llvm_context.hpp"
#include "type_env.hpp"

struct ast;
using ast_ptr = std::unique_ptr<ast>;

struct definition {
    virtual ~definition() = default;

    virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0;
    virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0;
    virtual void resolve(const type_mgr& mgr) = 0;
    virtual void compile() = 0;
    virtual void gen_llvm_first(llvm_context& ctx) = 0;
    virtual void gen_llvm_second(llvm_context& ctx) = 0;
};

using definition_ptr = std::unique_ptr<definition>;

struct constructor {
    std::string name;
    std::vector<std::string> types;
    int8_t tag;

    constructor(std::string n, std::vector<std::string> ts)
        : name(std::move(n)), types(std::move(ts)) {}
};

using constructor_ptr = std::unique_ptr<constructor>;

struct definition_defn : public definition {
    std::string name;
    std::vector<std::string> params;
    ast_ptr body;

    type_ptr return_type;
    std::vector<type_ptr> param_types;

    std::vector<instruction_ptr> instructions;

    llvm::Function* generated_function;

    definition_defn(std::string n, std::vector<std::string> p, ast_ptr b)
        : name(std::move(n)), params(std::move(p)), body(std::move(b)) {

    }

    void typecheck_first(type_mgr& mgr, type_env& env);
    void typecheck_second(type_mgr& mgr, const type_env& env) const;
    void resolve(const type_mgr& mgr);
    void compile();
    void gen_llvm_first(llvm_context& ctx);
    void gen_llvm_second(llvm_context& ctx);
};

struct definition_data : public definition {
    std::string name;
    std::vector<constructor_ptr> constructors;

    definition_data(std::string n, std::vector<constructor_ptr> cs)
        : name(std::move(n)), constructors(std::move(cs)) {}

    void typecheck_first(type_mgr& mgr, type_env& env);
    void typecheck_second(type_mgr& mgr, const type_env& env) const;
    void resolve(const type_mgr& mgr);
    void compile();
    void gen_llvm_first(llvm_context& ctx);
    void gen_llvm_second(llvm_context& ctx);
};

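A short sketch (illustrative only, not part of this diff; assumes the files above) of how definition_defn::compile turns a function body into G-machine code: the definition for `defn double x = { x + x }` is built by hand and its instructions are printed. Type checking is skipped here since compile() does not depend on it.

```cpp
// Illustrative sketch, not part of the commit.
#include <iostream>
#include "definition.hpp"
#include "ast.hpp"

int main() {
    // defn double x = { x + x }
    definition_defn d("double", {"x"},
        ast_ptr(new ast_binop(PLUS,
            ast_ptr(new ast_lid("x")),
            ast_ptr(new ast_lid("x")))));
    d.compile();
    // Expected: Push(0), Push(1), PushGlobal(plus), MkApp(), MkApp(), Update(1), Pop(1)
    for(auto& instruction : d.instructions) instruction->print(0, std::cout);
    return 0;
}
```
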
code/compiler/09/env.cpp — new file, 23 lines
@@ -0,0 +1,23 @@
#include "env.hpp"

int env_var::get_offset(const std::string& name) const {
    if(name == this->name) return 0;
    if(parent) return parent->get_offset(name) + 1;
    throw 0;
}

bool env_var::has_variable(const std::string& name) const {
    if(name == this->name) return true;
    if(parent) return parent->has_variable(name);
    return false;
}

int env_offset::get_offset(const std::string& name) const {
    if(parent) return parent->get_offset(name) + offset;
    throw 0;
}

bool env_offset::has_variable(const std::string& name) const {
    if(parent) return parent->has_variable(name);
    return false;
}

code/compiler/09/env.hpp — new file, 34 lines
@@ -0,0 +1,34 @@
#pragma once
#include <memory>
#include <string>

struct env {
    virtual ~env() = default;

    virtual int get_offset(const std::string& name) const = 0;
    virtual bool has_variable(const std::string& name) const = 0;
};

using env_ptr = std::shared_ptr<env>;

struct env_var : public env {
    std::string name;
    env_ptr parent;

    env_var(std::string& n, env_ptr p)
        : name(std::move(n)), parent(std::move(p)) {}

    int get_offset(const std::string& name) const;
    bool has_variable(const std::string& name) const;
};

struct env_offset : public env {
    int offset;
    env_ptr parent;

    env_offset(int o, env_ptr p)
        : offset(o), parent(std::move(p)) {}

    int get_offset(const std::string& name) const;
    bool has_variable(const std::string& name) const;
};

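A minimal sketch (illustrative only, not part of this diff; assumes env.hpp/env.cpp above) of how compilation environments track stack positions: each env_var adds a named slot on top, and an env_offset shifts every lookup below it by a fixed amount.

```cpp
// Illustrative sketch, not part of the commit.
#include <iostream>
#include "env.hpp"

int main() {
    std::string x = "x", y = "y";
    env_ptr base(new env_offset(0, nullptr));
    env_ptr with_x(new env_var(x, base));        // x is at offset 0
    env_ptr with_y(new env_var(y, with_x));      // y pushed above x
    env_ptr shifted(new env_offset(1, with_y));  // one extra value on top of the stack

    std::cout << shifted->get_offset("y") << std::endl; // 1
    std::cout << shifted->get_offset("x") << std::endl; // 2
    return 0;
}
```
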
code/compiler/09/error.cpp — new file, 5 lines
@@ -0,0 +1,5 @@
#include "error.hpp"

const char* type_error::what() const noexcept {
    return "an error occured while checking the types of the program";
}

code/compiler/09/error.hpp — new file, 21 lines
@@ -0,0 +1,21 @@
#pragma once
#include <exception>
#include "type.hpp"

struct type_error : std::exception {
    std::string description;

    type_error(std::string d)
        : description(std::move(d)) {}

    const char* what() const noexcept override;
};

struct unification_error : public type_error {
    type_ptr left;
    type_ptr right;

    unification_error(type_ptr l, type_ptr r)
        : left(std::move(l)), right(std::move(r)),
        type_error("failed to unify types") {}
};

code/compiler/09/examples/bad1.txt — new file, 2 lines
@@ -0,0 +1,2 @@
data Bool = { True, False }
defn main = { 3 + True }

code/compiler/09/examples/bad2.txt — new file, 1 line
@@ -0,0 +1 @@
defn main = { 1 2 3 4 5 }

code/compiler/09/examples/bad3.txt — new file, 8 lines
@@ -0,0 +1,8 @@
data List = { Nil, Cons Int List }

defn head l = {
    case l of {
        Nil -> { 0 }
        Cons x y z -> { x }
    }
}

code/compiler/09/examples/primes.txt — new file, 129 lines
@@ -0,0 +1,129 @@
data List = { Nil, Cons Nat List }
data Bool = { True, False }
data Nat = { O, S Nat }

defn ifN c t e = {
    case c of {
        True -> { t }
        False -> { e }
    }
}

defn ifL c t e = {
    case c of {
        True -> { t }
        False -> { e }
    }
}

defn toInt n = {
    case n of {
        O -> { 0 }
        S np -> { 1 + toInt np }
    }
}

defn lte n m = {
    case m of {
        O -> {
            case n of {
                O -> { True }
                S np -> { False }
            }
        }
        S mp -> {
            case n of {
                O -> { True }
                S np -> { lte np mp }
            }
        }
    }
}

defn minus n m = {
    case m of {
        O -> { n }
        S mp -> {
            case n of {
                O -> { O }
                S np -> {
                    minus np mp
                }
            }
        }
    }
}

defn mod n m = {
    ifN (lte m n) (mod (minus n m) m) n
}

defn notDivisibleBy n m = {
    case (mod m n) of {
        O -> { False }
        S mp -> { True }
    }
}

defn filter f l = {
    case l of {
        Nil -> { Nil }
        Cons x xs -> { ifL (f x) (Cons x (filter f xs)) (filter f xs) }
    }
}

defn map f l = {
    case l of {
        Nil -> { Nil }
        Cons x xs -> { Cons (f x) (map f xs) }
    }
}

defn nats = {
    Cons (S (S O)) (map S nats)
}

defn primesRec l = {
    case l of {
        Nil -> { Nil }
        Cons p xs -> { Cons p (primesRec (filter (notDivisibleBy p) xs)) }
    }
}

defn primes = {
    primesRec nats
}

defn take n l = {
    case l of {
        Nil -> { Nil }
        Cons x xs -> {
            case n of {
                O -> { Nil }
                S np -> { Cons x (take np xs) }
            }
        }
    }
}

defn head l = {
    case l of {
        Nil -> { O }
        Cons x xs -> { x }
    }
}

defn reverseAcc a l = {
    case l of {
        Nil -> { a }
        Cons x xs -> { reverseAcc (Cons x a) xs }
    }
}

defn reverse l = {
    reverseAcc Nil l
}

defn main = {
    toInt (head (reverse (take ((S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S O))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) primes)))
}

code/compiler/09/examples/runtime1.c — new file, 31 lines
@@ -0,0 +1,31 @@
#include "../runtime.h"

void f_add(struct stack* s) {
    struct node_num* left = (struct node_num*) eval(stack_peek(s, 0));
    struct node_num* right = (struct node_num*) eval(stack_peek(s, 1));
    stack_push(s, (struct node_base*) alloc_num(left->value + right->value));
}

void f_main(struct stack* s) {
    // PushInt 320
    stack_push(s, (struct node_base*) alloc_num(320));

    // PushInt 6
    stack_push(s, (struct node_base*) alloc_num(6));

    // PushGlobal f_add (the function for +)
    stack_push(s, (struct node_base*) alloc_global(f_add, 2));

    struct node_base* left;
    struct node_base* right;

    // MkApp
    left = stack_pop(s);
    right = stack_pop(s);
    stack_push(s, (struct node_base*) alloc_app(left, right));

    // MkApp
    left = stack_pop(s);
    right = stack_pop(s);
    stack_push(s, (struct node_base*) alloc_app(left, right));
}

code/compiler/09/examples/works1.txt — new file, 3 lines
@@ -0,0 +1,3 @@
defn main = { sum 320 6 }
defn sum x y = { x + y }

code/compiler/09/examples/works2.txt — new file, 3 lines
@@ -0,0 +1,3 @@
defn add x y = { x + y }
defn double x = { add x x }
defn main = { double 163 }

code/compiler/09/examples/works3.txt — new file, 8 lines
@@ -0,0 +1,8 @@
data List = { Nil, Cons Int List }
defn length l = {
    case l of {
        Nil -> { 0 }
        Cons x xs -> { 1 + length xs }
    }
}
defn main = { length (Cons 1 (Cons 2 (Cons 3 Nil))) }

code/compiler/09/examples/works4.txt — new file, 16 lines
@@ -0,0 +1,16 @@
data List = { Nil, Cons Int List }

defn add x y = { x + y }
defn mul x y = { x * y }

defn foldr f b l = {
    case l of {
        Nil -> { b }
        Cons x xs -> { f x (foldr f b xs) }
    }
}

defn main = {
    foldr add 0 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil)))) +
        foldr mul 1 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil))))
}

code/compiler/09/examples/works5.txt — new file, 17 lines
@@ -0,0 +1,17 @@
data List = { Nil, Cons Int List }

defn sumZip l m = {
    case l of {
        Nil -> { 0 }
        Cons x xs -> {
            case m of {
                Nil -> { 0 }
                Cons y ys -> { x + y + sumZip xs ys }
            }
        }
    }
}

defn ones = { Cons 1 ones }

defn main = { sumZip ones (Cons 1 (Cons 2 (Cons 3 Nil))) }

code/compiler/09/instruction.cpp — new file, 177 lines
@@ -0,0 +1,177 @@
#include "instruction.hpp"
#include "llvm_context.hpp"
#include <llvm/IR/BasicBlock.h>
#include <llvm/IR/Function.h>

using namespace llvm;

static void print_indent(int n, std::ostream& to) {
    while(n--) to << " ";
}

void instruction_pushint::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushInt(" << value << ")" << std::endl;
}

void instruction_pushint::gen_llvm(llvm_context& ctx, Function* f) const {
    ctx.create_push(f, ctx.create_num(f, ctx.create_i32(value)));
}

void instruction_pushglobal::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "PushGlobal(" << name << ")" << std::endl;
}

void instruction_pushglobal::gen_llvm(llvm_context& ctx, Function* f) const {
    auto& global_f = ctx.custom_functions.at("f_" + name);
    auto arity = ctx.create_i32(global_f->arity);
    ctx.create_push(f, ctx.create_global(f, global_f->function, arity));
}

void instruction_push::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Push(" << offset << ")" << std::endl;
}

void instruction_push::gen_llvm(llvm_context& ctx, Function* f) const {
    ctx.create_push(f, ctx.create_peek(f, ctx.create_size(offset)));
}

void instruction_pop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Pop(" << count << ")" << std::endl;
}

void instruction_pop::gen_llvm(llvm_context& ctx, Function* f) const {
    ctx.create_popn(f, ctx.create_size(count));
}

void instruction_mkapp::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "MkApp()" << std::endl;
}

void instruction_mkapp::gen_llvm(llvm_context& ctx, Function* f) const {
    auto left = ctx.create_pop(f);
    auto right = ctx.create_pop(f);
    ctx.create_push(f, ctx.create_app(f, left, right));
}

void instruction_update::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Update(" << offset << ")" << std::endl;
}

void instruction_update::gen_llvm(llvm_context& ctx, Function* f) const {
    ctx.create_update(f, ctx.create_size(offset));
}

void instruction_pack::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Pack(" << tag << ", " << size << ")" << std::endl;
}

void instruction_pack::gen_llvm(llvm_context& ctx, Function* f) const {
    ctx.create_pack(f, ctx.create_size(size), ctx.create_i8(tag));
}

void instruction_split::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Split()" << std::endl;
}

void instruction_split::gen_llvm(llvm_context& ctx, Function* f) const {
    ctx.create_split(f, ctx.create_size(size));
}

void instruction_jump::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Jump(" << std::endl;
    for(auto& instruction_set : branches) {
        for(auto& instruction : instruction_set) {
            instruction->print(indent + 2, to);
        }
        to << std::endl;
    }
    print_indent(indent, to);
    to << ")" << std::endl;
}

void instruction_jump::gen_llvm(llvm_context& ctx, Function* f) const {
    auto top_node = ctx.create_peek(f, ctx.create_size(0));
    auto tag = ctx.unwrap_data_tag(top_node);
    auto safety_block = BasicBlock::Create(ctx.ctx, "safety", f);
    auto switch_op = ctx.builder.CreateSwitch(tag, safety_block, tag_mappings.size());
    std::vector<BasicBlock*> blocks;

    for(auto& branch : branches) {
        auto branch_block = BasicBlock::Create(ctx.ctx, "branch", f);
        ctx.builder.SetInsertPoint(branch_block);
        for(auto& instruction : branch) {
            instruction->gen_llvm(ctx, f);
        }
        ctx.builder.CreateBr(safety_block);
        blocks.push_back(branch_block);
    }

    for(auto& mapping : tag_mappings) {
        switch_op->addCase(ctx.create_i8(mapping.first), blocks[mapping.second]);
    }

    ctx.builder.SetInsertPoint(safety_block);
}

void instruction_slide::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Slide(" << offset << ")" << std::endl;
}

void instruction_slide::gen_llvm(llvm_context& ctx, Function* f) const {
    ctx.create_slide(f, ctx.create_size(offset));
}

void instruction_binop::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "BinOp(" << op_action(op) << ")" << std::endl;
}

void instruction_binop::gen_llvm(llvm_context& ctx, Function* f) const {
    auto left_int = ctx.unwrap_num(ctx.create_pop(f));
    auto right_int = ctx.unwrap_num(ctx.create_pop(f));
    llvm::Value* result;
    switch(op) {
        case PLUS: result = ctx.builder.CreateAdd(left_int, right_int); break;
        case MINUS: result = ctx.builder.CreateSub(left_int, right_int); break;
        case TIMES: result = ctx.builder.CreateMul(left_int, right_int); break;
        case DIVIDE: result = ctx.builder.CreateSDiv(left_int, right_int); break;
    }
    ctx.create_push(f, ctx.create_num(f, result));
}

void instruction_eval::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Eval()" << std::endl;
}

void instruction_eval::gen_llvm(llvm_context& ctx, Function* f) const {
    ctx.create_unwind(f);
}

void instruction_alloc::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Alloc(" << amount << ")" << std::endl;
}

void instruction_alloc::gen_llvm(llvm_context& ctx, Function* f) const {
    ctx.create_alloc(f, ctx.create_size(amount));
}

void instruction_unwind::print(int indent, std::ostream& to) const {
    print_indent(indent, to);
    to << "Unwind()" << std::endl;
}

void instruction_unwind::gen_llvm(llvm_context& ctx, Function* f) const {
    // Nothing
}

code/compiler/09/instruction.hpp — new file, 142 lines
@@ -0,0 +1,142 @@
#pragma once
#include <llvm/IR/Function.h>
#include <string>
#include <memory>
#include <vector>
#include <map>
#include <ostream>
#include "binop.hpp"
#include "llvm_context.hpp"

struct instruction {
    virtual ~instruction() = default;

    virtual void print(int indent, std::ostream& to) const = 0;
    virtual void gen_llvm(llvm_context& ctx, llvm::Function* f) const = 0;
};

using instruction_ptr = std::unique_ptr<instruction>;

struct instruction_pushint : public instruction {
    int value;

    instruction_pushint(int v)
        : value(v) {}

    void print(int indent, std::ostream& to) const;
    void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};

struct instruction_pushglobal : public instruction {
    std::string name;

    instruction_pushglobal(std::string n)
        : name(std::move(n)) {}

    void print(int indent, std::ostream& to) const;
    void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};

struct instruction_push : public instruction {
    int offset;

    instruction_push(int o)
        : offset(o) {}

    void print(int indent, std::ostream& to) const;
    void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};

struct instruction_pop : public instruction {
    int count;

    instruction_pop(int c)
        : count(c) {}

    void print(int indent, std::ostream& to) const;
    void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};

struct instruction_mkapp : public instruction {
    void print(int indent, std::ostream& to) const;
    void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};

struct instruction_update : public instruction {
    int offset;

    instruction_update(int o)
        : offset(o) {}

    void print(int indent, std::ostream& to) const;
    void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};

struct instruction_pack : public instruction {
    int tag;
    int size;

    instruction_pack(int t, int s)
        : tag(t), size(s) {}

    void print(int indent, std::ostream& to) const;
    void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};

struct instruction_split : public instruction {
    int size;

    instruction_split(int s)
        : size(s) {}

    void print(int indent, std::ostream& to) const;
    void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};

struct instruction_jump : public instruction {
    std::vector<std::vector<instruction_ptr>> branches;
    std::map<int, int> tag_mappings;

    void print(int indent, std::ostream& to) const;
    void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};

struct instruction_slide : public instruction {
    int offset;

    instruction_slide(int o)
        : offset(o) {}

    void print(int indent, std::ostream& to) const;
    void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};

struct instruction_binop : public instruction {
    binop op;

    instruction_binop(binop o)
        : op(o) {}

    void print(int indent, std::ostream& to) const;
    void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};

struct instruction_eval : public instruction {
    void print(int indent, std::ostream& to) const;
    void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};

struct instruction_alloc : public instruction {
    int amount;

    instruction_alloc(int a)
        : amount(a) {}

    void print(int indent, std::ostream& to) const;
    void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};

struct instruction_unwind : public instruction {
    void print(int indent, std::ostream& to) const;
    void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};

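A minimal sketch (illustrative only, not part of this diff; assumes the instruction classes above) that builds by hand the G-machine sequence ast_binop::compile emits for `2 + 3` and prints it with the nodes' own print() methods.

```cpp
// Illustrative sketch, not part of the commit.
#include <iostream>
#include <vector>
#include "instruction.hpp"

int main() {
    std::vector<instruction_ptr> code;
    code.push_back(instruction_ptr(new instruction_pushint(3)));         // right operand first
    code.push_back(instruction_ptr(new instruction_pushint(2)));         // then left operand
    code.push_back(instruction_ptr(new instruction_pushglobal("plus"))); // op_action(PLUS)
    code.push_back(instruction_ptr(new instruction_mkapp()));
    code.push_back(instruction_ptr(new instruction_mkapp()));

    // Prints: PushInt(3), PushInt(2), PushGlobal(plus), MkApp(), MkApp()
    for(auto& instruction : code) instruction->print(0, std::cout);
    return 0;
}
```
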
code/compiler/09/llvm_context.cpp — new file, 278 lines (truncated in this extract)
@@ -0,0 +1,278 @@
#include "llvm_context.hpp"
#include <llvm/IR/DerivedTypes.h>

using namespace llvm;

void llvm_context::create_types() {
    stack_type = StructType::create(ctx, "stack");
    gmachine_type = StructType::create(ctx, "gmachine");
    stack_ptr_type = PointerType::getUnqual(stack_type);
    gmachine_ptr_type = PointerType::getUnqual(gmachine_type);
    tag_type = IntegerType::getInt8Ty(ctx);
    struct_types["node_base"] = StructType::create(ctx, "node_base");
    struct_types["node_app"] = StructType::create(ctx, "node_app");
    struct_types["node_num"] = StructType::create(ctx, "node_num");
    struct_types["node_global"] = StructType::create(ctx, "node_global");
    struct_types["node_ind"] = StructType::create(ctx, "node_ind");
    struct_types["node_data"] = StructType::create(ctx, "node_data");
    node_ptr_type = PointerType::getUnqual(struct_types.at("node_base"));
    function_type = FunctionType::get(Type::getVoidTy(ctx), { gmachine_ptr_type }, false);

    gmachine_type->setBody(
        stack_ptr_type,
        node_ptr_type,
        IntegerType::getInt64Ty(ctx),
        IntegerType::getInt64Ty(ctx)
    );
    struct_types.at("node_base")->setBody(
        IntegerType::getInt32Ty(ctx),
        IntegerType::getInt8Ty(ctx),
        node_ptr_type
    );
    struct_types.at("node_app")->setBody(
        struct_types.at("node_base"),
        node_ptr_type,
        node_ptr_type
    );
    struct_types.at("node_num")->setBody(
        struct_types.at("node_base"),
        IntegerType::getInt32Ty(ctx)
    );
    struct_types.at("node_global")->setBody(
        struct_types.at("node_base"),
        FunctionType::get(Type::getVoidTy(ctx), { stack_ptr_type }, false)
    );
    struct_types.at("node_ind")->setBody(
        struct_types.at("node_base"),
        node_ptr_type
    );
    struct_types.at("node_data")->setBody(
        struct_types.at("node_base"),
        IntegerType::getInt8Ty(ctx),
        PointerType::getUnqual(node_ptr_type)
    );
}

void llvm_context::create_functions() {
    auto void_type = Type::getVoidTy(ctx);
    auto sizet_type = IntegerType::get(ctx, sizeof(size_t) * 8);
    functions["stack_init"] = Function::Create(
        FunctionType::get(void_type, { stack_ptr_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "stack_init",
        &module
    );
    functions["stack_free"] = Function::Create(
        FunctionType::get(void_type, { stack_ptr_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "stack_free",
        &module
    );
    functions["stack_push"] = Function::Create(
        FunctionType::get(void_type, { stack_ptr_type, node_ptr_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "stack_push",
        &module
    );
    functions["stack_pop"] = Function::Create(
        FunctionType::get(node_ptr_type, { stack_ptr_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "stack_pop",
        &module
    );
    functions["stack_peek"] = Function::Create(
        FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "stack_peek",
        &module
    );
    functions["stack_popn"] = Function::Create(
        FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "stack_popn",
        &module
    );
    functions["gmachine_slide"] = Function::Create(
        FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "gmachine_slide",
        &module
    );
    functions["gmachine_update"] = Function::Create(
        FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "gmachine_update",
        &module
    );
    functions["gmachine_alloc"] = Function::Create(
        FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "gmachine_alloc",
        &module
    );
    functions["gmachine_pack"] = Function::Create(
        FunctionType::get(void_type, { gmachine_ptr_type, sizet_type, tag_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "gmachine_pack",
        &module
    );
    functions["gmachine_split"] = Function::Create(
        FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "gmachine_split",
        &module
    );
    functions["gmachine_track"] = Function::Create(
        FunctionType::get(node_ptr_type, { gmachine_ptr_type, node_ptr_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "gmachine_track",
        &module
    );

    auto int32_type = IntegerType::getInt32Ty(ctx);
    functions["alloc_app"] = Function::Create(
        FunctionType::get(node_ptr_type, { node_ptr_type, node_ptr_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "alloc_app",
        &module
    );
    functions["alloc_num"] = Function::Create(
        FunctionType::get(node_ptr_type, { int32_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "alloc_num",
        &module
    );
    functions["alloc_global"] = Function::Create(
        FunctionType::get(node_ptr_type, { function_type, int32_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "alloc_global",
        &module
    );
    functions["alloc_ind"] = Function::Create(
        FunctionType::get(node_ptr_type, { node_ptr_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "alloc_ind",
        &module
    );

    functions["unwind"] = Function::Create(
        FunctionType::get(void_type, { gmachine_ptr_type }, false),
        Function::LinkageTypes::ExternalLinkage,
        "unwind",
        &module
    );
}

ConstantInt* llvm_context::create_i8(int8_t i) {
    return ConstantInt::get(ctx, APInt(8, i));
}
ConstantInt* llvm_context::create_i32(int32_t i) {
    return ConstantInt::get(ctx, APInt(32, i));
}
ConstantInt* llvm_context::create_size(size_t i) {
    return ConstantInt::get(ctx, APInt(sizeof(size_t) * 8, i));
}

Value* llvm_context::create_pop(Function* f) {
    auto pop_f = functions.at("stack_pop");
    return builder.CreateCall(pop_f, { unwrap_gmachine_stack_ptr(f->arg_begin()) });
}
Value* llvm_context::create_peek(Function* f, Value* off) {
    auto peek_f = functions.at("stack_peek");
    return builder.CreateCall(peek_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), off });
}
void llvm_context::create_push(Function* f, Value* v) {
    auto push_f = functions.at("stack_push");
    builder.CreateCall(push_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), v });
}
void llvm_context::create_popn(Function* f, Value* off) {
    auto popn_f = functions.at("stack_popn");
    builder.CreateCall(popn_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), off });
}
void llvm_context::create_update(Function* f, Value* off) {
    auto update_f = functions.at("gmachine_update");
    builder.CreateCall(update_f, { f->arg_begin(), off });
}
void llvm_context::create_pack(Function* f, Value* c, Value* t) {
    auto pack_f = functions.at("gmachine_pack");
|
||||||
|
builder.CreateCall(pack_f, { f->arg_begin(), c, t });
|
||||||
|
}
|
||||||
|
void llvm_context::create_split(Function* f, Value* c) {
|
||||||
|
auto split_f = functions.at("gmachine_split");
|
||||||
|
builder.CreateCall(split_f, { f->arg_begin(), c });
|
||||||
|
}
|
||||||
|
void llvm_context::create_slide(Function* f, Value* off) {
|
||||||
|
auto slide_f = functions.at("gmachine_slide");
|
||||||
|
builder.CreateCall(slide_f, { f->arg_begin(), off });
|
||||||
|
}
|
||||||
|
void llvm_context::create_alloc(Function* f, Value* n) {
|
||||||
|
auto alloc_f = functions.at("gmachine_alloc");
|
||||||
|
builder.CreateCall(alloc_f, { f->arg_begin(), n });
|
||||||
|
}
|
||||||
|
Value* llvm_context::create_track(Function* f, Value* v) {
|
||||||
|
auto track_f = functions.at("gmachine_track");
|
||||||
|
return builder.CreateCall(track_f, { f->arg_begin(), v });
|
||||||
|
}
|
||||||
|
|
||||||
|
void llvm_context::create_unwind(Function* f) {
|
||||||
|
auto unwind_f = functions.at("unwind");
|
||||||
|
builder.CreateCall(unwind_f, { f->args().begin() });
|
||||||
|
}
|
||||||
|
|
||||||
|
Value* llvm_context::unwrap_gmachine_stack_ptr(Value* g) {
|
||||||
|
auto offset_0 = create_i32(0);
|
||||||
|
return builder.CreateGEP(g, { offset_0, offset_0 });
|
||||||
|
}
|
||||||
|
|
||||||
|
Value* llvm_context::unwrap_num(Value* v) {
|
||||||
|
auto num_ptr_type = PointerType::getUnqual(struct_types.at("node_num"));
|
||||||
|
auto cast = builder.CreatePointerCast(v, num_ptr_type);
|
||||||
|
auto offset_0 = create_i32(0);
|
||||||
|
auto offset_1 = create_i32(1);
|
||||||
|
auto int_ptr = builder.CreateGEP(cast, { offset_0, offset_1 });
|
||||||
|
return builder.CreateLoad(int_ptr);
|
||||||
|
}
|
||||||
|
Value* llvm_context::create_num(Function* f, Value* v) {
|
||||||
|
auto alloc_num_f = functions.at("alloc_num");
|
||||||
|
auto alloc_num_call = builder.CreateCall(alloc_num_f, { v });
|
||||||
|
return create_track(f, alloc_num_call);
|
||||||
|
}
|
||||||
|
|
||||||
|
Value* llvm_context::unwrap_data_tag(Value* v) {
|
||||||
|
auto data_ptr_type = PointerType::getUnqual(struct_types.at("node_data"));
|
||||||
|
auto cast = builder.CreatePointerCast(v, data_ptr_type);
|
||||||
|
auto offset_0 = create_i32(0);
|
||||||
|
auto offset_1 = create_i32(1);
|
||||||
|
auto tag_ptr = builder.CreateGEP(cast, { offset_0, offset_1 });
|
||||||
|
return builder.CreateLoad(tag_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
Value* llvm_context::create_global(Function* f, Value* gf, Value* a) {
|
||||||
|
auto alloc_global_f = functions.at("alloc_global");
|
||||||
|
auto alloc_global_call = builder.CreateCall(alloc_global_f, { gf, a });
|
||||||
|
return create_track(f, alloc_global_call);
|
||||||
|
}
|
||||||
|
|
||||||
|
Value* llvm_context::create_app(Function* f, Value* l, Value* r) {
|
||||||
|
auto alloc_app_f = functions.at("alloc_app");
|
||||||
|
auto alloc_app_call = builder.CreateCall(alloc_app_f, { l, r });
|
||||||
|
return create_track(f, alloc_app_call);
|
||||||
|
}
|
||||||
|
|
||||||
|
llvm::Function* llvm_context::create_custom_function(std::string name, int32_t arity) {
|
||||||
|
auto void_type = llvm::Type::getVoidTy(ctx);
|
||||||
|
auto new_function = llvm::Function::Create(
|
||||||
|
function_type,
|
||||||
|
llvm::Function::LinkageTypes::ExternalLinkage,
|
||||||
|
"f_" + name,
|
||||||
|
&module
|
||||||
|
);
|
||||||
|
auto start_block = llvm::BasicBlock::Create(ctx, "entry", new_function);
|
||||||
|
|
||||||
|
auto new_custom_f = custom_function_ptr(new custom_function());
|
||||||
|
new_custom_f->arity = arity;
|
||||||
|
new_custom_f->function = new_function;
|
||||||
|
custom_functions["f_" + name] = std::move(new_custom_f);
|
||||||
|
|
||||||
|
return new_function;
|
||||||
|
}
|
||||||
72
code/compiler/09/llvm_context.hpp
Normal file
@@ -0,0 +1,72 @@
#pragma once
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Value.h>
#include <map>

struct llvm_context {
    struct custom_function {
        llvm::Function* function;
        int32_t arity;
    };

    using custom_function_ptr = std::unique_ptr<custom_function>;

    llvm::LLVMContext ctx;
    llvm::IRBuilder<> builder;
    llvm::Module module;

    std::map<std::string, custom_function_ptr> custom_functions;
    std::map<std::string, llvm::Function*> functions;
    std::map<std::string, llvm::StructType*> struct_types;

    llvm::StructType* stack_type;
    llvm::StructType* gmachine_type;
    llvm::PointerType* stack_ptr_type;
    llvm::PointerType* gmachine_ptr_type;
    llvm::PointerType* node_ptr_type;
    llvm::IntegerType* tag_type;
    llvm::FunctionType* function_type;

    llvm_context()
        : builder(ctx), module("bloglang", ctx) {
        create_types();
        create_functions();
    }

    void create_types();
    void create_functions();

    llvm::ConstantInt* create_i8(int8_t);
    llvm::ConstantInt* create_i32(int32_t);
    llvm::ConstantInt* create_size(size_t);

    llvm::Value* create_pop(llvm::Function*);
    llvm::Value* create_peek(llvm::Function*, llvm::Value*);
    void create_push(llvm::Function*, llvm::Value*);
    void create_popn(llvm::Function*, llvm::Value*);
    void create_update(llvm::Function*, llvm::Value*);
    void create_pack(llvm::Function*, llvm::Value*, llvm::Value*);
    void create_split(llvm::Function*, llvm::Value*);
    void create_slide(llvm::Function*, llvm::Value*);
    void create_alloc(llvm::Function*, llvm::Value*);
    llvm::Value* create_track(llvm::Function*, llvm::Value*);

    void create_unwind(llvm::Function*);

    llvm::Value* unwrap_gmachine_stack_ptr(llvm::Value*);

    llvm::Value* unwrap_num(llvm::Value*);
    llvm::Value* create_num(llvm::Function*, llvm::Value*);

    llvm::Value* unwrap_data_tag(llvm::Value*);

    llvm::Value* create_global(llvm::Function*, llvm::Value*, llvm::Value*);

    llvm::Value* create_app(llvm::Function*, llvm::Value*, llvm::Value*);

    llvm::Function* create_custom_function(std::string name, int32_t arity);
};
176
code/compiler/09/main.cpp
Normal file
@@ -0,0 +1,176 @@
#include "ast.hpp"
#include <iostream>
#include "binop.hpp"
#include "definition.hpp"
#include "instruction.hpp"
#include "llvm_context.hpp"
#include "parser.hpp"
#include "error.hpp"
#include "type.hpp"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetMachine.h"

void yy::parser::error(const std::string& msg) {
    std::cout << "An error occurred: " << msg << std::endl;
}

extern std::vector<definition_ptr> program;

void typecheck_program(
        const std::vector<definition_ptr>& prog,
        type_mgr& mgr, type_env& env) {
    type_ptr int_type = type_ptr(new type_base("Int"));
    type_ptr binop_type = type_ptr(new type_arr(
            int_type,
            type_ptr(new type_arr(int_type, int_type))));

    env.bind("+", binop_type);
    env.bind("-", binop_type);
    env.bind("*", binop_type);
    env.bind("/", binop_type);

    for(auto& def : prog) {
        def->typecheck_first(mgr, env);
    }

    for(auto& def : prog) {
        def->typecheck_second(mgr, env);
    }

    for(auto& pair : env.names) {
        std::cout << pair.first << ": ";
        pair.second->print(mgr, std::cout);
        std::cout << std::endl;
    }

    for(auto& def : prog) {
        def->resolve(mgr);
    }
}

void compile_program(const std::vector<definition_ptr>& prog) {
    for(auto& def : prog) {
        def->compile();

        definition_defn* defn = dynamic_cast<definition_defn*>(def.get());
        if(!defn) continue;
        for(auto& instruction : defn->instructions) {
            instruction->print(0, std::cout);
        }
        std::cout << std::endl;
    }
}

void gen_llvm_internal_op(llvm_context& ctx, binop op) {
    auto new_function = ctx.create_custom_function(op_action(op), 2);
    std::vector<instruction_ptr> instructions;
    instructions.push_back(instruction_ptr(new instruction_push(1)));
    instructions.push_back(instruction_ptr(new instruction_eval()));
    instructions.push_back(instruction_ptr(new instruction_push(1)));
    instructions.push_back(instruction_ptr(new instruction_eval()));
    instructions.push_back(instruction_ptr(new instruction_binop(op)));
    instructions.push_back(instruction_ptr(new instruction_update(2)));
    instructions.push_back(instruction_ptr(new instruction_pop(2)));
    ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
    for(auto& instruction : instructions) {
        instruction->gen_llvm(ctx, new_function);
    }
    ctx.builder.CreateRetVoid();
}

void output_llvm(llvm_context& ctx, const std::string& filename) {
    std::string targetTriple = llvm::sys::getDefaultTargetTriple();

    llvm::InitializeNativeTarget();
    llvm::InitializeNativeTargetAsmParser();
    llvm::InitializeNativeTargetAsmPrinter();

    std::string error;
    const llvm::Target* target =
        llvm::TargetRegistry::lookupTarget(targetTriple, error);
    if (!target) {
        std::cerr << error << std::endl;
    } else {
        std::string cpu = "generic";
        std::string features = "";
        llvm::TargetOptions options;
        llvm::TargetMachine* targetMachine =
            target->createTargetMachine(targetTriple, cpu, features,
                    options, llvm::Optional<llvm::Reloc::Model>());

        ctx.module.setDataLayout(targetMachine->createDataLayout());
        ctx.module.setTargetTriple(targetTriple);

        std::error_code ec;
        llvm::raw_fd_ostream file(filename, ec, llvm::sys::fs::F_None);
        if (ec) {
            throw 0;
        } else {
            llvm::TargetMachine::CodeGenFileType type = llvm::TargetMachine::CGFT_ObjectFile;
            llvm::legacy::PassManager pm;
            if (targetMachine->addPassesToEmitFile(pm, file, NULL, type)) {
                throw 0;
            } else {
                pm.run(ctx.module);
                file.close();
            }
        }
    }
}

void gen_llvm(const std::vector<definition_ptr>& prog) {
    llvm_context ctx;
    gen_llvm_internal_op(ctx, PLUS);
    gen_llvm_internal_op(ctx, MINUS);
    gen_llvm_internal_op(ctx, TIMES);
    gen_llvm_internal_op(ctx, DIVIDE);

    for(auto& definition : prog) {
        definition->gen_llvm_first(ctx);
    }

    for(auto& definition : prog) {
        definition->gen_llvm_second(ctx);
    }
    ctx.module.print(llvm::outs(), nullptr);
    output_llvm(ctx, "program.o");
}

int main() {
    yy::parser parser;
    type_mgr mgr;
    type_env env;

    parser.parse();
    for(auto& definition : program) {
        definition_defn* def = dynamic_cast<definition_defn*>(definition.get());
        if(!def) continue;

        std::cout << def->name;
        for(auto& param : def->params) std::cout << " " << param;
        std::cout << ":" << std::endl;

        def->body->print(1, std::cout);
    }
    try {
        typecheck_program(program, mgr, env);
        compile_program(program);
        gen_llvm(program);
    } catch(unification_error& err) {
        std::cout << "failed to unify types: " << std::endl;
        std::cout << "  (1) \033[34m";
        err.left->print(mgr, std::cout);
        std::cout << "\033[0m" << std::endl;
        std::cout << "  (2) \033[32m";
        err.right->print(mgr, std::cout);
        std::cout << "\033[0m" << std::endl;
    } catch(type_error& err) {
        std::cout << "failed to type check program: " << err.description << std::endl;
    }
}
141
code/compiler/09/parser.y
Normal file
@@ -0,0 +1,141 @@
%{
#include <string>
#include <iostream>
#include "ast.hpp"
#include "definition.hpp"
#include "parser.hpp"

std::vector<definition_ptr> program;
extern yy::parser::symbol_type yylex();

%}

%token PLUS
%token TIMES
%token MINUS
%token DIVIDE
%token <int> INT
%token DEFN
%token DATA
%token CASE
%token OF
%token OCURLY
%token CCURLY
%token OPAREN
%token CPAREN
%token COMMA
%token ARROW
%token EQUAL
%token <std::string> LID
%token <std::string> UID

%language "c++"
%define api.value.type variant
%define api.token.constructor

%type <std::vector<std::string>> lowercaseParams uppercaseParams
%type <std::vector<definition_ptr>> program definitions
%type <std::vector<branch_ptr>> branches
%type <std::vector<constructor_ptr>> constructors
%type <ast_ptr> aAdd aMul case app appBase
%type <definition_ptr> definition defn data
%type <branch_ptr> branch
%type <pattern_ptr> pattern
%type <constructor_ptr> constructor

%start program

%%

program
    : definitions { program = std::move($1); }
    ;

definitions
    : definitions definition { $$ = std::move($1); $$.push_back(std::move($2)); }
    | definition { $$ = std::vector<definition_ptr>(); $$.push_back(std::move($1)); }
    ;

definition
    : defn { $$ = std::move($1); }
    | data { $$ = std::move($1); }
    ;

defn
    : DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY
        { $$ = definition_ptr(
            new definition_defn(std::move($2), std::move($3), std::move($6))); }
    ;

lowercaseParams
    : %empty { $$ = std::vector<std::string>(); }
    | lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
    ;

uppercaseParams
    : %empty { $$ = std::vector<std::string>(); }
    | uppercaseParams UID { $$ = std::move($1); $$.push_back(std::move($2)); }
    ;

aAdd
    : aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); }
    | aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); }
    | aMul { $$ = std::move($1); }
    ;

aMul
    : aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3))); }
    | aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3))); }
    | app { $$ = std::move($1); }
    ;

app
    : app appBase { $$ = ast_ptr(new ast_app(std::move($1), std::move($2))); }
    | appBase { $$ = std::move($1); }
    ;

appBase
    : INT { $$ = ast_ptr(new ast_int($1)); }
    | LID { $$ = ast_ptr(new ast_lid(std::move($1))); }
    | UID { $$ = ast_ptr(new ast_uid(std::move($1))); }
    | OPAREN aAdd CPAREN { $$ = std::move($2); }
    | case { $$ = std::move($1); }
    ;

case
    : CASE aAdd OF OCURLY branches CCURLY
        { $$ = ast_ptr(new ast_case(std::move($2), std::move($5))); }
    ;

branches
    : branches branch { $$ = std::move($1); $$.push_back(std::move($2)); }
    | branch { $$ = std::vector<branch_ptr>(); $$.push_back(std::move($1)); }
    ;

branch
    : pattern ARROW OCURLY aAdd CCURLY
        { $$ = branch_ptr(new branch(std::move($1), std::move($4))); }
    ;

pattern
    : LID { $$ = pattern_ptr(new pattern_var(std::move($1))); }
    | UID lowercaseParams
        { $$ = pattern_ptr(new pattern_constr(std::move($1), std::move($2))); }
    ;

data
    : DATA UID EQUAL OCURLY constructors CCURLY
        { $$ = definition_ptr(new definition_data(std::move($2), std::move($5))); }
    ;

constructors
    : constructors COMMA constructor { $$ = std::move($1); $$.push_back(std::move($3)); }
    | constructor
        { $$ = std::vector<constructor_ptr>(); $$.push_back(std::move($1)); }
    ;

constructor
    : UID uppercaseParams
        { $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); }
    ;
269
code/compiler/09/runtime.c
Normal file
@@ -0,0 +1,269 @@
#include <stdint.h>
#include <assert.h>
#include <memory.h>
#include <stdio.h>
#include "runtime.h"

struct node_base* alloc_node() {
    struct node_base* new_node = malloc(sizeof(struct node_app));
    new_node->gc_next = NULL;
    new_node->gc_reachable = 0;
    assert(new_node != NULL);
    return new_node;
}

struct node_app* alloc_app(struct node_base* l, struct node_base* r) {
    struct node_app* node = (struct node_app*) alloc_node();
    node->base.tag = NODE_APP;
    node->left = l;
    node->right = r;
    return node;
}

struct node_num* alloc_num(int32_t n) {
    struct node_num* node = (struct node_num*) alloc_node();
    node->base.tag = NODE_NUM;
    node->value = n;
    return node;
}

struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a) {
    struct node_global* node = (struct node_global*) alloc_node();
    node->base.tag = NODE_GLOBAL;
    node->arity = a;
    node->function = f;
    return node;
}

struct node_ind* alloc_ind(struct node_base* n) {
    struct node_ind* node = (struct node_ind*) alloc_node();
    node->base.tag = NODE_IND;
    node->next = n;
    return node;
}

void free_node_direct(struct node_base* n) {
    if(n->tag == NODE_DATA) {
        free(((struct node_data*) n)->array);
    }
}

void gc_visit_node(struct node_base* n) {
    if(n->gc_reachable) return;
    n->gc_reachable = 1;

    if(n->tag == NODE_APP) {
        struct node_app* app = (struct node_app*) n;
        gc_visit_node(app->left);
        gc_visit_node(app->right);
    } if(n->tag == NODE_IND) {
        struct node_ind* ind = (struct node_ind*) n;
        gc_visit_node(ind->next);
    } if(n->tag == NODE_DATA) {
        struct node_data* data = (struct node_data*) n;
        struct node_base** to_visit = data->array;
        while(*to_visit) {
            gc_visit_node(*to_visit);
            to_visit++;
        }
    }
}

void stack_init(struct stack* s) {
    s->size = 4;
    s->count = 0;
    s->data = malloc(sizeof(*s->data) * s->size);
    assert(s->data != NULL);
}

void stack_free(struct stack* s) {
    free(s->data);
}

void stack_push(struct stack* s, struct node_base* n) {
    while(s->count >= s->size) {
        s->data = realloc(s->data, sizeof(*s->data) * (s->size *= 2));
        assert(s->data != NULL);
    }
    s->data[s->count++] = n;
}

struct node_base* stack_pop(struct stack* s) {
    assert(s->count > 0);
    return s->data[--s->count];
}

struct node_base* stack_peek(struct stack* s, size_t o) {
    assert(s->count > o);
    return s->data[s->count - o - 1];
}

void stack_popn(struct stack* s, size_t n) {
    assert(s->count >= n);
    s->count -= n;
}

void gmachine_init(struct gmachine* g) {
    stack_init(&g->stack);
    g->gc_nodes = NULL;
    g->gc_node_count = 0;
    g->gc_node_threshold = 128;
}

void gmachine_free(struct gmachine* g) {
    stack_free(&g->stack);
    struct node_base* to_free = g->gc_nodes;
    struct node_base* next;

    while(to_free) {
        next = to_free->gc_next;
        free_node_direct(to_free);
        free(to_free);
        to_free = next;
    }
}

void gmachine_slide(struct gmachine* g, size_t n) {
    assert(g->stack.count > n);
    g->stack.data[g->stack.count - n - 1] = g->stack.data[g->stack.count - 1];
    g->stack.count -= n;
}

void gmachine_update(struct gmachine* g, size_t o) {
    assert(g->stack.count > o + 1);
    struct node_ind* ind =
        (struct node_ind*) g->stack.data[g->stack.count - o - 2];
    ind->base.tag = NODE_IND;
    ind->next = g->stack.data[g->stack.count -= 1];
}

void gmachine_alloc(struct gmachine* g, size_t o) {
    while(o--) {
        stack_push(&g->stack,
                gmachine_track(g, (struct node_base*) alloc_ind(NULL)));
    }
}

void gmachine_pack(struct gmachine* g, size_t n, int8_t t) {
    assert(g->stack.count >= n);

    struct node_base** data = malloc(sizeof(*data) * (n + 1));
    assert(data != NULL);
    memcpy(data, &g->stack.data[g->stack.count - n], n * sizeof(*data));
    data[n] = NULL;

    struct node_data* new_node = (struct node_data*) alloc_node();
    new_node->array = data;
    new_node->base.tag = NODE_DATA;
    new_node->tag = t;

    stack_popn(&g->stack, n);
    stack_push(&g->stack, gmachine_track(g, (struct node_base*) new_node));
}

void gmachine_split(struct gmachine* g, size_t n) {
    struct node_data* node = (struct node_data*) stack_pop(&g->stack);
    for(size_t i = 0; i < n; i++) {
        stack_push(&g->stack, node->array[i]);
    }
}

struct node_base* gmachine_track(struct gmachine* g, struct node_base* b) {
    g->gc_node_count++;
    b->gc_next = g->gc_nodes;
    g->gc_nodes = b;

    if(g->gc_node_count >= g->gc_node_threshold) {
        uint64_t nodes_before = g->gc_node_count;
        gc_visit_node(b);
        gmachine_gc(g);
        g->gc_node_threshold = g->gc_node_count * 2;
    }

    return b;
}

void gmachine_gc(struct gmachine* g) {
    for(size_t i = 0; i < g->stack.count; i++) {
        gc_visit_node(g->stack.data[i]);
    }

    struct node_base** head_ptr = &g->gc_nodes;
    while(*head_ptr) {
        if((*head_ptr)->gc_reachable) {
            (*head_ptr)->gc_reachable = 0;
            head_ptr = &(*head_ptr)->gc_next;
        } else {
            struct node_base* to_free = *head_ptr;
            *head_ptr = to_free->gc_next;
            free_node_direct(to_free);
            free(to_free);
            g->gc_node_count--;
        }
    }
}

void unwind(struct gmachine* g) {
    struct stack* s = &g->stack;

    while(1) {
        struct node_base* peek = stack_peek(s, 0);
        if(peek->tag == NODE_APP) {
            struct node_app* n = (struct node_app*) peek;
            stack_push(s, n->left);
        } else if(peek->tag == NODE_GLOBAL) {
            struct node_global* n = (struct node_global*) peek;
            assert(s->count > n->arity);

            for(size_t i = 1; i <= n->arity; i++) {
                s->data[s->count - i]
                    = ((struct node_app*) s->data[s->count - i - 1])->right;
            }

            n->function(g);
        } else if(peek->tag == NODE_IND) {
            struct node_ind* n = (struct node_ind*) peek;
            stack_pop(s);
            stack_push(s, n->next);
        } else {
            break;
        }
    }
}

extern void f_main(struct gmachine* s);

void print_node(struct node_base* n) {
    if(n->tag == NODE_APP) {
        struct node_app* app = (struct node_app*) n;
        print_node(app->left);
        putchar(' ');
        print_node(app->right);
    } else if(n->tag == NODE_DATA) {
        printf("(Packed)");
    } else if(n->tag == NODE_GLOBAL) {
        struct node_global* global = (struct node_global*) n;
        printf("(Global: %p)", global->function);
    } else if(n->tag == NODE_IND) {
        print_node(((struct node_ind*) n)->next);
    } else if(n->tag == NODE_NUM) {
        struct node_num* num = (struct node_num*) n;
        printf("%d", num->value);
    }
}

int main(int argc, char** argv) {
    struct gmachine gmachine;
    struct node_global* first_node = alloc_global(f_main, 0);
    struct node_base* result;

    gmachine_init(&gmachine);
    gmachine_track(&gmachine, (struct node_base*) first_node);
    stack_push(&gmachine.stack, (struct node_base*) first_node);
    unwind(&gmachine);
    result = stack_pop(&gmachine.stack);
    printf("Result: ");
    print_node(result);
    putchar('\n');
    gmachine_free(&gmachine);
}
84
code/compiler/09/runtime.h
Normal file
@@ -0,0 +1,84 @@
#pragma once
#include <stdlib.h>

struct gmachine;

enum node_tag {
    NODE_APP,
    NODE_NUM,
    NODE_GLOBAL,
    NODE_IND,
    NODE_DATA
};

struct node_base {
    enum node_tag tag;
    int8_t gc_reachable;
    struct node_base* gc_next;
};

struct node_app {
    struct node_base base;
    struct node_base* left;
    struct node_base* right;
};

struct node_num {
    struct node_base base;
    int32_t value;
};

struct node_global {
    struct node_base base;
    int32_t arity;
    void (*function)(struct gmachine*);
};

struct node_ind {
    struct node_base base;
    struct node_base* next;
};

struct node_data {
    struct node_base base;
    int8_t tag;
    struct node_base** array;
};

struct node_base* alloc_node();
struct node_app* alloc_app(struct node_base* l, struct node_base* r);
struct node_num* alloc_num(int32_t n);
struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a);
struct node_ind* alloc_ind(struct node_base* n);
void free_node_direct(struct node_base*);
void gc_visit_node(struct node_base*);

struct stack {
    size_t size;
    size_t count;
    struct node_base** data;
};

void stack_init(struct stack* s);
void stack_free(struct stack* s);
void stack_push(struct stack* s, struct node_base* n);
struct node_base* stack_pop(struct stack* s);
struct node_base* stack_peek(struct stack* s, size_t o);
void stack_popn(struct stack* s, size_t n);

struct gmachine {
    struct stack stack;
    struct node_base* gc_nodes;
    int64_t gc_node_count;
    int64_t gc_node_threshold;
};

void gmachine_init(struct gmachine* g);
void gmachine_free(struct gmachine* g);
void gmachine_slide(struct gmachine* g, size_t n);
void gmachine_update(struct gmachine* g, size_t o);
void gmachine_alloc(struct gmachine* g, size_t o);
void gmachine_pack(struct gmachine* g, size_t n, int8_t t);
void gmachine_split(struct gmachine* g, size_t n);
struct node_base* gmachine_track(struct gmachine* g, struct node_base* b);
void gmachine_gc(struct gmachine* g);
35
code/compiler/09/scanner.l
Normal file
@@ -0,0 +1,35 @@
%option noyywrap

%{
#include <iostream>
#include "ast.hpp"
#include "definition.hpp"
#include "parser.hpp"

#define YY_DECL yy::parser::symbol_type yylex()

%}

%%

[ \n]+ {}
\+ { return yy::parser::make_PLUS(); }
\* { return yy::parser::make_TIMES(); }
- { return yy::parser::make_MINUS(); }
\/ { return yy::parser::make_DIVIDE(); }
[0-9]+ { return yy::parser::make_INT(atoi(yytext)); }
defn { return yy::parser::make_DEFN(); }
data { return yy::parser::make_DATA(); }
case { return yy::parser::make_CASE(); }
of { return yy::parser::make_OF(); }
\{ { return yy::parser::make_OCURLY(); }
\} { return yy::parser::make_CCURLY(); }
\( { return yy::parser::make_OPAREN(); }
\) { return yy::parser::make_CPAREN(); }
, { return yy::parser::make_COMMA(); }
-> { return yy::parser::make_ARROW(); }
= { return yy::parser::make_EQUAL(); }
[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext)); }
[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext)); }

%%
99
code/compiler/09/type.cpp
Normal file
@@ -0,0 +1,99 @@
#include "type.hpp"
#include <sstream>
#include <algorithm>
#include "error.hpp"

void type_var::print(const type_mgr& mgr, std::ostream& to) const {
    auto it = mgr.types.find(name);
    if(it != mgr.types.end()) {
        it->second->print(mgr, to);
    } else {
        to << name;
    }
}

void type_base::print(const type_mgr& mgr, std::ostream& to) const {
    to << name;
}

void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
    left->print(mgr, to);
    to << " -> (";
    right->print(mgr, to);
    to << ")";
}

std::string type_mgr::new_type_name() {
    int temp = last_id++;
    std::string str = "";

    while(temp != -1) {
        str += (char) ('a' + (temp % 26));
        temp = temp / 26 - 1;
    }

    std::reverse(str.begin(), str.end());
    return str;
}

type_ptr type_mgr::new_type() {
    return type_ptr(new type_var(new_type_name()));
}

type_ptr type_mgr::new_arrow_type() {
    return type_ptr(new type_arr(new_type(), new_type()));
}

type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
    type_var* cast;

    var = nullptr;
    while((cast = dynamic_cast<type_var*>(t.get()))) {
        auto it = types.find(cast->name);

        if(it == types.end()) {
            var = cast;
            break;
        }
        t = it->second;
    }

    return t;
}

void type_mgr::unify(type_ptr l, type_ptr r) {
    type_var* lvar;
    type_var* rvar;
    type_arr* larr;
    type_arr* rarr;
    type_base* lid;
    type_base* rid;

    l = resolve(l, lvar);
    r = resolve(r, rvar);

    if(lvar) {
        bind(lvar->name, r);
        return;
    } else if(rvar) {
        bind(rvar->name, l);
        return;
    } else if((larr = dynamic_cast<type_arr*>(l.get())) &&
            (rarr = dynamic_cast<type_arr*>(r.get()))) {
        unify(larr->left, rarr->left);
        unify(larr->right, rarr->right);
        return;
    } else if((lid = dynamic_cast<type_base*>(l.get())) &&
            (rid = dynamic_cast<type_base*>(r.get()))) {
        if(lid->name == rid->name) return;
    }

    throw unification_error(l, r);
}

void type_mgr::bind(const std::string& s, type_ptr t) {
    type_var* other = dynamic_cast<type_var*>(t.get());

    if(other && other->name == s) return;
    types[s] = t;
}
65
code/compiler/09/type.hpp
Normal file
@@ -0,0 +1,65 @@
#pragma once
#include <memory>
#include <map>

struct type_mgr;

struct type {
    virtual ~type() = default;

    virtual void print(const type_mgr& mgr, std::ostream& to) const = 0;
};

using type_ptr = std::shared_ptr<type>;

struct type_var : public type {
    std::string name;

    type_var(std::string n)
        : name(std::move(n)) {}

    void print(const type_mgr& mgr, std::ostream& to) const;
};

struct type_base : public type {
    std::string name;

    type_base(std::string n)
        : name(std::move(n)) {}

    void print(const type_mgr& mgr, std::ostream& to) const;
};

struct type_data : public type_base {
    struct constructor {
        int tag;
    };

    std::map<std::string, constructor> constructors;

    type_data(std::string n)
        : type_base(std::move(n)) {}
};

struct type_arr : public type {
    type_ptr left;
    type_ptr right;

    type_arr(type_ptr l, type_ptr r)
        : left(std::move(l)), right(std::move(r)) {}

    void print(const type_mgr& mgr, std::ostream& to) const;
};

struct type_mgr {
    int last_id = 0;
    std::map<std::string, type_ptr> types;

    std::string new_type_name();
    type_ptr new_type();
    type_ptr new_arrow_type();

    void unify(type_ptr l, type_ptr r);
    type_ptr resolve(type_ptr t, type_var*& var) const;
    void bind(const std::string& s, type_ptr t);
};
16
code/compiler/09/type_env.cpp
Normal file
@@ -0,0 +1,16 @@
#include "type_env.hpp"

type_ptr type_env::lookup(const std::string& name) const {
    auto it = names.find(name);
    if(it != names.end()) return it->second;
    if(parent) return parent->lookup(name);
    return nullptr;
}

void type_env::bind(const std::string& name, type_ptr t) {
    names[name] = t;
}

type_env type_env::scope() const {
    return type_env(this);
}
16
code/compiler/09/type_env.hpp
Normal file
@@ -0,0 +1,16 @@
#pragma once
#include <map>
#include "type.hpp"

struct type_env {
    std::map<std::string, type_ptr> names;
    type_env const* parent = nullptr;

    type_env(type_env const* p)
        : parent(p) {}
    type_env() : type_env(nullptr) {}

    type_ptr lookup(const std::string& name) const;
    void bind(const std::string& name, type_ptr t);
    type_env scope() const;
};
4
code/compiler/test.sh
Executable file
@@ -0,0 +1,4 @@
cd 09
mkdir -p build && cd build
cmake ..
make -j8
119
code/cs325-langs/hws/hw1.txt
Normal file
@@ -0,0 +1,119 @@
CS 325-001, Analysis of Algorithms, Fall 2019
HW1 - Python 3, qsort, BST, and qselect
Due electronically on flip on Monday 9/30 at 11:59pm.
No late submission will be accepted.

Need to submit on flip: report.txt, qsort.py, and qselect.py.
qselect.py will be automatically graded for correctness (1%).

flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw1 qselect.py qsort.py report.txt

Note:

1. You can ssh to flip machines from your own machine by:
   $ ssh access.engr.oregonstate.edu

2. You can add /nfs/farm/classes/eecs/fall2019/cs325-001/ to your $PATH:
   $ export PATH=$PATH:/nfs/farm/classes/eecs/fall2019/cs325-001/
   and add the above command to your ~/.bash_profile,
   so that you don't need to type it every time.

   (alternatively, you can use symbolic links or aliases to avoid typing the long path)

3. You can choose to submit each file separately, or submit them together.

Textbooks for References:
[1] CLRS Ch. 9.2 and Ch. 12

0. Q: What's the best-case, worst-case, and average-case time complexities of quicksort.
   Briefly explain each case.

1. [WILL BE GRADED]
   Quickselect with Randomized Pivot (CLRS Ch. 9.2).

   >>> from qselect import *
   >>> qselect(2, [3, 10, 4, 7, 19])
   4
   >>> qselect(4, [11, 2, 8, 3])
   11

   Q: What's the best-case, worst-case, and average-case time complexities? Briefly explain.

   Filename: qselect.py

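For reference, here is a minimal sketch of the randomized quickselect described in problem 1. It assumes the 1-indexed k shown in the doctests above; it is an illustrative outline, not the graded reference solution.

   import random

   def qselect(k, a):
       # k-th smallest element of a (1-indexed), expected O(n) time
       pivot = a[random.randrange(len(a))]
       left  = [x for x in a if x < pivot]
       right = [x for x in a if x > pivot]
       mid   = len(a) - len(left) - len(right)   # elements equal to the pivot
       if k <= len(left):
           return qselect(k, left)
       if k <= len(left) + mid:
           return pivot
       return qselect(k - len(left) - mid, right)

With a random pivot the expected recursion depth stays logarithmic, which is where the expected-linear running time comes from.
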
2. Buggy Qsort Revisited
|
||||||
|
|
||||||
|
In the slides we showed a buggy version of qsort which is weird in an interesting way:
|
||||||
|
it actually returns a binary search tree for the given array, rooted at the pivot:
|
||||||
|
|
||||||
|
>>> from qsort import *
|
||||||
|
>>> tree = sort([4,2,6,3,5,7,1,9])
|
||||||
|
>>> tree
|
||||||
|
[[[[], 1, []], 2, [[], 3, []]], 4, [[[], 5, []], 6, [[], 7, [[], 9, []]]]]
|
||||||
|
|
||||||
|
which encodes a binary search tree:
|
||||||
|
|
||||||
|
4
|
||||||
|
/ \
|
||||||
|
2 6
|
||||||
|
/ \ / \
|
||||||
|
1 3 5 7
|
||||||
|
\
|
||||||
|
9
|
||||||
|
|
||||||
|
Now on top of that piece of code, add three functions:
|
||||||
|
* sorted(t): returns the sorted order (infix traversal)
|
||||||
|
* search(t, x): returns whether x is in t
|
||||||
|
* insert(t, x): inserts x into t (in-place) if it is missing, otherwise does nothing.
|
||||||
|
|
||||||
|
>>> sorted(tree)
|
||||||
|
[1, 2, 3, 4, 5, 6, 7, 9]
|
||||||
|
>>> search(tree, 6)
|
||||||
|
True
|
||||||
|
>>> search(tree, 6.5)
|
||||||
|
False
|
||||||
|
>>> insert(tree, 6.5)
|
||||||
|
>>> tree
|
||||||
|
[[[[], 1, []], 2, [[], 3, []]], 4, [[[], 5, []], 6, [[[], 6.5, []], 7, [[], 9, []]]]]
|
||||||
|
>>> insert(tree, 3)
|
||||||
|
>>> tree
|
||||||
|
[[[[], 1, []], 2, [[], 3, []]], 4, [[[], 5, []], 6, [[[], 6.5, []], 7, [[], 9, []]]]]
|
||||||
|
|
||||||
|
Hint: both search and insert should depend on a helper function _search(tree, x) which
|
||||||
|
returns the subtree (a list) rooted at x when x is found, or the [] where x should
|
||||||
|
be inserted.
|
||||||
|
|
||||||
|
e.g.,
|
||||||
|
>>> tree = sort([4,2,6,3,5,7,1,9]) # starting from the initial tree
|
||||||
|
>>> _search(tree, 3)
|
||||||
|
[[], 3, []]
|
||||||
|
>>> _search(tree, 0)
|
||||||
|
[]
|
||||||
|
>>> _search(tree, 6.5)
|
||||||
|
[]
|
||||||
|
>>> _search(tree, 0) is _search(tree, 6.5)
|
||||||
|
False
|
||||||
|
>>> _search(tree, 0) == _search(tree, 6.5)
|
||||||
|
True
|
||||||
|
|
||||||
|
Note the last two []'s are different nodes (with different memory addresses):
|
||||||
|
the first one is the left child of 1, while the second one is the left child of 7
|
||||||
|
(so that insert is very easy).
|
||||||
|
|
||||||
|
Filename: qsort.py
|
||||||
|
|
||||||
|
Q: What are the time complexities for the operations implemented?
|
||||||
|
|
||||||
|
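One possible shape for the three functions, built on the hinted _search helper and the nested-list tree representation shown above (a sketch only; it deliberately shadows the built-in sorted, as the assignment's naming requires):

   def _search(tree, x):
       # subtree rooted at x, or the very [] node where x would be inserted
       if tree == []:
           return tree
       left, root, right = tree
       if x < root:
           return _search(left, x)
       if x > root:
           return _search(right, x)
       return tree

   def sorted(tree):                 # infix (in-order) traversal
       if tree == []:
           return []
       left, root, right = tree
       return sorted(left) + [root] + sorted(right)

   def search(tree, x):
       return _search(tree, x) != []

   def insert(tree, x):
       spot = _search(tree, x)
       if spot == []:                # x is missing: grow the tree in place
           spot.extend([[], x, []])

Because _search returns the actual [] list object sitting in the tree, insert can mutate it with extend and the change is visible in the whole structure, which is exactly what the hint is pointing at.
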
Debriefing (required!): --------------------------

1. Approximately how many hours did you spend on this assignment?
2. Would you rate it as easy, moderate, or difficult?
3. Did you work on it mostly alone, or mostly with other people?
4. How deeply do you feel you understand the material it covers (0%–100%)?
5. Any other comments?

This section is intended to help us calibrate the homework assignments.
Your answers to this section will *not* affect your grade; however, skipping it
will certainly do.

170
code/cs325-langs/hws/hw10.txt
Normal file
@@ -0,0 +1,170 @@
CS 325, Algorithms (MS/MEng-level), Fall 2019

HW10 - Challenge Problem - RNA Structure Prediction (6%)
This problem combines dynamic programming and priority queues.

Due Wednesday 12/4, 11:59pm.
No late submission will be accepted.

Include in your submission: report.txt, rna.py.
Grading:
* report.txt -- 1%
* 1-best structure -- 2%
* number of structures -- 1%
* k-best structures -- 2%

Textbooks for References:
[1] KT Ch. 6.5 (DP over intervals -- RNA structure)
[2] KT slides: DP I (RNA section)
    http://www.cs.princeton.edu/~wayne/kleinberg-tardos/

***Please analyze time/space complexities for each problem in report.txt.

1. Given an RNA sequence, such as ACAGU, we can predict its secondary structure
   by tagging each nucleotide as (, ., or ). Each matching pair of () must be
   AU, GC, or GU (or their mirror symmetries: UA, CG, UG).
   We also assume pairs can _not_ cross each other.
   The following are valid structures for ACAGU:

   ACAGU
   .....
   ...()
   ..(.)
   .(.).
   (...)
   ((.))

   We want to find the structure with the maximum number of matching pairs.
   In the above example, the last structure is optimal (2 pairs).

   >>> best("ACAGU")
   (2, '((.))')

   Tie-breaking: arbitrary. Don't worry as long as your structure
   is one of the correct best structures.

   some other cases (more cases at the bottom):

   GCACG
   (2, '().()')
   UUCAGGA
   (3, '(((.)))')
   GUUAGAGUCU
   (4, '(.()((.)))')
   AUAACCUUAUAGGGCUCUG
   (8, '.(((..)()()((()))))')
   AACCGCUGUGUCAAGCCCAUCCUGCCUUGUU
   (11, '(((.(..(.((.)((...().))()))))))')
   GAUGCCGUGUAGUCCAAAGACUUCACCGUUGG
   (14, '.()()(()(()())(((.((.)(.))()))))')
   CAUCGGGGUCUGAGAUGGCCAUGAAGGGCACGUACUGUUU
   (18, '(()())(((((.)))()(((())(.(.().()()))))))')
   ACGGCCAGUAAAGGUCAUAUACGCGGAAUGACAGGUCUAUCUAC
   (19, '.()(((.)(..))(((.()()(())))(((.)((())))))())')
   AGGCAUCAAACCCUGCAUGGGAGCACCGCCACUGGCGAUUUUGGUA
   (20, '.(()())...((((()()))((()(.()(((.)))()())))))()')

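One common way to get the 1-best structure is a Nussinov-style interval DP: for every substring, either the last nucleotide is unpaired, or it pairs with some earlier position, splitting the interval in two. The sketch below memoizes both the pair count and one witnessing structure; it uses the pairing rules stated above and is only an outline of the idea, not the course's reference solution.

   from functools import lru_cache

   PAIRS = {('A','U'), ('U','A'), ('G','C'), ('C','G'), ('G','U'), ('U','G')}

   def best(s):
       # (max number of pairs, one best structure) for RNA sequence s
       @lru_cache(maxsize=None)
       def solve(i, j):              # best answer for s[i..j] inclusive
           if i > j:
               return (0, '')
           n, struct = solve(i, j - 1)          # case 1: s[j] unpaired
           result = (n, struct + '.')
           for k in range(i, j):                # case 2: s[j] pairs with s[k]
               if (s[k], s[j]) in PAIRS:
                   n1, s1 = solve(i, k - 1)
                   n2, s2 = solve(k + 1, j - 1)
                   if n1 + n2 + 1 > result[0]:
                       result = (n1 + n2 + 1, s1 + '(' + s2 + ')')
           return result
       return solve(0, len(s) - 1)

There are O(n^2) intervals and each tries O(n) split points, so this runs in O(n^3) time and O(n^2) space, which is the analysis report.txt is asking for.
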
2. Total number of all possible structures

   >>> total("ACAGU")
   6

3. k-best structures: output the 1-best, 2nd-best, ... kth-best structures.

   >>> kbest("ACAGU", 3)
   [(2, '((.))'), (1, '(...)'), (1, '.(.).')]

   The list must be sorted.
   Tie-breaking: arbitrary.

   In case the input k is bigger than the number of possible structures, output all.

   Sanity check: kbest(s, 1)[0][0] == best(s)[0] for each RNA sequence s.

All three functions should be in one file: rna.py.

See more testcases at the end.

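Counting all structures (problem 2) uses the same interval decomposition, summing over the cases instead of maximizing; the k-best version (problem 3) would extend this by keeping a sorted or lazily-heapified list of candidates per interval rather than a single value. A counting sketch under the same pairing rules:

   from functools import lru_cache

   PAIRS = {('A','U'), ('U','A'), ('G','C'), ('C','G'), ('G','U'), ('U','G')}

   def total(s):
       # number of valid non-crossing structures of s (the all-dots one included)
       @lru_cache(maxsize=None)
       def count(i, j):              # structures for s[i..j]
           if i >= j:
               return 1              # empty interval or a single '.' nucleotide
           n = count(i, j - 1)       # s[j] unpaired
           for k in range(i, j):     # s[j] paired with s[k]
               if (s[k], s[j]) in PAIRS:
                   n += count(i, k - 1) * count(k + 1, j - 1)
           return n
       return count(0, len(s) - 1)

On "ACAGU" this counts the six structures listed in problem 1, matching the expected output.
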
Debriefing (required!): --------------------------

0. What's your name?
1. Approximately how many hours did you spend on this assignment?
2. Would you rate it as easy, moderate, or difficult?
3. Did you work on it mostly alone, or mostly with other people?
4. How deeply do you feel you understand the material it covers (0%-100%)?
5. Any other comments?

This section is intended to help us calibrate the homework assignments.
Your answers to this section will *not* affect your grade; however, skipping it
will certainly do.


TESTCASES:

for each sequence s, we list three lines:
best(s)
total(s)
kbest(s, 10)


ACAGU
(2, '((.))')
6
[(2, '((.))'), (1, '.(.).'), (1, '..(.)'), (1, '...()'), (1, '(...)'), (0, '.....')]
------
AC
(0, '..')
1
[(0, '..')]
------
GUAC
(2, '(())')
5
[(2, '(())'), (1, '()..'), (1, '.().'), (1, '(..)'), (0, '....')]
------
GCACG
(2, '().()')
6
[(2, '().()'), (1, '(..).'), (1, '()...'), (1, '.(..)'), (1, '...()'), (0, '.....')]
------
CCGG
(2, '(())')
6
[(2, '(())'), (1, '(.).'), (1, '.().'), (1, '.(.)'), (1, '(..)'), (0, '....')]
------
CCCGGG
(3, '((()))')
20
[(3, '((()))'), (2, '((.)).'), (2, '(.()).'), (2, '.(()).'), (2, '.(().)'), (2, '.((.))'), (2, '((.).)'), (2, '(.(.))'), (2, '(.().)'), (2, '((..))')]
------
UUCAGGA
(3, '(((.)))')
24
[(3, '(((.)))'), (2, '((.).).'), (2, '((..)).'), (2, '(.(.)).'), (2, '((.))..'), (2, '.((.)).'), (2, '.((.).)'), (2, '.((..))'), (2, '((..).)'), (2, '((.)..)')]
------
AUAACCUA
(2, '.((...))')
19
[(2, '((.)..).'), (2, '(()...).'), (2, '()(...).'), (2, '().(..).'), (2, '()....()'), (2, '.()(..).'), (2, '.()...()'), (2, '.(.)..()'), (2, '.((...))'), (2, '.(.(..))')]
------
UUGGACUUG
(4, '(()((.)))')
129
[(4, '(())(.)()'), (4, '(()((.)))'), (3, '(().)..()'), (3, '(().).(.)'), (3, '(().)(..)'), (3, '((.))..()'), (3, '((.)).(.)'), (3, '((.))(..)'), (3, '(())(..).'), (3, '(())(.)..')]
------
UUUGGCACUA
(4, '(.()()(.))')
179
[(4, '((()).).()'), (4, '((.)()).()'), (4, '(.()()).()'), (4, '.(()()).()'), (4, '.(()()(.))'), (4, '((()).(.))'), (4, '((.)()(.))'), (4, '((()())..)'), (4, '(.()()(.))'), (3, '((()).)...')]
------
GAUGCCGUGUAGUCCAAAGACUUC
(11, '(((()()((()(.))))((.))))')
2977987
[(11, '(()())(((()().))(((.))))'), (11, '(()())(((()()).)(((.))))'), (11, '(()())(((()(.)))(((.))))'), (11, '(()()()((()(.)))(((.))))'), (11, '(((()()((()().)))((.))))'), (11, '(((()()((()(.))))((.))))'), (11, '(()()()((()()).)(((.))))'), (11, '(()()()((()().))(((.))))'), (11, '(((()()((()()).))((.))))'), (10, '(()()()((()().).)((.))).')]
------
AGGCAUCAAACCCUGCAUGGGAGCG
(10, '.(()())...((((()()))).())')
560580
[(10, '.(()())...((((())())).)()'), (10, '.(()())...((((()()))).)()'), (10, '.(()())...(((()(()))).)()'), (10, '.(()())...(((()(()))).())'), (10, '.(()())...((((())())).())'), (10, '.(()())...((((()()))).())'), (9, '((.).)(...(.((()()))).)()'), (9, '((.).)(...(((.)(()))).)()'), (9, '((.).)(...(.(()(()))).)()'), (9, '((.).)(...((.(()()))).)()')]
------
42
code/cs325-langs/hws/hw11.txt
Normal file
@@ -0,0 +1,42 @@
HW11 -- OPTIONAL (for your practice only -- solutions will be released on Tuesday)

Edit Distance (see updated final review solutions)

flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw11 edit.py

Implement two functions:
* distance1(s, t): Viterbi-style (either top-down or bottom-up)
* distance2(s, t): Dijkstra-style (best-first)

For Dijkstra, you can use either heapdict or heapq (see review problem 7).
Given that this graph is extremely sparse (why?), heapq (ElogE) might be faster than heapdict (ElogV)
because the latter has overhead for hash.

They should return the same result (just return the edit distance).

We have 10 testcases (listed below); the first 5 test distance1(),
and the second 5 test distance2() on the same 5 string pairs.

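A minimal bottom-up sketch of distance1, the standard Levenshtein DP over prefixes (the Viterbi-style variant asked for above); distance2 would explore the same (i, j) states best-first with heapq and is not shown here:

   def distance1(s, t):
       # opt[i][j] = edit distance between s[:i] and t[:j]
       m, n = len(s), len(t)
       opt = [[0] * (n + 1) for _ in range(m + 1)]
       for i in range(m + 1):
           opt[i][0] = i                        # delete all of s[:i]
       for j in range(n + 1):
           opt[0][j] = j                        # insert all of t[:j]
       for i in range(1, m + 1):
           for j in range(1, n + 1):
               if s[i - 1] == t[j - 1]:
                   opt[i][j] = opt[i - 1][j - 1]
               else:
                   opt[i][j] = 1 + min(opt[i - 1][j - 1],   # substitute
                                       opt[i - 1][j],       # delete
                                       opt[i][j - 1])       # insert
       return opt[m][n]

This runs in O(mn) time; on the first test pair below it returns 3, as expected.
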
My solutions (on flip2):
Testing Case 1 (open)... 0.001 s, Correct
Testing Case 2 (open)... 0.000 s, Correct
Testing Case 3 (open)... 0.012 s, Correct
Testing Case 4 (open)... 0.155 s, Correct
Testing Case 5 (open)... 0.112 s, Correct
Testing Case 6 (hidden)... 0.000 s, Correct
Testing Case 7 (hidden)... 0.000 s, Correct
Testing Case 8 (hidden)... 0.004 s, Correct
Testing Case 9 (hidden)... 0.009 s, Correct
Testing Case 10 (hidden)... 0.021 s, Correct
Total Time: 0.316 s

distance1("abcdefh", "abbcdfg") == 3
distance1("pretty", "prettier") == 3
distance1("aaaaaaadaaaaaaaaaaaaaaaaacaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxaaaaaaaaaaaaaaaaaaaaaa") == 5
distance1('cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbxtwiqzvokqpkecyywrbvhlqgxzutdjfmvlhsezfbhfjbllmfhzlqlcwibubyyjupbwhztskyksfthkptxqlmhivfjbgclwsombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasomrhotoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwy', 'cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbtwiqzvokqpkecyywrbvhlqgxzutdjfmvlhsezfbhfjbllmfhzlqlcwibubyyjupbwhztskyksfthkptxqlmhivfbgclwsombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasonrhotoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwy') == 3
distance1('cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbtwiqzvokqpasdfkecyywrbvhlqgxzutdjfmvlhsezfbhbllmfhzlqlcwibubyyjupbwhztsxyksfthkptxqlmhivfjbgclhombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasomrttoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwydmbihjkvziitusmkjljrsbafytsinql', 'cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbtwiqzvokqpkecyywrbvhlqgxzutdjfmvlhsezfbhfjbllmfhzlqlcwibubyyjupbwhztskyksfthkptxqlmhivfjbgclwsombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasomrhotoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwydmbihjkvziitusmkjljrsbafytsinql') == 11
distance2("abcdefh", "abbcdfg") == 3
distance2("pretty", "prettier") == 3
distance2("aaaaaaadaaaaaaaaaaaaaaaaacaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxaaaaaaaaaaaaaaaaaaaaaa") == 5
distance2('cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbxtwiqzvokqpkecyywrbvhlqgxzutdjfmvlhsezfbhfjbllmfhzlqlcwibubyyjupbwhztskyksfthkptxqlmhivfjbgclwsombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasomrhotoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwy', 'cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbtwiqzvokqpkecyywrbvhlqgxzutdjfmvlhsezfbhfjbllmfhzlqlcwibubyyjupbwhztskyksfthkptxqlmhivfbgclwsombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasonrhotoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwy') == 3
distance2('cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbtwiqzvokqpasdfkecyywrbvhlqgxzutdjfmvlhsezfbhbllmfhzlqlcwibubyyjupbwhztsxyksfthkptxqlmhivfjbgclhombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasomrttoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwydmbihjkvziitusmkjljrsbafytsinql', 'cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbtwiqzvokqpkecyywrbvhlqgxzutdjfmvlhsezfbhfjbllmfhzlqlcwibubyyjupbwhztskyksfthkptxqlmhivfjbgclwsombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasomrhotoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwydmbihjkvziitusmkjljrsbafytsinql') == 11
80
code/cs325-langs/hws/hw2.txt
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
CS 325-001, Analysis of Algorithms, Fall 2019
|
||||||
|
HW2 - Divide-n-conquer: mergesort, number of inversions, longest path
|
||||||
|
|
||||||
|
Due Monday Oct 7, 11:59pm (same submission instructions as HW1).
|
||||||
|
No late submission will be accepted.
|
||||||
|
|
||||||
|
Need to submit: report.txt, msort.py, inversions.py, and longest.py.
|
||||||
|
longest.py will be graded for correctness (1%).
|
||||||
|
|
||||||
|
To submit:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw2 report.txt {msort,inversions,longest}.py
|
||||||
|
(You can submit each file separately, or submit them together.)
|
||||||
|
|
||||||
|
To see your best results so far:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/query hw2
|
||||||
|
|
||||||
|
|
||||||
|
Textbooks for References:
|
||||||
|
[1] CLRS Ch. 2
|
||||||
|
|
||||||
|
0. Which of the following sorting algorithms are (or can be made) stable?
|
||||||
|
(a) mergesort
|
||||||
|
(b) quicksort with the first element as pivot
|
||||||
|
(c) quicksort with randomized pivot
|
||||||
|
(d) selection sort
|
||||||
|
(e) insertion sort
|
||||||
|
(f) heap sort --- not covered yet (see CLRS Ch. 6)
|
||||||
|
|
||||||
|
1. Implement mergesort.
|
||||||
|
|
||||||
|
>>> mergesort([4, 2, 5, 1, 6, 3])
|
||||||
|
[1, 2, 3, 4, 5, 6]
|
||||||
|
|
||||||
|
Filename: msort.py
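A minimal sketch of the standard top-down mergesort (not the released solution); note the <= in the merge keeps it stable, which is relevant to question 0(a):

def mergesort(a):
    if len(a) <= 1:
        return a
    mid = len(a) // 2
    left, right = mergesort(a[:mid]), mergesort(a[mid:])
    merged, i, j = [], 0, 0
    while i < len(left) and j < len(right):
        if left[i] <= right[j]:           # <= keeps equal keys in order (stable)
            merged.append(left[i]); i += 1
        else:
            merged.append(right[j]); j += 1
    return merged + left[i:] + right[j:]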
|
||||||
|
|
||||||
|
2. Calculate the number of inversions in a list.
|
||||||
|
|
||||||
|
>>> num_inversions([4, 1, 3, 2])
|
||||||
|
4
|
||||||
|
>>> num_inversions([2, 4, 1, 3])
|
||||||
|
3
|
||||||
|
|
||||||
|
Filename: inversions.py
|
||||||
|
Must run in O(nlogn) time.
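One possible O(n log n) approach (a sketch, with my own helper name sort_count) is merge-and-count: whenever an element of the right half is merged before remaining elements of the left half, each of those remaining left elements forms one inversion with it.

def num_inversions(a):
    def sort_count(a):                    # returns (sorted copy, #inversions)
        if len(a) <= 1:
            return a, 0
        mid = len(a) // 2
        left, cl = sort_count(a[:mid])
        right, cr = sort_count(a[mid:])
        merged, i, j, cross = [], 0, 0, 0
        while i < len(left) and j < len(right):
            if left[i] <= right[j]:
                merged.append(left[i]); i += 1
            else:
                merged.append(right[j]); j += 1
                cross += len(left) - i    # every remaining left element exceeds right[j]
        merged += left[i:] + right[j:]
        return merged, cl + cr + cross
    return sort_count(a)[1]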
|
||||||
|
|
||||||
|
3. [WILL BE GRADED]
|
||||||
|
|
||||||
|
Length of the longest path in a binary tree (number of edges).
|
||||||
|
|
||||||
|
We will use the "buggy qsort" representation of binary trees from HW1:
|
||||||
|
[left_subtree, root, right_subtree]
|
||||||
|
|
||||||
|
>>> longest([[], 1, []])
|
||||||
|
0
|
||||||
|
|
||||||
|
>>> longest([[[], 1, []], 2, [[], 3, []]])
|
||||||
|
2
|
||||||
|
|
||||||
|
>>> longest([[[[], 1, []], 2, [[], 3, []]], 4, [[[], 5, []], 6, [[], 7, [[], 9, []]]]])
|
||||||
|
5
|
||||||
|
|
||||||
|
Note the answer is 5 because the longest path is 1-2-4-6-7-9.
|
||||||
|
|
||||||
|
Filename: longest.py
|
||||||
|
Must run in O(n) time.
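A sketch of one O(n) pass (not the released solution; helper() is my own naming): return for each subtree its height in edges together with the best path seen so far, where the best path through a node is left height + right height + 2.

def longest(tree):
    def helper(t):                   # (height in edges, best path length) for t
        if not t:
            return -1, 0             # empty tree: height -1, no path
        lh, lb = helper(t[0])
        rh, rb = helper(t[2])
        through = lh + rh + 2        # longest path passing through this root
        return max(lh, rh) + 1, max(lb, rb, through)
    return helper(tree)[1]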
|
||||||
|
|
||||||
|
Debriefing (required!): --------------------------
|
||||||
|
|
||||||
|
1. Approximately how many hours did you spend on this assignment?
|
||||||
|
2. Would you rate it as easy, moderate, or difficult?
|
||||||
|
3. Did you work on it mostly alone, or mostly with other people?
|
||||||
|
Note you are encouraged to discuss with your classmates,
|
||||||
|
but each student should submit his/her own code.
|
||||||
|
4. How deeply do you feel you understand the material it covers (0%–100%)?
|
||||||
|
5. Any other comments?
|
||||||
|
|
||||||
|
This section is intended to help us calibrate the homework assignments.
|
||||||
|
Your answers to this section will *not* affect your grade; however, skipping it
|
||||||
|
will certainly do.
|
||||||
|
|
||||||
83
code/cs325-langs/hws/hw3.txt
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
CS 325, Algorithms, Fall 2019
|
||||||
|
HW3 - K closest numbers; Two Pointers
|
||||||
|
|
||||||
|
Due Monday Oct 14, 11:59pm. (same submission instructions as HW1-2).
|
||||||
|
No late submission will be accepted.
|
||||||
|
|
||||||
|
Need to submit: report.txt, closest_unsorted.py, closest_sorted.py, xyz.py.
|
||||||
|
closest_sorted.py will be graded for correctness (1%).
|
||||||
|
|
||||||
|
To submit:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw3 report.txt {closest*,xyz}.py
|
||||||
|
(You can submit each file separately, or submit them together.)
|
||||||
|
|
||||||
|
To see your best results so far:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/query hw3
|
||||||
|
|
||||||
|
|
||||||
|
1. Given an array A of n numbers, a query x, and a number k,
|
||||||
|
find the k numbers in A that are closest (in value) to x.
|
||||||
|
For example:
|
||||||
|
|
||||||
|
find([4,1,3,2,7,4], 5.2, 2) returns [4,4]
|
||||||
|
find([4,1,3,2,7,4], 6.5, 3) returns [4,7,4]
|
||||||
|
find([5,3,4,1,6,3], 3.5, 2) returns [3,4]
|
||||||
|
|
||||||
|
|
||||||
|
Filename: closest_unsorted.py
|
||||||
|
Must run in O(n) time.
|
||||||
|
The elements in the returned list must be in the original order.
|
||||||
|
In case two numbers are equally close to x, choose the earlier one.
|
||||||
|
|
||||||
|
|
||||||
|
2. [WILL BE GRADED]
|
||||||
|
Now what if the input array is sorted? Can you do it faster?
|
||||||
|
|
||||||
|
find([1,2,3,4,4,7], 5.2, 2) returns [4,4]
|
||||||
|
find([1,2,3,4,4,7], 6.5, 3) returns [4,4,7]
|
||||||
|
|
||||||
|
Filename: closest_sorted.py
|
||||||
|
Must run in O(logn + k) time.
|
||||||
|
The elements in the returned list must be in the original order.
|
||||||
|
|
||||||
|
Note: in case two numbers are equally close to x, choose the smaller one:
|
||||||
|
find([1,2,3,4,4,6,6], 5, 3) returns [4,4,6]
|
||||||
|
find([1,2,3,4,4,5,6], 4, 5) returns [2,3,4,4,5]
|
||||||
|
|
||||||
|
Hint: you can use Python's bisect.bisect for binary search.
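A sketch along the lines of that hint (not the released solution): locate the insertion point with bisect, then grow a window of size k outward; ties go to the smaller (left) element, and since the answer is a contiguous slice of the sorted input, returning the slice preserves the original order.

import bisect

def find(a, x, k):
    lo = hi = bisect.bisect(a, x)          # a[lo:hi] is the (empty) window so far
    for _ in range(k):
        if lo == 0:
            hi += 1
        elif hi == len(a):
            lo -= 1
        elif x - a[lo - 1] <= a[hi] - x:   # tie prefers the smaller value
            lo -= 1
        else:
            hi += 1
    return a[lo:hi]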
|
||||||
|
|
||||||
|
|
||||||
|
3. For a given array A of n *distinct* numbers, find all triples (x,y,z)
|
||||||
|
s.t. x + y = z. (x, y, z are distinct numbers)
|
||||||
|
|
||||||
|
e.g.,
|
||||||
|
|
||||||
|
find([1, 4, 2, 3, 5]) returns [(1,3,4), (1,2,3), (1,4,5), (2,3,5)]
|
||||||
|
|
||||||
|
Note that:
|
||||||
|
1) no duplicates in the input array
|
||||||
|
2) you can choose any arbitrary order for triples in the returned list.
|
||||||
|
|
||||||
|
Filename: xyz.py
|
||||||
|
Must run in O(n^2) time.
|
||||||
|
|
||||||
|
Hint: you can use any built-in sort in Python.
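One possible O(n^2) two-pointer sketch (not the released solution): sort a copy, fix the largest element z = s[k], then sweep i from the left and j from the right looking for x + y == z.

def find(a):
    s = sorted(a)
    out = []
    for k in range(len(s) - 1, 1, -1):     # s[k] plays the role of z
        i, j = 0, k - 1
        while i < j:
            total = s[i] + s[j]
            if total == s[k]:
                out.append((s[i], s[j], s[k]))
                i += 1
            elif total < s[k]:
                i += 1
            else:
                j -= 1
    return out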
|
||||||
|
|
||||||
|
|
||||||
|
Debriefing (required!): --------------------------
|
||||||
|
|
||||||
|
0. What's your name?
|
||||||
|
1. Approximately how many hours did you spend on this assignment?
|
||||||
|
2. Would you rate it as easy, moderate, or difficult?
|
||||||
|
3. Did you work on it mostly alone, or mostly with other people?
|
||||||
|
Note you are encouraged to discuss with your classmates,
|
||||||
|
but each student should submit his/her own code.
|
||||||
|
4. How deeply do you feel you understand the material it covers (0%-100%)?
|
||||||
|
|
||||||
|
5. Which part(s) of the course you like the most so far?
|
||||||
|
6. Which part(s) of the course you dislike the most so far?
|
||||||
|
|
||||||
|
This section is intended to help us calibrate the homework assignments.
|
||||||
|
Your answers to this section will *not* affect your grade; however, skipping it
|
||||||
|
will certainly do.
|
||||||
|
|
||||||
114
code/cs325-langs/hws/hw4.txt
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
CS 325-001, Algorithms, Fall 2019
|
||||||
|
HW4 - Priority Queue and Heaps
|
||||||
|
|
||||||
|
Due via the submit program on Monday Oct 21, 11:59pm.
|
||||||
|
No late submission will be accepted.
|
||||||
|
|
||||||
|
Need to submit: report.txt, nbest.py, kmergesort.py, datastream.py.
|
||||||
|
datastream.py will be graded for correctness (1%).
|
||||||
|
|
||||||
|
To submit:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw4 report.txt {nbest,kmergesort,datastream}.py
|
||||||
|
(You can submit each file separately, or submit them together.)
|
||||||
|
|
||||||
|
To see your best results so far:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/query hw4
|
||||||
|
|
||||||
|
|
||||||
|
Textbooks for References:
|
||||||
|
[1] CLRS Ch. 6
|
||||||
|
[2] KT slides for binary heaps (only read the first 20 pages!):
|
||||||
|
https://www.cs.princeton.edu/~wayne/kleinberg-tardos/pdf/BinomialHeaps.pdf
|
||||||
|
[3] Python heapq module
|
||||||
|
|
||||||
|
0. There are two methods for building a heap from an unsorted array:
|
||||||
|
(1) insert each element into the heap --- O(nlogn) -- heapq.heappush()
|
||||||
|
(2) heapify (top-down) --- O(n) -- heapq.heapify()
|
||||||
|
|
||||||
|
(a) Derive these time complexities.
|
||||||
|
(b) Use a long list of random numbers to show the difference in time. (Hint: random.shuffle or random.sample)
|
||||||
|
(c) What about sorted or reversely-sorted numbers?
|
||||||
|
|
||||||
|
1. Given two lists A and B, each with n integers, return
|
||||||
|
a sorted list C that contains the smallest n elements from AxB:
|
||||||
|
|
||||||
|
AxB = { (x, y) | x in A, y in B }
|
||||||
|
|
||||||
|
i.e., AxB is the Cartesian Product of A and B.
|
||||||
|
|
||||||
|
ordering: (x,y) < (x',y') iff. x+y < x'+y' or (x+y==x'+y' and y<y')
|
||||||
|
|
||||||
|
You need to implement three algorithms and compare:
|
||||||
|
|
||||||
|
(a) enumerate all n^2 pairs, sort, and take top n.
|
||||||
|
(b) enumerate all n^2 pairs, but use qselect from hw1.
|
||||||
|
(c) Dijkstra-style best-first, only enumerate O(n) (at most 2n) pairs.
|
||||||
|
Hint: you can use Python's heapq module for priority queue.
|
||||||
|
|
||||||
|
Q: What are the time complexities of these algorithms?
|
||||||
|
|
||||||
|
>>> a, b = [4, 1, 5, 3], [2, 6, 3, 4]
|
||||||
|
>>> nbesta(a, b) # algorithm (a), slowest
|
||||||
|
[(1, 2), (1, 3), (3, 2), (1, 4)]
|
||||||
|
>>> nbestb(a, b) # algorithm (b), slow
|
||||||
|
[(1, 2), (1, 3), (3, 2), (1, 4)]
|
||||||
|
>>> nbestc(a, b) # algorithm (c), fast
|
||||||
|
[(1, 2), (1, 3), (3, 2), (1, 4)]
|
||||||
|
|
||||||
|
Filename: nbest.py
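A sketch of algorithm (c) only (not the released solution): best-first search over the implicit grid of index pairs. A and B are sorted first, so from a popped pair (i, j) the only new candidates are (i+1, j) and (i, j+1); the heap key mirrors the required ordering (sum first, then y).

from heapq import heappush, heappop

def nbestc(a, b):
    a, b = sorted(a), sorted(b)
    n = len(a)
    h = [(a[0] + b[0], b[0], 0, 0)]
    seen = {(0, 0)}                        # avoid pushing an index pair twice
    out = []
    while len(out) < n:
        _, _, i, j = heappop(h)
        out.append((a[i], b[j]))
        for ni, nj in ((i + 1, j), (i, j + 1)):
            if ni < n and nj < n and (ni, nj) not in seen:
                seen.add((ni, nj))
                heappush(h, (a[ni] + b[nj], b[nj], ni, nj))
    return out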
|
||||||
|
|
||||||
|
2. k-way mergesort (the classical mergesort is a special case where k=2).
|
||||||
|
|
||||||
|
>>> kmergesort([4,1,5,2,6,3,7,0], 3) # k=3
|
||||||
|
[0,1,2,3,4,5,6,7]
|
||||||
|
|
||||||
|
Q: What is the complexity? Write down the detailed analysis in report.txt.
|
||||||
|
|
||||||
|
Filename: kmergesort.py
|
||||||
|
|
||||||
|
3. [WILL BE GRADED]
|
||||||
|
|
||||||
|
Find the k smallest numbers in a data stream of length n (k<<n),
|
||||||
|
using only O(k) space (the stream itself might be too big to fit in memory).
|
||||||
|
|
||||||
|
>>> ksmallest(4, [10, 2, 9, 3, 7, 8, 11, 5, 7])
|
||||||
|
[2, 3, 5, 7]
|
||||||
|
>>> ksmallest(3, range(1000000, 0, -1))
|
||||||
|
[1, 2, 3]
|
||||||
|
|
||||||
|
Note:
|
||||||
|
a) it should work with both lists and lazy lists
|
||||||
|
b) the output list should be sorted
|
||||||
|
|
||||||
|
Q: What is your complexity? Write down the detailed analysis in report.txt.
|
||||||
|
|
||||||
|
Filename: datastream.py
|
||||||
|
|
||||||
|
[UPDATE] The built-in function heapq.nsmallest() is _not_ allowed for this problem.
|
||||||
|
The whole point is to implement it yourself. :)
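One way to structure it (a sketch, not the released solution): keep a max-heap of the k smallest values seen so far; since heapq is a min-heap, store negated values. This is O(k) space and works for any iterable, including lazy ones.

import heapq

def ksmallest(k, stream):
    heap = []                          # the k smallest seen so far, negated
    for x in stream:
        if len(heap) < k:
            heapq.heappush(heap, -x)
        elif x < -heap[0]:             # smaller than the current k-th smallest
            heapq.heapreplace(heap, -x)
    return sorted(-v for v in heap)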
|
||||||
|
|
||||||
|
|
||||||
|
4. (optional) Summarize the time complexities of the basic operations (push, pop-min, peak, heapify) for these implementations of priority queue:
|
||||||
|
|
||||||
|
(a) unsorted array
|
||||||
|
(b) sorted array (highest priority first)
|
||||||
|
   (c) reversely sorted array (lowest priority first)
|
||||||
|
(d) linked list
|
||||||
|
(e) binary heap
|
||||||
|
|
||||||
|
Debriefing (required!): --------------------------
|
||||||
|
|
||||||
|
0. What's your name?
|
||||||
|
1. Approximately how many hours did you spend on this assignment?
|
||||||
|
2. Would you rate it as easy, moderate, or difficult?
|
||||||
|
3. Did you work on it mostly alone, or mostly with other people?
|
||||||
|
Note you are encouraged to discuss with your classmates,
|
||||||
|
but each student should submit his/her own code.
|
||||||
|
4. How deeply do you feel you understand the material it covers (0%-100%)?
|
||||||
|
5. Which part(s) of the course you like the most so far?
|
||||||
|
6. Which part(s) of the course you dislike the most so far?
|
||||||
|
|
||||||
|
This section is intended to help us calibrate the homework assignments.
|
||||||
|
Your answers to this section will *not* affect your grade; however, skipping it
|
||||||
|
will certainly do.
|
||||||
|
|
||||||
130
code/cs325-langs/hws/hw5.txt
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
CS 325-001, Algorithms, Fall 2019
|
||||||
|
HW5 - DP (part 1: simple)
|
||||||
|
|
||||||
|
HWs 5-7 are all on DPs.
|
||||||
|
|
||||||
|
Due Monday Oct 28, 11:59pm.
|
||||||
|
No late submission will be accepted.
|
||||||
|
|
||||||
|
Need to submit report.txt, mis.py, bsts.py, bitstrings.py.
|
||||||
|
mis.py will be graded for correctness (1%).
|
||||||
|
|
||||||
|
To submit:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw5 report.txt {mis,bsts,bitstrings}.py
|
||||||
|
(You can submit each file separately, or submit them together.)
|
||||||
|
|
||||||
|
To see your best results so far:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/query hw5
|
||||||
|
|
||||||
|
|
||||||
|
Textbooks for References:
|
||||||
|
[1] CLRS Ch. 15
|
||||||
|
[2] KT Ch. 6
|
||||||
|
or Ch. 5 in a previous version:
|
||||||
|
http://cs.furman.edu/~chealy/cs361/kleinbergbook.pdf
|
||||||
|
|
||||||
|
Hint: Among the three coding questions, p3 is the easiest, and p1 is similar to p3.
|
||||||
|
You'll realize that both are very similar to p0 (Fibonacci).
|
||||||
|
p2 is slightly different from these, but still very easy.
|
||||||
|
|
||||||
|
0. (Optional) Is Fibonacci REALLY O(n)?
|
||||||
|
Hint: the value of f(n) itself grows exponentially.
|
||||||
|
|
||||||
|
1. [WILL BE GRADED]
|
||||||
|
Maximum Weighted Independent Set
|
||||||
|
|
||||||
|
[HINT] independent set is a set where no two numbers are neighbors in the original list.
|
||||||
|
see also https://en.wikipedia.org/wiki/Independent_set_(graph_theory)
|
||||||
|
|
||||||
|
input: a list of numbers (could be negative)
|
||||||
|
output: a pair of the max sum and the list of numbers chosen
|
||||||
|
|
||||||
|
>>> max_wis([7,8,5])
|
||||||
|
(12, [7,5])
|
||||||
|
|
||||||
|
>>> max_wis([-1,8,10])
|
||||||
|
(10, [10])
|
||||||
|
|
||||||
|
>>> max_wis([])
|
||||||
|
(0, [])
|
||||||
|
|
||||||
|
[HINT] if all numbers are negative, the optimal solution is 0,
|
||||||
|
since [] is an independent set according to the definition above.
|
||||||
|
|
||||||
|
>>> max_wis([-5, -1, -4])
|
||||||
|
(0, [])
|
||||||
|
|
||||||
|
Q: What's the complexity?
|
||||||
|
|
||||||
|
Include both top-down (max_wis()) and bottom-up (max_wis2()) solutions,
|
||||||
|
and make sure they produce exact same results.
|
||||||
|
We'll only grade the top-down version.
|
||||||
|
|
||||||
|
Tie-breaking: any best solution is considered correct.
|
||||||
|
|
||||||
|
Filename: mis.py
|
||||||
|
|
||||||
|
[HINT] you can also use the naive O(2^n) exhaustive search method to verify your answer.
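For orientation, a minimal top-down sketch (not the released solution; best(i) is my own naming): best(i) is the best (sum, chosen) pair using only a[i:], obtained by either skipping a[i] or taking it and jumping to i+2. This is O(n) time and space.

def max_wis(a):
    cache = {}
    def best(i):
        if i >= len(a):
            return 0, []
        if i not in cache:
            skip_sum, skip_list = best(i + 1)
            take_sum, take_list = best(i + 2)
            take_sum += a[i]
            if take_sum > skip_sum:
                cache[i] = (take_sum, [a[i]] + take_list)
            else:
                cache[i] = (skip_sum, skip_list)
        return cache[i]
    return best(0)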
|
||||||
|
|
||||||
|
|
||||||
|
2. Number of n-node BSTs
|
||||||
|
|
||||||
|
input: n
|
||||||
|
output: number of n-node BSTs
|
||||||
|
|
||||||
|
>>> bsts(2)
|
||||||
|
2
|
||||||
|
>>> bsts(3)
|
||||||
|
5
|
||||||
|
>>> bsts(5)
|
||||||
|
42
|
||||||
|
|
||||||
|
[HINT] There are two 2-node BSTs:
|
||||||
|
2 1
|
||||||
|
/ \
|
||||||
|
1 2
|
||||||
|
Note that all other 2-node BSTs are *isomorphic* to either one.
|
||||||
|
|
||||||
|
Qa: What's the complexity of this DP?
|
||||||
|
|
||||||
|
Qb: What's the name of this famous number series?
|
||||||
|
|
||||||
|
Feel free to use any implementation style.
|
||||||
|
|
||||||
|
Filename: bsts.py
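A sketch of the recurrence (not the released solution): pick each of the n keys as the root; the left subtree then has i nodes and the right has n-1-i.

from functools import lru_cache

@lru_cache(maxsize=None)
def bsts(n):
    if n <= 1:
        return 1
    return sum(bsts(i) * bsts(n - 1 - i) for i in range(n))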
|
||||||
|
|
||||||
|
3. Number of bit strings of length n that has
|
||||||
|
|
||||||
|
1) no two consecutive 0s.
|
||||||
|
2) two consecutive 0s.
|
||||||
|
|
||||||
|
>>> num_no(3)
|
||||||
|
5
|
||||||
|
>>> num_yes(3)
|
||||||
|
3
|
||||||
|
|
||||||
|
[HINT] There are three 3-bit 0/1-strings that have two consecutive 0s.
|
||||||
|
001 100 000
|
||||||
|
The other five 3-bit 0/1-strings have no two consecutive 0s:
|
||||||
|
010 011 101 110 111
|
||||||
|
|
||||||
|
Feel free to choose any implementation style.
|
||||||
|
|
||||||
|
Filename: bitstrings.py
|
||||||
|
|
||||||
|
[HINT] Like problem 1, you can also use the O(2^n) exhaustive search method to verify your answer.
|
||||||
|
|
||||||
|
|
||||||
|
Debriefing (required!): --------------------------
|
||||||
|
|
||||||
|
0. What's your name?
|
||||||
|
1. Approximately how many hours did you spend on this assignment?
|
||||||
|
2. Would you rate it as easy, moderate, or difficult?
|
||||||
|
3. Did you work on it mostly alone, or mostly with other people?
|
||||||
|
4. How deeply do you feel you understand the material it covers (0%-100%)?
|
||||||
|
5. Which part(s) of the course you like the most so far?
|
||||||
|
6. Which part(s) of the course you dislike the most so far?
|
||||||
|
|
||||||
|
This section is intended to help us calibrate the homework assignments.
|
||||||
|
Your answers to this section will *not* affect your grade; however, skipping it
|
||||||
|
will certainly do.
|
||||||
114
code/cs325-langs/hws/hw6.txt
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
CS 325-001, Algorithms, Fall 2019
|
||||||
|
HW6 - DP (part 2)
|
||||||
|
|
||||||
|
Due on Monday Nov 4, 11:59pm.
|
||||||
|
No late submission will be accepted.
|
||||||
|
|
||||||
|
Need to submit: report.txt, knapsack_unbounded.py, knapsack_bounded.py.
|
||||||
|
knapsack_bounded.py will be graded for correctness (1%).
|
||||||
|
|
||||||
|
To submit:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw6 report.txt knapsack*.py
|
||||||
|
(You can submit each file separately, or submit them together.)
|
||||||
|
|
||||||
|
To see your best results so far:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/query hw6
|
||||||
|
|
||||||
|
Textbooks for References:
|
||||||
|
[1] KT Ch. 6.4
|
||||||
|
or Ch. 5.3 in a previous version:
|
||||||
|
http://cs.furman.edu/~chealy/cs361/kleinbergbook.pdf
|
||||||
|
[2] KT slides for DP (pages 1-37):
|
||||||
|
https://www.cs.princeton.edu/~wayne/kleinberg-tardos/pdf/06DynamicProgrammingI.pdf
|
||||||
|
[3] Wikipedia: Knapsack (unbounded and 0/1)
|
||||||
|
[4] CLRS Ch. 15
|
||||||
|
|
||||||
|
Please answer time/space complexities for each problem in report.txt.
|
||||||
|
|
||||||
|
0. For each of the coding problems below:
|
||||||
|
(a) Describe a greedy solution.
|
||||||
|
(b) Show a counterexample to the greedy solution.
|
||||||
|
(c) Define the DP subproblem
|
||||||
|
(d) Write the recurrence relations
|
||||||
|
(e) Do not forget base cases
|
||||||
|
(f) Analyze the space and time complexities
|
||||||
|
|
||||||
|
1. Unbounded Knapsack
|
||||||
|
|
||||||
|
You have n items, each with weight w_i and value v_i, and each has infinite copies.
|
||||||
|
**All numbers are positive integers.**
|
||||||
|
What's the best value for a bag of W?
|
||||||
|
|
||||||
|
>>> best(3, [(2, 4), (3, 5)])
|
||||||
|
(5, [0, 1])
|
||||||
|
|
||||||
|
the input to the best() function is W and a list of pairs (w_i, v_i).
|
||||||
|
this output means to take 0 copies of item 1 and 1 copy of item 2.
|
||||||
|
|
||||||
|
tie-breaking: *reverse* lexicographical: i.e., [1, 0] is better than [0, 1]:
|
||||||
|
(i.e., take as many copies from the first item as possible, etc.)
|
||||||
|
|
||||||
|
>>> best(3, [(1, 5), (1, 5)])
|
||||||
|
(15, [3, 0])
|
||||||
|
|
||||||
|
>>> best(3, [(1, 2), (1, 5)])
|
||||||
|
(15, [0, 3])
|
||||||
|
|
||||||
|
>>> best(3, [(1, 2), (2, 5)])
|
||||||
|
(7, [1, 1])
|
||||||
|
|
||||||
|
>>> best(58, [(5, 9), (9, 18), (6, 12)])
|
||||||
|
(114, [2, 4, 2])
|
||||||
|
|
||||||
|
>>> best(92, [(8, 9), (9, 10), (10, 12), (5, 6)])
|
||||||
|
(109, [1, 1, 7, 1])
|
||||||
|
|
||||||
|
Q: What are the time and space complexities?
|
||||||
|
|
||||||
|
filename: knapsack_unbounded.py
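As a starting point only, here is a value-only bottom-up sketch (it deliberately omits the required count list and the tie-breaking rule, which the submitted solution must handle): best[w] is the best value achievable with capacity w, giving O(nW) time and O(W) space.

def best_value(W, items):              # items is a list of (w_i, v_i) pairs
    best = [0] * (W + 1)
    for w in range(1, W + 1):
        for wi, vi in items:
            if wi <= w:
                best[w] = max(best[w], best[w - wi] + vi)
    return best[W]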
|
||||||
|
|
||||||
|
2. [WILL BE GRADED]
|
||||||
|
Bounded Knapsack
|
||||||
|
|
||||||
|
You have n items, each with weight w_i and value v_i, and has c_i copies.
|
||||||
|
**All numbers are positive integers.**
|
||||||
|
What's the best value for a bag of W?
|
||||||
|
|
||||||
|
>>> best(3, [(2, 4, 2), (3, 5, 3)])
|
||||||
|
(5, [0, 1])
|
||||||
|
|
||||||
|
the input to the best() function is W and a list of triples (w_i, v_i, c_i).
|
||||||
|
|
||||||
|
tie-breaking: same as in p1:
|
||||||
|
|
||||||
|
>>> best(3, [(1, 5, 2), (1, 5, 3)])
|
||||||
|
(15, [2, 1])
|
||||||
|
|
||||||
|
>>> best(3, [(1, 5, 1), (1, 5, 3)])
|
||||||
|
(15, [1, 2])
|
||||||
|
|
||||||
|
>>> best(20, [(1, 10, 6), (3, 15, 4), (2, 10, 3)])
|
||||||
|
(130, [6, 4, 1])
|
||||||
|
|
||||||
|
>>> best(92, [(1, 6, 6), (6, 15, 7), (8, 9, 8), (2, 4, 7), (2, 20, 2)])
|
||||||
|
(236, [6, 7, 3, 7, 2])
|
||||||
|
|
||||||
|
Q: What are the time and space complexities?
|
||||||
|
|
||||||
|
filename: knapsack_bounded.py
|
||||||
|
|
||||||
|
You are encouraged to come up with a few other testcases yourself to test your code!
|
||||||
|
|
||||||
|
Debriefing (required!): --------------------------
|
||||||
|
|
||||||
|
0. What's your name?
|
||||||
|
1. Approximately how many hours did you spend on this assignment?
|
||||||
|
2. Would you rate it as easy, moderate, or difficult?
|
||||||
|
3. Did you work on it mostly alone, or mostly with other people?
|
||||||
|
4. How deeply do you feel you understand the material it covers (0%-100%)?
|
||||||
|
5. Which part(s) of the course you like the most so far?
|
||||||
|
6. Which part(s) of the course you dislike the most so far?
|
||||||
|
|
||||||
|
This section is intended to help us calibrate the homework assignments.
|
||||||
|
Your answers to this section will *not* affect your grade; however, skipping it
|
||||||
|
will certainly do.
|
||||||
147
code/cs325-langs/hws/hw8.txt
Normal file
@@ -0,0 +1,147 @@
|
|||||||
|
CS 325-001, Algorithms, Fall 2019
|
||||||
|
HW8 - Graphs (part I); DP (part III)
|
||||||
|
|
||||||
|
Due on Monday November 18, 11:59pm.
|
||||||
|
No late submission will be accepted.
|
||||||
|
|
||||||
|
Include in your submission: report.txt, topol.py, viterbi.py.
|
||||||
|
viterbi.py will be graded for correctness (1%).
|
||||||
|
|
||||||
|
To submit:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw8 report.txt {topol,viterbi}.py
|
||||||
|
(You can submit each file separately, or submit them together.)
|
||||||
|
|
||||||
|
To see your best results so far:
|
||||||
|
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/query hw8
|
||||||
|
|
||||||
|
Textbooks for References:
|
||||||
|
[1] CLRS Ch. 23 (Elementary Graph Algorithms)
|
||||||
|
[2] KT Ch. 3 (graphs), or Ch. 2 in this earlier version:
|
||||||
|
http://cs.furman.edu/~chealy/cs361/kleinbergbook.pdf
|
||||||
|
[3] KT slides (highly recommend!):
|
||||||
|
https://www.cs.princeton.edu/~wayne/kleinberg-tardos/pdf/03Graphs.pdf
|
||||||
|
[4] Jeff Erickson: Ch. 5 (Basic Graph Algorithms):
|
||||||
|
http://jeffe.cs.illinois.edu/teaching/algorithms/book/05-graphs.pdf
|
||||||
|
[5] DPV Ch. 3, 4.2, 4.4, 4.7 (Dasgupta, Papadimitriou, Vazirani)
|
||||||
|
https://www.cs.berkeley.edu/~vazirani/algorithms/chap3.pdf (decomposition of graphs)
|
||||||
|
https://www.cs.berkeley.edu/~vazirani/algorithms/chap4.pdf (paths, shortest paths)
|
||||||
|
[6] my advanced DP tutorial (up to page 16):
|
||||||
|
http://web.engr.oregonstate.edu/~huanlian/slides/COLING-tutorial-anim.pdf
|
||||||
|
|
||||||
|
Please answer non-coding questions in report.txt.
|
||||||
|
|
||||||
|
0. For the following graphs, decide whether they are
|
||||||
|
(1) directed or undirected, (2) dense or sparse, and (3) cyclic or acyclic:
|
||||||
|
|
||||||
|
(a) Facebook
|
||||||
|
(b) Twitter
|
||||||
|
(c) a family
|
||||||
|
(d) V=airports, E=direct_flights
|
||||||
|
(e) a mesh
|
||||||
|
(f) V=courses, E=prerequisites
|
||||||
|
(g) a tree
|
||||||
|
(h) V=linux_software_packages, E=dependencies
|
||||||
|
(i) DP subproblems for 0-1 knapsack
|
||||||
|
|
||||||
|
Can you name a very big dense graph?
|
||||||
|
|
||||||
|
1. Topological Sort
|
||||||
|
|
||||||
|
For a given directed graph, output a topological order if it exists.
|
||||||
|
|
||||||
|
Tie-breaking: ARBITRARY tie-breaking. This will make the code
|
||||||
|
and time complexity analysis a lot easier.
|
||||||
|
|
||||||
|
e.g., for the following example:
|
||||||
|
|
||||||
|
0 --> 2 --> 3 --> 5 --> 6
|
||||||
|
/ \ | / \
|
||||||
|
/ \ v / \
|
||||||
|
1 > 4 > 7
|
||||||
|
|
||||||
|
>>> order(8, [(0,2), (1,2), (2,3), (2,4), (3,4), (3,5), (4,5), (5,6), (5,7)])
|
||||||
|
[0, 1, 2, 3, 4, 5, 6, 7]
|
||||||
|
|
||||||
|
Note that order() takes two arguments, n and list_of_edges,
|
||||||
|
where n specifies that the nodes are named 0..(n-1).
|
||||||
|
|
||||||
|
If we flip the (3,4) edge:
|
||||||
|
|
||||||
|
>>> order(8, [(0,2), (1,2), (2,3), (2,4), (4,3), (3,5), (4,5), (5,6), (5,7)])
|
||||||
|
[0, 1, 2, 4, 3, 5, 6, 7]
|
||||||
|
|
||||||
|
If there is a cycle, return None
|
||||||
|
|
||||||
|
>>> order(4, [(0,1), (1,2), (2,1), (2,3)])
|
||||||
|
None
|
||||||
|
|
||||||
|
Other cases:
|
||||||
|
|
||||||
|
>>> order(5, [(0,1), (1,2), (2,3), (3,4)])
|
||||||
|
[0, 1, 2, 3, 4]
|
||||||
|
|
||||||
|
>>> order(5, [])
|
||||||
|
[0, 1, 2, 3, 4] # could be any order
|
||||||
|
|
||||||
|
>>> order(3, [(1,2), (2,1)])
|
||||||
|
None
|
||||||
|
|
||||||
|
>>> order(1, [(0,0)]) # self-loop
|
||||||
|
None
|
||||||
|
|
||||||
|
Tie-breaking: arbitrary (any valid topological order is fine).
|
||||||
|
|
||||||
|
filename: topol.py
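A sketch of one standard solution, Kahn's algorithm (not the released solution): repeatedly remove nodes of in-degree 0; if not every node gets removed, there is a cycle and we return None. With arbitrary tie-breaking this is O(V + E).

def order(n, edges):
    adj = [[] for _ in range(n)]
    indeg = [0] * n
    for u, v in edges:
        adj[u].append(v)
        indeg[v] += 1
    queue = [u for u in range(n) if indeg[u] == 0]
    out = []
    while queue:
        u = queue.pop()                # arbitrary tie-breaking is allowed
        out.append(u)
        for v in adj[u]:
            indeg[v] -= 1
            if indeg[v] == 0:
                queue.append(v)
    return out if len(out) == n else None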
|
||||||
|
|
||||||
|
questions:
|
||||||
|
(a) did you realize that bottom-up implementations of DP use (implicit) topological orderings?
|
||||||
|
e.g., what is the topological ordering in your (or my) bottom-up bounded knapsack code?
|
||||||
|
(b) what about top-down implementations? what order do they use to traverse the graph?
|
||||||
|
(c) does that suggest there is a top-down solution for topological sort as well?
|
||||||
|
|
||||||
|
2. [WILL BE GRADED]
|
||||||
|
Viterbi Algorithm For Longest Path in DAG (see DPV 4.7, [2], CLRS problem 15-1)
|
||||||
|
|
||||||
|
Recall that the Viterbi algorithm has just two steps:
|
||||||
|
a) get a topological order (use problem 1 above)
|
||||||
|
b) follow that order, and do either forward or backward updates
|
||||||
|
|
||||||
|
This algorithm captures all DP problems on DAGs, for example,
|
||||||
|
longest path, shortest path, number of paths, etc.
|
||||||
|
|
||||||
|
In this problem, given a DAG (guaranteed acyclic!), output a pair (l, p)
|
||||||
|
where l is the length of the longest path (number of edges), and p is the path. (you can think of each edge being unit cost)
|
||||||
|
|
||||||
|
e.g., for the above example:
|
||||||
|
|
||||||
|
>>> longest(8, [(0,2), (1,2), (2,3), (2,4), (3,4), (3,5), (4,5), (5,6), (5,7)])
|
||||||
|
(5, [0, 2, 3, 4, 5, 6])
|
||||||
|
|
||||||
|
>>> longest(8, [(0,2), (1,2), (2,3), (2,4), (4,3), (3,5), (4,5), (5,6), (5,7)])
|
||||||
|
(5, [0, 2, 4, 3, 5, 6])
|
||||||
|
|
||||||
|
>>> longest(8, [(0,1), (0,2), (1,2), (2,3), (2,4), (4,3), (3,5), (4,5), (5,6), (5,7), (6,7)])
|
||||||
|
(7, [0, 1, 2, 4, 3, 5, 6, 7]) # unique answer
|
||||||
|
|
||||||
|
Note that longest() takes two arguments, n and list_of_edges,
|
||||||
|
where n specifies that the nodes are named 0..(n-1).
|
||||||
|
|
||||||
|
Tie-breaking: arbitrary. any longest path is fine.
|
||||||
|
|
||||||
|
Filename: viterbi.py
|
||||||
|
|
||||||
|
Note: you can use this program to solve MIS, knapsacks, coins, etc.
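A sketch of the two Viterbi steps (not the released solution), assuming order() from problem 1 is available (e.g. imported from topol.py): one forward pass over the topological order with back-pointers; dist[v] is the longest path (in edges) ending at v.

def longest(n, edges):
    topo = order(n, edges)             # input is guaranteed acyclic
    adj = [[] for _ in range(n)]
    for u, v in edges:
        adj[u].append(v)
    dist, back = [0] * n, [None] * n
    for u in topo:                     # forward updates in topological order
        for v in adj[u]:
            if dist[u] + 1 > dist[v]:
                dist[v], back[v] = dist[u] + 1, u
    end = max(range(n), key=lambda v: dist[v])
    path = [end]
    while back[path[-1]] is not None:
        path.append(back[path[-1]])
    return dist[end], path[::-1]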
|
||||||
|
|
||||||
|
|
||||||
|
Debriefing (required!): --------------------------
|
||||||
|
|
||||||
|
0. What's your name?
|
||||||
|
1. Approximately how many hours did you spend on this assignment?
|
||||||
|
2. Would you rate it as easy, moderate, or difficult?
|
||||||
|
3. Did you work on it mostly alone, or mostly with other people?
|
||||||
|
4. How deeply do you feel you understand the material it covers (0%-100%)?
|
||||||
|
5. Any other comments?
|
||||||
|
|
||||||
|
This section is intended to help us calibrate the homework assignments.
|
||||||
|
Your answers to this section will *not* affect your grade; however, skipping it
|
||||||
|
will certainly do.
|
||||||
166
code/cs325-langs/hws/hw9.txt
Normal file
@@ -0,0 +1,166 @@
|
|||||||
|
CS 325, Algorithms, Fall 2019
|
||||||
|
HW9 - Graphs (part 2), DP (part 4)
|
||||||
|
|
||||||
|
Due Monday Nov 25, 11:59pm.
|
||||||
|
No late submission will be accepted.
|
||||||
|
|
||||||
|
Include in your submission: report.txt, dijkstra.py, nbest.py.
|
||||||
|
dijkstra.py will be graded for correctness (1%).
|
||||||
|
|
||||||
|
Textbooks for References:
|
||||||
|
[1] CLRS Ch. 22 (graph)
|
||||||
|
[2] my DP tutorial (up to page 16):
|
||||||
|
http://web.engr.oregonstate.edu/~huanlian/slides/COLING-tutorial-anim.pdf
|
||||||
|
[3] DPV Ch. 3, 4.2, 4.4, 4.7, 6 (Dasgupta, Papadimitriou, Vazirani)
|
||||||
|
https://www.cs.berkeley.edu/~vazirani/algorithms/chap3.pdf
|
||||||
|
https://www.cs.berkeley.edu/~vazirani/algorithms/chap4.pdf
|
||||||
|
https://www.cs.berkeley.edu/~vazirani/algorithms/chap6.pdf
|
||||||
|
[4] KT Ch. 6 (DP)
|
||||||
|
http://www.aw-bc.com/info/kleinberg/assets/downloads/ch6.pdf
|
||||||
|
[5] KT slides: Greedy II (Dijkstra)
|
||||||
|
http://www.cs.princeton.edu/~wayne/kleinberg-tardos/
|
||||||
|
|
||||||
|
***Please answer time/space complexities for each problem in report.txt.
|
||||||
|
|
||||||
|
1. [WILL BE GRADED]
|
||||||
|
Dijkstra (see CLRS 24.3 and DPV 4.4)
|
||||||
|
|
||||||
|
Given an undirected graph, find the shortest path from source (node 0)
|
||||||
|
to target (node n-1).
|
||||||
|
|
||||||
|
Edge weights are guaranteed to be non-negative, since Dijkstra doesn't work
|
||||||
|
with negative weights, e.g.
|
||||||
|
|
||||||
|
3
|
||||||
|
0 ------ 1
|
||||||
|
\ /
|
||||||
|
2 \ / -2
|
||||||
|
\/
|
||||||
|
2
|
||||||
|
|
||||||
|
in this example, Dijkstra would return length 2 (path 0-2),
|
||||||
|
but path 0-1-2 is better (length 1).
|
||||||
|
|
||||||
|
For example (return a pair of shortest-distance and shortest-path):
|
||||||
|
|
||||||
|
1
|
||||||
|
0 ------ 1
|
||||||
|
\ / \
|
||||||
|
5 \ /1 \6
|
||||||
|
\/ 2 \
|
||||||
|
2 ------ 3
|
||||||
|
|
||||||
|
>>> shortest(4, [(0,1,1), (0,2,5), (1,2,1), (2,3,2), (1,3,6)])
|
||||||
|
(4, [0,1,2,3])
|
||||||
|
|
||||||
|
If the target node (n-1) is unreachable from the source (0),
|
||||||
|
return None:
|
||||||
|
|
||||||
|
>>> shortest(5, [(0,1,1), (0,2,5), (1,2,1), (2,3,2), (1,3,6)])
|
||||||
|
None
|
||||||
|
|
||||||
|
Another example:
|
||||||
|
|
||||||
|
1 1
|
||||||
|
0-----1 2-----3
|
||||||
|
|
||||||
|
>>> shortest(4, [(0,1,1), (2,3,1)])
|
||||||
|
None
|
||||||
|
|
||||||
|
Tiebreaking: arbitrary. Any shortest path would do.
|
||||||
|
|
||||||
|
Filename: dijkstra.py
|
||||||
|
|
||||||
|
Hint: please use heapdict from here:
|
||||||
|
https://raw.githubusercontent.com/DanielStutzbach/heapdict/master/heapdict.py
|
||||||
|
|
||||||
|
>>> from heapdict import heapdict
|
||||||
|
>>> h = heapdict()
|
||||||
|
>>> h['a'] = 3
|
||||||
|
>>> h['b'] = 1
|
||||||
|
>>> h.peekitem()
|
||||||
|
('b', 1)
|
||||||
|
>>> h['a'] = 0
|
||||||
|
>>> h.peekitem()
|
||||||
|
('a', 0)
|
||||||
|
>>> h.popitem()
|
||||||
|
('a', 0)
|
||||||
|
>>> len(h)
|
||||||
|
1
|
||||||
|
>>> 'a' in h
|
||||||
|
False
|
||||||
|
>>> 'b' in h
|
||||||
|
True
|
||||||
|
|
||||||
|
You don't need to submit heapdict.py; we have it in our grader.
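A sketch along the lines of that hint (not the released solution): heapdict gives decrease-key by plain assignment (h[v] = d), and parent pointers reconstruct the path once the target is popped.

from heapdict import heapdict

def shortest(n, edges):
    adj = [[] for _ in range(n)]
    for u, v, w in edges:                  # undirected graph
        adj[u].append((v, w))
        adj[v].append((u, w))
    h = heapdict()
    h[0] = 0
    back, done = {0: None}, set()
    while len(h) > 0:
        u, d = h.popitem()                 # closest not-yet-finalized node
        done.add(u)
        if u == n - 1:                     # reached the target: rebuild the path
            path = [u]
            while back[path[-1]] is not None:
                path.append(back[path[-1]])
            return d, path[::-1]
        for v, w in adj[u]:
            if v not in done and (v not in h or d + w < h[v]):
                h[v] = d + w               # decrease-key via assignment
                back[v] = u
    return None                            # target unreachable from node 0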
|
||||||
|
|
||||||
|
|
||||||
|
2. [Redo the nbest question from Midterm, preparing for HW10 part 3]
|
||||||
|
|
||||||
|
Given k pairs of lists A_i and B_i (0 <= i < k), each with n sorted numbers,
|
||||||
|
find the n smallest pairs in all the (k n^2) pairs.
|
||||||
|
We say (x,y) < (x', y') if and only if x+y < x'+y'.
|
||||||
|
Tie-breaking: lexicographical (i.e., prefer smaller x).
|
||||||
|
|
||||||
|
You can base your code on the skeleton from the Midterm:
|
||||||
|
|
||||||
|
from heapq import heappush, heappop
|
||||||
|
def nbest(ABs): # no need to pass in k or n
|
||||||
|
k = len(ABs)
|
||||||
|
n = len(ABs[0][0])
|
||||||
|
def trypush(i, p, q): # push pair (A_i,p, B_i,q) if possible
|
||||||
|
A, B = ABs[i] # A_i, B_i
|
||||||
|
if p < n and q < n and ______________________________:
|
||||||
|
heappush(h, (________________, i, p, q, (A[p],B[q])))
|
||||||
|
used.add((i, p, q))
|
||||||
|
h, used = ___________________ # initialize
|
||||||
|
for i in range(k): # NEED TO OPTIMIZE
|
||||||
|
trypush(______________)
|
||||||
|
for _ in range(n):
|
||||||
|
_, i, p, q, pair = ________________
|
||||||
|
yield pair # return the next pair (in a lazy list)
|
||||||
|
_______________________
|
||||||
|
_______________________
|
||||||
|
|
||||||
|
|
||||||
|
But recall we had two optimizations to speed up the first for-loop (queue initialization):
|
||||||
|
|
||||||
|
(1) using heapify instead of k initial pushes. You need to implement this (very easy).
|
||||||
|
|
||||||
|
(2) using qselect to choose top n out of the k bests. This one is OPTIONAL.
|
||||||
|
|
||||||
|
Analyze the time complexity for the version you implemented.
|
||||||
|
|
||||||
|
>>> list(nbest([([1,2,4], [2,3,5]), ([0,2,4], [3,4,5])]))
|
||||||
|
|
||||||
|
[(0, 3), (1, 2), (0, 4)]
|
||||||
|
|
||||||
|
>>> list(nbest([([-1,2],[1,4]), ([0,2],[3,4]), ([0,1],[4,6]), ([-1,2],[1,5])]))
|
||||||
|
[(-1, 1), (-1, 1)]
|
||||||
|
|
||||||
|
>>> list(nbest([([5,6,10,14],[3,5,10,14]),([2,7,9,11],[3,8,12,16]),([1,3,8,10],[5,9,10,11]),([1,2,3,5],[3,4,9,10]),([4,5,9,10],[2,4,6,11]),([4,6,10,13],[2,3,5,9]),([3,7,10,12],[1,2,5,10]),([5,9,14,15],[4,8,13,14])]))
|
||||||
|
|
||||||
|
[(1, 3), (3, 1), (1, 4), (2, 3)]
|
||||||
|
|
||||||
|
>>> list(nbest([([1,6,8,13],[5,8,11,12]),([1,2,3,5],[5,9,11,13]),([3,5,7,10],[4,6,7,11]),([1,4,7,8],[4,9,11,15]),([4,8,10,13],[4,6,10,11]),([4,8,12,15],[5,10,11,13]),([2,3,4,8],[4,7,11,15]),([4,5,10,15],[5,6,7,8])]))
|
||||||
|
|
||||||
|
[(1, 4), (1, 5), (1, 5), (2, 4)]
|
||||||
|
|
||||||
|
This problem prepares you for the hardest question in HW10 (part 3).
|
||||||
|
|
||||||
|
Filename: nbest.py
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Debriefing (required!): --------------------------
|
||||||
|
|
||||||
|
0. What's your name?
|
||||||
|
1. Approximately how many hours did you spend on this assignment?
|
||||||
|
2. Would you rate it as easy, moderate, or difficult?
|
||||||
|
3. Did you work on it mostly alone, or mostly with other people?
|
||||||
|
4. How deeply do you feel you understand the material it covers (0%-100%)?
|
||||||
|
5. Any other comments?
|
||||||
|
|
||||||
|
This section is intended to help us calibrate the homework assignments.
|
||||||
|
Your answers to this section will *not* affect your grade; however, skipping it
|
||||||
|
will certainly do.
|
||||||
19
code/cs325-langs/sols/hw1.lang
Normal file
@@ -0,0 +1,19 @@
qselect(xs,k) =
~xs -> {
pivot <- xs[0]!
left <- xs[#0 <= pivot]
right <- xs[#0 > pivot]
} ->
if k > |left| + 1 then qselect(right, k - |left| - 1)
else if k == |left| + 1 then [pivot]
else qselect(left, k);

_search(xs, k) =
if xs[1] == k then xs
else if xs[1] > k then _search(xs[0], k)
else _search(xs[2], k);

sorted(xs) = sorted(xs[0]) ++ [xs[1]] ++ sorted(xs[2]);
search(xs, k) = |_search(xs, k)| != 0;
insert(xs, k) = _insert(k, _search(xs, k));
_insert(k, xs) = if |xs| == 0 then xs << [] << k << [] else xs
11
code/cs325-langs/sols/hw2.lang
Normal file
@@ -0,0 +1,11 @@
state 0;

effect {
    if(SOURCE == R) {
        STATE = STATE + |LEFT|;
    }
}

combine {
    STATE = STATE + LSTATE + RSTATE;
}
95
code/cs325-langs/sols/hw3.lang
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
function qselect(xs, k, c) {
|
||||||
|
if xs == [] {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
traverser bisector(list: xs, span: (0,len(xs)));
|
||||||
|
traverser pivot(list: xs, random: true);
|
||||||
|
|
||||||
|
let pivotE = pop!(pivot);
|
||||||
|
let (leftList, rightList) = bisect!(bisector, (x) -> c(x) < c(pivotE));
|
||||||
|
|
||||||
|
if k > len(leftList) + 1 {
|
||||||
|
return qselect(rightList, k - len(leftList) - 1, c);
|
||||||
|
} elsif k == len(leftList) + 1 {
|
||||||
|
return pivotE;
|
||||||
|
} else {
|
||||||
|
return qselect(leftList, k, c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function closestUnsorted(xs, k, n) {
|
||||||
|
let min = qselect(list(xs), k, (x) -> abs(x - n));
|
||||||
|
let out = [];
|
||||||
|
let countEqual = k;
|
||||||
|
|
||||||
|
traverser iter(list: xs, span: (0, len(xs)));
|
||||||
|
while valid!(iter) {
|
||||||
|
if abs(at!(iter)-n) < abs(min-n) {
|
||||||
|
let countEqual = countEqual - 1;
|
||||||
|
}
|
||||||
|
step!(iter);
|
||||||
|
}
|
||||||
|
|
||||||
|
traverser iter(list: xs, span: (0, len(xs)));
|
||||||
|
while valid!(iter) {
|
||||||
|
if abs(at!(iter)-n) == abs(min-n) and countEqual > 0 {
|
||||||
|
let countEqual = countEqual - 1;
|
||||||
|
let out = out + [at!(iter)];
|
||||||
|
} elsif abs(at!(iter)-n) < abs(min-n) {
|
||||||
|
let out = out + [at!(iter)];
|
||||||
|
}
|
||||||
|
step!(iter);
|
||||||
|
}
|
||||||
|
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
function closestSorted(xs, k, n) {
|
||||||
|
let start = bisect(xs, n);
|
||||||
|
let counter = 0;
|
||||||
|
traverser left(list: xs, span: (0, start), reverse: true);
|
||||||
|
traverser right(list: xs, span: (start, len(xs)));
|
||||||
|
|
||||||
|
while counter != k and canstep!(left) and valid!(right) {
|
||||||
|
if abs(at!(left, 1) - n) < abs(at!(right) - n) {
|
||||||
|
step!(left);
|
||||||
|
} else {
|
||||||
|
step!(right);
|
||||||
|
}
|
||||||
|
let counter = counter + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
while counter != k and (canstep!(left) or valid!(right)) {
|
||||||
|
if canstep!(left) { step!(left); }
|
||||||
|
else { step!(right); }
|
||||||
|
let counter = counter + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return subset!(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
sorted function xyz(xs, k) {
|
||||||
|
traverser x(list: xs, span: (0,len(xs)));
|
||||||
|
let dest = [];
|
||||||
|
|
||||||
|
while valid!(x) {
|
||||||
|
traverser z(list: xs, span: (pos!(x)+2,len(xs)));
|
||||||
|
traverser y(list: xs, span: (pos!(x)+1,pos!(z)));
|
||||||
|
|
||||||
|
while valid!(y) and valid!(z) {
|
||||||
|
if at!(x) + at!(y) == at!(z) {
|
||||||
|
let dest = dest + [(at!(x), at!(y), at!(z))];
|
||||||
|
step!(z);
|
||||||
|
} elsif at!(x) + at!(y) > at!(z) {
|
||||||
|
step!(z);
|
||||||
|
} else {
|
||||||
|
step!(y);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
step!(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
return dest;
|
||||||
|
}
|
||||||
15
code/cs325-langs/src/Common.hs
Normal file
@@ -0,0 +1,15 @@
module Common where
import PythonAst
import PythonGen
import Text.Parsec

compile :: (String -> String -> Either ParseError p) -> (p -> [PyStmt]) -> String -> IO ()
compile p t f = do
    let inputName = f ++ ".lang"
    let outputName = f ++ ".py"
    file <- readFile inputName
    let either = p inputName file
    case either of
        Right prog -> writeFile outputName (translate $ t prog)
        Left e -> print e

90
code/cs325-langs/src/CommonParsing.hs
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
module CommonParsing where
|
||||||
|
import Data.Char
|
||||||
|
import Data.Functor
|
||||||
|
import Text.Parsec
|
||||||
|
import Text.Parsec.Char
|
||||||
|
import Text.Parsec.Combinator
|
||||||
|
|
||||||
|
type Parser a b = Parsec String a b
|
||||||
|
|
||||||
|
kw :: String -> Parser a ()
|
||||||
|
kw s = try $ string s <* spaces $> ()
|
||||||
|
|
||||||
|
kwIf :: Parser a ()
|
||||||
|
kwIf = kw "if"
|
||||||
|
|
||||||
|
kwThen :: Parser a ()
|
||||||
|
kwThen = kw "then"
|
||||||
|
|
||||||
|
kwElse :: Parser a ()
|
||||||
|
kwElse = kw "else"
|
||||||
|
|
||||||
|
kwElsif :: Parser a ()
|
||||||
|
kwElsif = kw "elsif"
|
||||||
|
|
||||||
|
kwWhile :: Parser a ()
|
||||||
|
kwWhile = kw "while"
|
||||||
|
|
||||||
|
kwState :: Parser a ()
|
||||||
|
kwState = kw "state"
|
||||||
|
|
||||||
|
kwEffect :: Parser a ()
|
||||||
|
kwEffect = kw "effect"
|
||||||
|
|
||||||
|
kwCombine :: Parser a ()
|
||||||
|
kwCombine = kw "combine"
|
||||||
|
|
||||||
|
kwRand :: Parser a ()
|
||||||
|
kwRand = kw "rand"
|
||||||
|
|
||||||
|
kwFunction :: Parser a ()
|
||||||
|
kwFunction = kw "function"
|
||||||
|
|
||||||
|
kwSorted :: Parser a ()
|
||||||
|
kwSorted = kw "sorted"
|
||||||
|
|
||||||
|
kwLet :: Parser a ()
|
||||||
|
kwLet = kw "let"
|
||||||
|
|
||||||
|
kwTraverser :: Parser a ()
|
||||||
|
kwTraverser = kw "traverser"
|
||||||
|
|
||||||
|
kwReturn :: Parser a ()
|
||||||
|
kwReturn = kw "return"
|
||||||
|
|
||||||
|
op :: String -> op -> Parser a op
|
||||||
|
op s o = string s $> o
|
||||||
|
|
||||||
|
int :: Parser a Int
|
||||||
|
int = read <$> (many1 digit <* spaces)
|
||||||
|
|
||||||
|
var :: [String] -> Parser a String
|
||||||
|
var reserved =
|
||||||
|
do
|
||||||
|
c <- satisfy $ \c -> isLetter c || c == '_'
|
||||||
|
cs <- many (satisfy isLetter <|> digit) <* spaces
|
||||||
|
let name = c:cs
|
||||||
|
if name `elem` reserved
|
||||||
|
then fail "Can't use reserved keyword as identifier"
|
||||||
|
else return name
|
||||||
|
|
||||||
|
list :: Char -> Char -> Char -> Parser a b -> Parser a [b]
|
||||||
|
list co cc cd pe = surround co cc $ sepBy pe (char cd >> spaces)
|
||||||
|
|
||||||
|
surround :: Char -> Char -> Parser a b -> Parser a b
|
||||||
|
surround c1 c2 pe =
|
||||||
|
do
|
||||||
|
char c1 >> spaces
|
||||||
|
e <- pe
|
||||||
|
spaces >> char c2 >> spaces
|
||||||
|
return e
|
||||||
|
|
||||||
|
level :: (o -> e -> e -> e) -> Parser a o -> Parser a e -> Parser a e
|
||||||
|
level c po pe =
|
||||||
|
do
|
||||||
|
e <- pe <* spaces
|
||||||
|
ops <- many $ try $ (flip . c <$> (po <* spaces) <*> pe) <* spaces
|
||||||
|
return $ foldl (flip ($)) e ops
|
||||||
|
|
||||||
|
precedence :: (o -> e -> e -> e) -> Parser a e -> [ Parser a o ] -> Parser a e
|
||||||
|
precedence = foldl . flip . level
|
||||||
393
code/cs325-langs/src/LanguageOne.hs
Normal file
@@ -0,0 +1,393 @@
|
|||||||
|
module LanguageOne where
|
||||||
|
import qualified PythonAst as Py
|
||||||
|
import qualified CommonParsing as P
|
||||||
|
import Data.Bifunctor
|
||||||
|
import Data.Char
|
||||||
|
import Data.Functor
|
||||||
|
import qualified Data.Map as Map
|
||||||
|
import Data.Maybe
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
import Text.Parsec
|
||||||
|
import Text.Parsec.Char
|
||||||
|
import Text.Parsec.Combinator
|
||||||
|
import Control.Monad.State
|
||||||
|
|
||||||
|
{- Data Types -}
|
||||||
|
data PossibleType = List | Any deriving Eq
|
||||||
|
|
||||||
|
data SelectorMarker = None | Remove
|
||||||
|
|
||||||
|
data Op
|
||||||
|
= Add
|
||||||
|
| Subtract
|
||||||
|
| Multiply
|
||||||
|
| Divide
|
||||||
|
| Insert
|
||||||
|
| Concat
|
||||||
|
| LessThan
|
||||||
|
| LessThanEq
|
||||||
|
| GreaterThan
|
||||||
|
| GreaterThanEq
|
||||||
|
| Equal
|
||||||
|
| NotEqual
|
||||||
|
| And
|
||||||
|
| Or
|
||||||
|
|
||||||
|
data Selector = Selector String Expr
|
||||||
|
|
||||||
|
data Expr
|
||||||
|
= Var String
|
||||||
|
| IntLiteral Int
|
||||||
|
| ListLiteral [Expr]
|
||||||
|
| Split Expr [Selector] Expr
|
||||||
|
| IfElse Expr Expr Expr
|
||||||
|
| BinOp Op Expr Expr
|
||||||
|
| FunctionCall Expr [Expr]
|
||||||
|
| LengthOf Expr
|
||||||
|
| Random
|
||||||
|
| Access Expr Expr SelectorMarker
|
||||||
|
| Parameter Int
|
||||||
|
|
||||||
|
data Function = Function String [String] Expr
|
||||||
|
|
||||||
|
data Prog = Prog [Function]
|
||||||
|
|
||||||
|
{- Parser -}
|
||||||
|
type Parser = Parsec String (Maybe Int)
|
||||||
|
|
||||||
|
parseVar :: Parser String
|
||||||
|
parseVar = P.var ["if", "then", "else", "var"]
|
||||||
|
|
||||||
|
parseThis :: Parser Expr
|
||||||
|
parseThis =
|
||||||
|
do
|
||||||
|
char '&'
|
||||||
|
contextNum <- getState
|
||||||
|
spaces
|
||||||
|
return (Var $ "context_" ++ show contextNum)
|
||||||
|
|
||||||
|
parseList :: Parser Expr
|
||||||
|
parseList = ListLiteral <$>
|
||||||
|
do
|
||||||
|
char '[' >> spaces
|
||||||
|
es <- sepBy parseExpr (char ',' >> spaces)
|
||||||
|
spaces >> char ']' >> spaces
|
||||||
|
return es
|
||||||
|
|
||||||
|
parseSplit :: Parser Expr
|
||||||
|
parseSplit =
|
||||||
|
do
|
||||||
|
char '~' >> spaces
|
||||||
|
e <- parseExpr
|
||||||
|
spaces >> string "->"
|
||||||
|
spaces >> char '{'
|
||||||
|
contextNum <- getState
|
||||||
|
putState $ return $ 1 + fromMaybe (-1) contextNum
|
||||||
|
es <- many1 (spaces >> parseSelector)
|
||||||
|
putState contextNum
|
||||||
|
spaces >> char '}' >> spaces >> string "->" >> spaces
|
||||||
|
e' <- parseExpr
|
||||||
|
spaces
|
||||||
|
return $ Split e es e'
|
||||||
|
|
||||||
|
parseSelectorMarker :: Parser SelectorMarker
|
||||||
|
parseSelectorMarker = (char '!' >> return Remove) <|> return None
|
||||||
|
|
||||||
|
parseSelector :: Parser Selector
|
||||||
|
parseSelector =
|
||||||
|
do
|
||||||
|
name <- parseVar
|
||||||
|
spaces >> string "<-" >> spaces
|
||||||
|
expr <- parseExpr
|
||||||
|
spaces
|
||||||
|
return $ Selector name expr
|
||||||
|
|
||||||
|
parseIfElse :: Parser Expr
|
||||||
|
parseIfElse =
|
||||||
|
do
|
||||||
|
P.kwIf >> spaces
|
||||||
|
ec <- parseExpr
|
||||||
|
spaces >> P.kwThen >> spaces
|
||||||
|
et <- parseExpr
|
||||||
|
spaces >> P.kwElse >> spaces
|
||||||
|
ee <- parseExpr
|
||||||
|
spaces
|
||||||
|
return $ IfElse ec et ee
|
||||||
|
|
||||||
|
parseLength :: Parser Expr
|
||||||
|
parseLength =
|
||||||
|
do
|
||||||
|
char '|' >> spaces
|
||||||
|
e <- parseExpr
|
||||||
|
spaces >> char '|' >> spaces
|
||||||
|
return $ LengthOf e
|
||||||
|
|
||||||
|
parseParameter :: Parser Expr
|
||||||
|
parseParameter =
|
||||||
|
do
|
||||||
|
char '#'
|
||||||
|
d <- digit
|
||||||
|
spaces
|
||||||
|
return $ Parameter $ read [d]
|
||||||
|
|
||||||
|
parseParenthesized :: Parser Expr
|
||||||
|
parseParenthesized =
|
||||||
|
do
|
||||||
|
char '(' >> spaces
|
||||||
|
e <- parseExpr
|
||||||
|
spaces >> char ')' >> spaces
|
||||||
|
return e
|
||||||
|
|
||||||
|
parseBasicExpr :: Parser Expr
|
||||||
|
parseBasicExpr = choice
|
||||||
|
[ IntLiteral <$> P.int
|
||||||
|
, parseThis
|
||||||
|
, parseList
|
||||||
|
, parseSplit
|
||||||
|
, parseLength
|
||||||
|
, parseParameter
|
||||||
|
, parseParenthesized
|
||||||
|
, Var <$> try parseVar
|
||||||
|
, P.kwRand $> Random
|
||||||
|
, parseIfElse
|
||||||
|
]
|
||||||
|
|
||||||
|
parsePostfix :: Parser (Expr -> Expr)
|
||||||
|
parsePostfix = parsePostfixAccess <|> parsePostfixCall
|
||||||
|
|
||||||
|
parsePostfixAccess :: Parser (Expr -> Expr)
|
||||||
|
parsePostfixAccess =
|
||||||
|
do
|
||||||
|
char '[' >> spaces
|
||||||
|
e <- parseExpr
|
||||||
|
spaces >> char ']' >> spaces
|
||||||
|
marker <- parseSelectorMarker
|
||||||
|
spaces
|
||||||
|
return $ \e' -> Access e' e marker
|
||||||
|
|
||||||
|
parsePostfixCall :: Parser (Expr -> Expr)
|
||||||
|
parsePostfixCall =
|
||||||
|
do
|
||||||
|
char '(' >> spaces
|
||||||
|
es <- sepBy parseExpr (char ',' >> spaces)
|
||||||
|
char ')' >> spaces
|
||||||
|
return $ flip FunctionCall es
|
||||||
|
|
||||||
|
parsePostfixedExpr :: Parser Expr
|
||||||
|
parsePostfixedExpr =
|
||||||
|
do
|
||||||
|
eb <- parseBasicExpr
|
||||||
|
spaces
|
||||||
|
ps <- many parsePostfix
|
||||||
|
return $ foldl (flip ($)) eb ps
|
||||||
|
|
||||||
|
parseExpr :: Parser Expr
|
||||||
|
parseExpr = P.precedence BinOp parsePostfixedExpr
|
||||||
|
[ P.op "*" Multiply, P.op "/" Divide
|
||||||
|
, P.op "+" Add, P.op "-" Subtract
|
||||||
|
, P.op "<<" Insert
|
||||||
|
, P.op "++" Concat
|
||||||
|
, try (P.op "<=" LessThanEq) <|> try (P.op ">=" GreaterThanEq) <|>
|
||||||
|
P.op "<" LessThan <|> P.op ">" GreaterThan <|>
|
||||||
|
P.op "==" Equal <|> P.op "!=" NotEqual
|
||||||
|
, P.op "&&" And <|> P.op "||" Or
|
||||||
|
]
|
||||||
|
|
||||||
|
parseFunction :: Parser Function
|
||||||
|
parseFunction =
|
||||||
|
do
|
||||||
|
name <- parseVar
|
||||||
|
spaces >> char '(' >> spaces
|
||||||
|
vs <- sepBy parseVar (char ',' >> spaces)
|
||||||
|
spaces >> char ')' >> spaces >> char '=' >> spaces
|
||||||
|
body <- parseExpr
|
||||||
|
spaces
|
||||||
|
return $ Function name vs body
|
||||||
|
|
||||||
|
parseProg :: Parser Prog
|
||||||
|
parseProg = Prog <$> sepBy1 parseFunction (char ';' >> spaces)
|
||||||
|
|
||||||
|
parse :: SourceName -> String -> Either ParseError Prog
|
||||||
|
parse = runParser parseProg Nothing
|
||||||
|
|
||||||
|
{- "Type" checker -}
|
||||||
|
mergePossibleType :: PossibleType -> PossibleType -> PossibleType
|
||||||
|
mergePossibleType List _ = List
|
||||||
|
mergePossibleType _ List = List
|
||||||
|
mergePossibleType _ _ = Any
|
||||||
|
|
||||||
|
getPossibleType :: String -> Expr -> PossibleType
|
||||||
|
getPossibleType s (Var s') = if s == s' then List else Any
|
||||||
|
getPossibleType _ (ListLiteral _) = List
|
||||||
|
getPossibleType s (Split _ _ e) = getPossibleType s e
|
||||||
|
getPossibleType s (IfElse i t e) =
|
||||||
|
foldl1 mergePossibleType $ map (getPossibleType s) [i, t, e]
|
||||||
|
getPossibleType _ (BinOp Insert _ _) = List
|
||||||
|
getPossibleType _ (BinOp Concat _ _) = List
|
||||||
|
getPossibleType _ _ = Any
|
||||||
|
|
||||||
|
{- Translator -}
|
||||||
|
type Translator = Control.Monad.State.State (Map.Map String [String], Int)
|
||||||
|
|
||||||
|
currentTemp :: Translator String
|
||||||
|
currentTemp = do
|
||||||
|
t <- gets snd
|
||||||
|
return $ "temp" ++ show t
|
||||||
|
|
||||||
|
incrementTemp :: Translator String
|
||||||
|
incrementTemp = do
|
||||||
|
modify (second (+1))
|
||||||
|
currentTemp
|
||||||
|
|
||||||
|
hasLambda :: Expr -> Bool
|
||||||
|
hasLambda (ListLiteral es) = any hasLambda es
|
||||||
|
hasLambda (Split e ss r) =
|
||||||
|
hasLambda e || any (\(Selector _ e') -> hasLambda e') ss || hasLambda r
|
||||||
|
hasLambda (IfElse i t e) = hasLambda i || hasLambda t || hasLambda e
|
||||||
|
hasLambda (BinOp o l r) = hasLambda l || hasLambda r
|
||||||
|
hasLambda (FunctionCall e es) = any hasLambda $ e : es
|
||||||
|
hasLambda (LengthOf e) = hasLambda e
|
||||||
|
hasLambda (Access e _ _) = hasLambda e
|
||||||
|
hasLambda Parameter{} = True
|
||||||
|
hasLambda _ = False
|
||||||
|
|
||||||
|
translate :: Prog -> [Py.PyStmt]
|
||||||
|
translate p = fst $ runState (translateProg p) (Map.empty, 0)
|
||||||
|
|
||||||
|
translateProg :: Prog -> Translator [Py.PyStmt]
|
||||||
|
translateProg (Prog fs) = concat <$> traverse translateFunction fs
|
||||||
|
|
||||||
|
translateFunction :: Function -> Translator [Py.PyStmt]
|
||||||
|
translateFunction (Function n ps ex) = do
|
||||||
|
let createIf p = Py.BinOp Py.Equal (Py.Var p) (Py.ListLiteral [])
|
||||||
|
let createReturn p = Py.IfElse (createIf p) [Py.Return (Py.Var p)] [] Nothing
|
||||||
|
let fastReturn = [createReturn p | p <- take 1 ps, getPossibleType p ex == List]
|
||||||
|
(ss, e) <- translateExpr ex
|
||||||
|
return $ return $ Py.FunctionDef n ps $ fastReturn ++ ss ++ [Py.Return e]
|
||||||
|
|
||||||
|
translateSelector :: Selector -> Translator Py.PyStmt
|
||||||
|
translateSelector (Selector n e) =
|
||||||
|
let
|
||||||
|
cacheCheck = Py.NotIn (Py.StrLiteral n) (Py.Var "cache")
|
||||||
|
cacheAccess = Py.Access (Py.Var "cache") [Py.StrLiteral n]
|
||||||
|
cacheSet = Py.Assign (Py.AccessPat (Py.Var "cache") [Py.StrLiteral n])
|
||||||
|
body e' = [ Py.IfElse cacheCheck [cacheSet e'] [] Nothing, Py.Return cacheAccess]
|
||||||
|
in
|
||||||
|
do
|
||||||
|
(ss, e') <- translateExpr e
|
||||||
|
vs <- gets fst
|
||||||
|
let callPrereq p = Py.Standalone $ Py.FunctionCall (Py.Var p) []
|
||||||
|
let prereqs = maybe [] (map callPrereq) $ Map.lookup n vs
|
||||||
|
return $ Py.FunctionDef n [] $ ss ++ prereqs ++ body e'
|
||||||
|
|
||||||
|
translateExpr :: Expr -> Translator ([Py.PyStmt], Py.PyExpr)
|
||||||
|
translateExpr (Var s) = do
|
||||||
|
vs <- gets fst
|
||||||
|
let sVar = Py.Var s
|
||||||
|
let expr = if Map.member s vs then Py.FunctionCall sVar [] else sVar
|
||||||
|
return ([], expr)
|
||||||
|
translateExpr (IntLiteral i) = return ([], Py.IntLiteral i)
|
||||||
|
translateExpr (ListLiteral l) = do
|
||||||
|
tl <- mapM translateExpr l
|
||||||
|
return (concatMap fst tl, Py.ListLiteral $ map snd tl)
|
||||||
|
translateExpr (Split e ss e') = do
|
||||||
|
vs <- gets fst
|
||||||
|
let cacheAssign = Py.Assign (Py.VarPat "cache") (Py.DictLiteral [])
|
||||||
|
let cacheStmt = [ cacheAssign | Map.size vs == 0 ]
|
||||||
|
let vnames = map (\(Selector n es) -> n) ss
|
||||||
|
let prereqs = snd $ foldl (\(ds, m) (Selector n es) -> (n:ds, Map.insert n ds m)) ([], Map.empty) ss
|
||||||
|
modify $ first $ Map.union prereqs
|
||||||
|
fs <- mapM translateSelector ss
|
||||||
|
(sts, te) <- translateExpr e'
|
||||||
|
modify $ first $ const vs
|
||||||
|
return (cacheStmt ++ fs ++ sts, te)
|
||||||
|
translateExpr (IfElse i t e) = do
|
||||||
|
temp <- incrementTemp
|
||||||
|
let tempPat = Py.VarPat temp
|
||||||
|
(ists, ie) <- translateExpr i
|
||||||
|
(tsts, te) <- translateExpr t
|
||||||
|
(ests, ee) <- translateExpr e
|
||||||
|
let thenSts = tsts ++ [Py.Assign tempPat te]
|
||||||
|
let elseSts = ests ++ [Py.Assign tempPat ee]
|
||||||
|
let newIf = Py.IfElse ie thenSts [] $ Just elseSts
|
||||||
|
return (ists ++ [newIf], Py.Var temp)
|
||||||
|
translateExpr (BinOp o l r) = do
|
||||||
|
(lsts, le) <- translateExpr l
|
||||||
|
(rsts, re) <- translateExpr r
|
||||||
|
(opsts, oe) <- translateOp o le re
|
||||||
|
return (lsts ++ rsts ++ opsts, oe)
|
||||||
|
translateExpr (FunctionCall f ps) = do
|
||||||
|
(fsts, fe) <- translateExpr f
|
||||||
|
tps <- mapM translateExpr ps
|
||||||
|
return (fsts ++ concatMap fst tps, Py.FunctionCall fe $ map snd tps)
|
||||||
|
translateExpr (LengthOf e) =
|
||||||
|
second (Py.FunctionCall (Py.Var "len") . return) <$> translateExpr e
|
||||||
|
translateExpr (Access e Random m) = do
|
||||||
|
temp <- incrementTemp
|
||||||
|
(sts, ce) <- translateExpr e
|
||||||
|
let assignTemp = Py.Assign (Py.VarPat temp) ce
let lenExpr = Py.FunctionCall (Py.Var "len") [Py.Var temp]
|
||||||
|
let randExpr = Py.FunctionCall (Py.Var "randint") [ Py.IntLiteral 0, Py.BinOp Py.Subtract lenExpr (Py.IntLiteral 1) ]
|
||||||
|
return (sts ++ [assignTemp], singleAccess (Py.Var temp) randExpr m)
|
||||||
|
translateExpr (Access c i m) = do
|
||||||
|
(csts, ce) <- translateExpr c
|
||||||
|
(ists, ie) <- translateExpr i
|
||||||
|
temp <- incrementTemp
|
||||||
|
if hasLambda i
|
||||||
|
then return (csts ++ ists ++ [createFilterLambda temp ie m], Py.FunctionCall (Py.Var temp) [ce])
|
||||||
|
else return (csts ++ ists, singleAccess ce ie m)
|
||||||
|
translateExpr (Parameter i) = return $ ([], Py.Var $ "arg" ++ show i)
|
||||||
|
translateExpr _ = fail "Invalid expression"
|
||||||
|
|
||||||
|
singleAccess :: Py.PyExpr -> Py.PyExpr -> SelectorMarker -> Py.PyExpr
|
||||||
|
singleAccess c i None = Py.Access c [i]
|
||||||
|
singleAccess c i Remove = Py.FunctionCall (Py.Member c "pop") [i]
|
||||||
|
|
||||||
|
createFilterLambda :: String -> Py.PyExpr -> SelectorMarker -> Py.PyStmt
|
||||||
|
createFilterLambda s e None = Py.FunctionDef s ["arg"]
|
||||||
|
[ Py.Assign (Py.VarPat "out") (Py.ListLiteral [])
|
||||||
|
, Py.For (Py.VarPat "arg0") (Py.Var "arg")
|
||||||
|
[ Py.IfElse e
|
||||||
|
[ Py.Standalone $ Py.FunctionCall (Py.Member (Py.Var "out") "append")
|
||||||
|
[ Py.Var "arg0" ]
|
||||||
|
]
|
||||||
|
[]
|
||||||
|
Nothing
|
||||||
|
]
|
||||||
|
, Py.Return $ Py.Var "out"
|
||||||
|
]
|
||||||
|
createFilterLambda s e Remove = Py.FunctionDef s ["arg"]
|
||||||
|
[ Py.Assign (Py.VarPat "i") $ Py.IntLiteral 0
|
||||||
|
, Py.Assign (Py.VarPat "out") (Py.ListLiteral [])
|
||||||
|
, Py.While (Py.BinOp Py.LessThan (Py.Var "i") $ Py.FunctionCall (Py.Var "len") [Py.Var "arg"])
|
||||||
|
[ Py.IfElse e
|
||||||
|
[ Py.Standalone $ Py.FunctionCall (Py.Member (Py.Var "out") "append")
|
||||||
|
[ singleAccess (Py.Var "arg") (Py.Var "i") Remove
|
||||||
|
]
|
||||||
|
]
|
||||||
|
[]
|
||||||
|
Nothing
|
||||||
|
, Py.Assign (Py.VarPat "i") (Py.BinOp Py.Add (Py.Var "i") (Py.IntLiteral 1))
|
||||||
|
]
|
||||||
|
, Py.Return $ Py.Var "out"
|
||||||
|
]
|
||||||
|
|
||||||
|
translateOp :: Op -> Py.PyExpr -> Py.PyExpr -> Translator ([Py.PyStmt], Py.PyExpr)
|
||||||
|
translateOp Add l r = return ([], Py.BinOp Py.Add l r)
|
||||||
|
translateOp Subtract l r = return ([], Py.BinOp Py.Subtract l r)
|
||||||
|
translateOp Multiply l r = return ([], Py.BinOp Py.Multiply l r)
|
||||||
|
translateOp Divide l r = return ([], Py.BinOp Py.Divide l r)
|
||||||
|
translateOp LessThan l r = return ([], Py.BinOp Py.LessThan l r)
|
||||||
|
translateOp LessThanEq l r = return ([], Py.BinOp Py.LessThanEq l r)
|
||||||
|
translateOp GreaterThan l r = return ([], Py.BinOp Py.GreaterThan l r)
|
||||||
|
translateOp GreaterThanEq l r = return ([], Py.BinOp Py.GreaterThanEq l r)
|
||||||
|
translateOp Equal l r = return ([], Py.BinOp Py.Equal l r)
|
||||||
|
translateOp NotEqual l r = return ([], Py.BinOp Py.NotEqual l r)
|
||||||
|
translateOp And l r = return ([], Py.BinOp Py.And l r)
|
||||||
|
translateOp Or l r = return ([], Py.BinOp Py.Or l r)
|
||||||
|
translateOp Concat l r = return ([], Py.BinOp Py.Add l r)
|
||||||
|
translateOp Insert l r = do
|
||||||
|
temp <- incrementTemp
|
||||||
|
let assignStmt = Py.Assign (Py.VarPat temp) l
|
||||||
|
let appendFunc = Py.Member (Py.Var temp) "append"
|
||||||
|
let insertStmt = Py.Standalone $ Py.FunctionCall appendFunc [r]
|
||||||
|
return ([assignStmt, insertStmt], Py.Var temp)
|
||||||
461
code/cs325-langs/src/LanguageThree.hs
Normal file
461
code/cs325-langs/src/LanguageThree.hs
Normal file
@@ -0,0 +1,461 @@
|
|||||||
|
module LanguageThree where
|
||||||
|
import qualified CommonParsing as P
|
||||||
|
import qualified PythonAst as Py
|
||||||
|
import Control.Monad.State
|
||||||
|
import Data.Bifunctor
|
||||||
|
import Data.Foldable
|
||||||
|
import Data.Functor
|
||||||
|
import qualified Data.Map as Map
|
||||||
|
import Data.Maybe
|
||||||
|
import Text.Parsec hiding (State)
|
||||||
|
import Text.Parsec.Char
|
||||||
|
import Text.Parsec.Combinator
|
||||||
|
|
||||||
|
{- Data Types -}
|
||||||
|
data Op
|
||||||
|
= Add
|
||||||
|
| Subtract
|
||||||
|
| Multiply
|
||||||
|
| Divide
|
||||||
|
| LessThan
|
||||||
|
| LessThanEqual
|
||||||
|
| GreaterThan
|
||||||
|
| GreaterThanEqual
|
||||||
|
| Equal
|
||||||
|
| NotEqual
|
||||||
|
| And
|
||||||
|
| Or
|
||||||
|
|
||||||
|
data Expr
|
||||||
|
= TraverserCall String [Expr]
|
||||||
|
| FunctionCall String [Expr]
|
||||||
|
| BinOp Op Expr Expr
|
||||||
|
| Lambda [String] Expr
|
||||||
|
| Var String
|
||||||
|
| IntLiteral Int
|
||||||
|
| BoolLiteral Bool
|
||||||
|
| ListLiteral [Expr]
|
||||||
|
| TupleLiteral [Expr]
|
||||||
|
|
||||||
|
type Branch = (Expr, [Stmt])
|
||||||
|
|
||||||
|
data Stmt
|
||||||
|
= IfElse Branch [Branch] [Stmt]
|
||||||
|
| While Branch
|
||||||
|
| Traverser String [(String, Expr)]
|
||||||
|
| Let Pat Expr
|
||||||
|
| Return Expr
|
||||||
|
| Standalone Expr
|
||||||
|
|
||||||
|
data Pat
|
||||||
|
= VarPat String
|
||||||
|
| TuplePat [Pat]
|
||||||
|
|
||||||
|
data SortedMarker = Sorted | Unsorted deriving Eq
|
||||||
|
|
||||||
|
data Function = Function SortedMarker String [String] [Stmt]
|
||||||
|
|
||||||
|
data Prog = Prog [Function]
|
||||||
|
|
||||||
|
{- Parser -}
|
||||||
|
type Parser = Parsec String ()
|
||||||
|
|
||||||
|
parseVar :: Parser String
|
||||||
|
parseVar = P.var
|
||||||
|
[ "if", "elif", "else"
|
||||||
|
, "while", "let", "traverser"
|
||||||
|
, "function", "sort"
|
||||||
|
, "true", "false"
|
||||||
|
]
|
||||||
|
|
||||||
|
parseBool :: Parser Bool
|
||||||
|
parseBool = (string "true" $> True) <|> (string "false" $> False)
|
||||||
|
|
||||||
|
parseList :: Parser Expr
|
||||||
|
parseList = ListLiteral <$> P.list '[' ']' ',' parseExpr
|
||||||
|
|
||||||
|
parseTupleElems :: Parser [Expr]
|
||||||
|
parseTupleElems = P.list '(' ')' ',' parseExpr
|
||||||
|
|
||||||
|
parseTuple :: Parser Expr
|
||||||
|
parseTuple = do
|
||||||
|
es <- parseTupleElems
|
||||||
|
return $ case es of
|
||||||
|
e:[] -> e
|
||||||
|
_ -> TupleLiteral es
|
||||||
|
|
||||||
|
parseLambda :: Parser Expr
|
||||||
|
parseLambda = try $ do
|
||||||
|
vs <- P.list '(' ')' ',' parseVar
|
||||||
|
string "->" >> spaces
|
||||||
|
Lambda vs <$> parseExpr
|
||||||
|
|
||||||
|
parseCall :: Parser Expr
|
||||||
|
parseCall = try $ do
|
||||||
|
v <- parseVar
|
||||||
|
choice
|
||||||
|
[ TraverserCall v <$> (char '!' *> parseTupleElems)
|
||||||
|
, FunctionCall v <$> parseTupleElems
|
||||||
|
]
|
||||||
|
|
||||||
|
parseBasic :: Parser Expr
|
||||||
|
parseBasic = choice
|
||||||
|
[ IntLiteral <$> P.int
|
||||||
|
, BoolLiteral <$> parseBool
|
||||||
|
, try parseCall
|
||||||
|
, Var <$> parseVar
|
||||||
|
, parseList
|
||||||
|
, parseLambda
|
||||||
|
, parseTuple
|
||||||
|
]
|
||||||
|
|
||||||
|
parseExpr :: Parser Expr
|
||||||
|
parseExpr = P.precedence BinOp parseBasic
|
||||||
|
[ P.op "*" Multiply <|> P.op "/" Divide
|
||||||
|
, P.op "+" Add <|> P.op "-" Subtract
|
||||||
|
, P.op "==" Equal <|> P.op "!=" NotEqual <|>
|
||||||
|
try (P.op "<=" LessThanEqual) <|> P.op "<" LessThan <|>
|
||||||
|
try (P.op ">=" GreaterThanEqual) <|> P.op ">" GreaterThan
|
||||||
|
, P.op "and" And
|
||||||
|
, P.op "or" Or
|
||||||
|
]
|
||||||
|
|
||||||
|
parseBlock :: Parser [Stmt]
|
||||||
|
parseBlock = char '{' >> spaces >> many parseStmt <* char '}' <* spaces
|
||||||
|
|
||||||
|
parseBranch :: Parser Branch
|
||||||
|
parseBranch = (,) <$> (parseExpr <* spaces) <*> parseBlock
|
||||||
|
|
||||||
|
parseIf :: Parser Stmt
|
||||||
|
parseIf = do
|
||||||
|
i <- P.kwIf >> parseBranch
|
||||||
|
els <- many (P.kwElsif >> parseBranch)
|
||||||
|
e <- try (P.kwElse >> parseBlock) <|> return []
|
||||||
|
return $ IfElse i els e
|
||||||
|
|
||||||
|
parseWhile :: Parser Stmt
|
||||||
|
parseWhile = While <$> (P.kwWhile >> parseBranch)
|
||||||
|
|
||||||
|
parseTraverser :: Parser Stmt
|
||||||
|
parseTraverser = Traverser
|
||||||
|
<$> (P.kwTraverser *> parseVar)
|
||||||
|
<*> (P.list '(' ')' ',' parseKey) <* char ';' <* spaces
|
||||||
|
|
||||||
|
parseKey :: Parser (String, Expr)
|
||||||
|
parseKey = (,)
|
||||||
|
<$> (parseVar <* spaces <* char ':' <* spaces)
|
||||||
|
<*> parseExpr
|
||||||
|
|
||||||
|
parseLet :: Parser Stmt
|
||||||
|
parseLet = Let
|
||||||
|
<$> (P.kwLet >> parsePat <* char '=' <* spaces)
|
||||||
|
<*> parseExpr <* char ';' <* spaces
|
||||||
|
|
||||||
|
parseReturn :: Parser Stmt
|
||||||
|
parseReturn = Return <$> (P.kwReturn >> parseExpr <* char ';' <* spaces)
|
||||||
|
|
||||||
|
parsePat :: Parser Pat
|
||||||
|
parsePat = (VarPat <$> parseVar) <|> (TuplePat <$> P.list '(' ')' ',' parsePat)
|
||||||
|
|
||||||
|
parseStmt :: Parser Stmt
|
||||||
|
parseStmt = choice
|
||||||
|
[ parseTraverser
|
||||||
|
, parseLet
|
||||||
|
, parseIf
|
||||||
|
, parseWhile
|
||||||
|
, parseReturn
|
||||||
|
, Standalone <$> (parseExpr <* char ';' <* spaces)
|
||||||
|
]
|
||||||
|
|
||||||
|
parseFunction :: Parser Function
|
||||||
|
parseFunction = Function
|
||||||
|
<$> (P.kwSorted $> Sorted <|> return Unsorted)
|
||||||
|
<*> (P.kwFunction >> parseVar)
|
||||||
|
<*> (P.list '(' ')' ',' parseVar)
|
||||||
|
<*> parseBlock
|
||||||
|
|
||||||
|
parseProg :: Parser Prog
|
||||||
|
parseProg = Prog <$> many parseFunction
|
||||||
|
|
||||||
|
parse :: String -> String -> Either ParseError Prog
|
||||||
|
parse = runParser parseProg ()
|
||||||
|
|
||||||
|
{- Translation -}
|
||||||
|
data TraverserBounds = Range Py.PyExpr Py.PyExpr | Random
|
||||||
|
|
||||||
|
data TraverserData = TraverserData
|
||||||
|
{ list :: Maybe String
|
||||||
|
, bounds :: Maybe TraverserBounds
|
||||||
|
, rev :: Bool
|
||||||
|
}
|
||||||
|
|
||||||
|
data ValidTraverserData = ValidTraverserData
|
||||||
|
{ validList :: String
|
||||||
|
, validBounds :: TraverserBounds
|
||||||
|
, validRev :: Bool
|
||||||
|
}
|
||||||
|
|
||||||
|
type Translator = State (Map.Map String ValidTraverserData, [Py.PyStmt], Int)
|
||||||
|
|
||||||
|
getScoped :: Translator (Map.Map String ValidTraverserData)
|
||||||
|
getScoped = gets (\(m, _, _) -> m)
|
||||||
|
|
||||||
|
setScoped :: Map.Map String ValidTraverserData -> Translator ()
|
||||||
|
setScoped m = modify (\(_, ss, i) -> (m, ss, i))
|
||||||
|
|
||||||
|
scope :: Translator a -> Translator a
|
||||||
|
scope m = do
|
||||||
|
s <- getScoped
|
||||||
|
a <- m
|
||||||
|
setScoped s
|
||||||
|
return a
|
||||||
|
|
||||||
|
clearTraverser :: String -> Translator ()
|
||||||
|
clearTraverser s = modify (\(m, ss, i) -> (Map.delete s m, ss, i))
|
||||||
|
|
||||||
|
putTraverser :: String -> ValidTraverserData -> Translator ()
|
||||||
|
putTraverser s vtd = modify (\(m, ss, i) -> (Map.insert s vtd m, ss, i))
|
||||||
|
|
||||||
|
getTemp :: Translator String
|
||||||
|
getTemp = gets $ \(_, _, i) -> "temp" ++ show i
|
||||||
|
|
||||||
|
freshTemp :: Translator String
|
||||||
|
freshTemp = modify (second (+1)) >> getTemp
|
||||||
|
|
||||||
|
emitStatement :: Py.PyStmt -> Translator ()
|
||||||
|
emitStatement = modify . first . (:)
|
||||||
|
|
||||||
|
collectStatements :: Translator a -> Translator ([Py.PyStmt], a)
|
||||||
|
collectStatements t = do
|
||||||
|
modify (first $ const [])
|
||||||
|
a <- t
|
||||||
|
ss <- gets $ \(_, ss, _) -> ss
|
||||||
|
modify (first $ const [])
|
||||||
|
return (ss, a)
|
||||||
|
|
||||||
|
withdrawStatements :: Translator (Py.PyStmt) -> Translator [Py.PyStmt]
|
||||||
|
withdrawStatements ts =
|
||||||
|
(\(ss, s) -> ss ++ [s]) <$> (collectStatements ts)
|
||||||
|
|
||||||
|
requireTraverser :: String -> Translator ValidTraverserData
|
||||||
|
requireTraverser s = gets (\(m, _, _) -> Map.lookup s m) >>= handleMaybe
|
||||||
|
where
|
||||||
|
handleMaybe Nothing = fail "Invalid traverser"
|
||||||
|
handleMaybe (Just vtd) = return vtd
|
||||||
|
|
||||||
|
traverserIncrement :: Bool -> Py.PyExpr -> Py.PyExpr -> Py.PyExpr
|
||||||
|
traverserIncrement rev by e =
|
||||||
|
Py.BinOp op e (Py.BinOp Py.Multiply by (Py.IntLiteral 1))
|
||||||
|
where op = if rev then Py.Subtract else Py.Add
|
||||||
|
|
||||||
|
traverserValid :: Py.PyExpr -> ValidTraverserData -> Py.PyExpr
|
||||||
|
traverserValid e vtd =
|
||||||
|
case validBounds vtd of
|
||||||
|
Range f t ->
|
||||||
|
if validRev vtd
|
||||||
|
then Py.BinOp Py.GreaterThanEq e f
|
||||||
|
else Py.BinOp Py.LessThan e t
|
||||||
|
Random -> Py.BoolLiteral True
|
||||||
|
|
||||||
|
traverserStep :: String -> ValidTraverserData -> Py.PyStmt
|
||||||
|
traverserStep s vtd =
|
||||||
|
case validBounds vtd of
|
||||||
|
Range _ _ -> Py.Assign (Py.VarPat s) $ Py.BinOp op (Py.Var s) (Py.IntLiteral 1)
|
||||||
|
where op = if validRev vtd then Py.Subtract else Py.Add
|
||||||
|
Random -> traverserRandom s $ validList vtd
|
||||||
|
|
||||||
|
traverserRandom :: String -> String -> Py.PyStmt
|
||||||
|
traverserRandom s l =
|
||||||
|
Py.Assign (Py.VarPat s) $ Py.FunctionCall (Py.Var "random.randrange")
|
||||||
|
[Py.FunctionCall (Py.Var "len") [Py.Var l]]
|
||||||
|
|
||||||
|
hasVar :: String -> Py.PyPat -> Bool
|
||||||
|
hasVar s (Py.VarPat s') = s == s'
|
||||||
|
hasVar s (Py.TuplePat ps) = any (hasVar s) ps
|
||||||
|
hasVar s _ = False
|
||||||
|
|
||||||
|
substituteVariable :: String -> Py.PyExpr -> Py.PyExpr -> Py.PyExpr
|
||||||
|
substituteVariable s e (Py.BinOp o l r) =
|
||||||
|
Py.BinOp o (substituteVariable s e l) (substituteVariable s e r)
|
||||||
|
substituteVariable s e (Py.ListLiteral es) =
|
||||||
|
Py.ListLiteral $ map (substituteVariable s e) es
|
||||||
|
substituteVariable s e (Py.DictLiteral es) =
|
||||||
|
Py.DictLiteral $
|
||||||
|
map (first (substituteVariable s e) . second (substituteVariable s e)) es
|
||||||
|
substituteVariable s e (Py.Lambda ps e') =
|
||||||
|
Py.Lambda ps $ if any (hasVar s) ps then substituteVariable s e e' else e'
|
||||||
|
substituteVariable s e (Py.Var s')
|
||||||
|
| s == s' = e
|
||||||
|
| otherwise = Py.Var s'
|
||||||
|
substituteVariable s e (Py.TupleLiteral es) =
|
||||||
|
Py.TupleLiteral $ map (substituteVariable s e) es
|
||||||
|
substituteVariable s e (Py.FunctionCall e' es) =
|
||||||
|
Py.FunctionCall (substituteVariable s e e') $
|
||||||
|
map (substituteVariable s e) es
|
||||||
|
substituteVariable s e (Py.Access e' es) =
|
||||||
|
Py.Access (substituteVariable s e e') $
|
||||||
|
map (substituteVariable s e) es
|
||||||
|
substituteVariable s e (Py.Ternary i t e') =
|
||||||
|
Py.Ternary (substituteVariable s e i) (substituteVariable s e t)
|
||||||
|
(substituteVariable s e e')
|
||||||
|
substituteVariable s e (Py.Member e' m) =
|
||||||
|
Py.Member (substituteVariable s e e') m
|
||||||
|
substituteVariable s e (Py.In e1 e2) =
|
||||||
|
Py.In (substituteVariable s e e1) (substituteVariable s e e2)
|
||||||
|
substituteVariable s e (Py.NotIn e1 e2) =
|
||||||
|
Py.NotIn (substituteVariable s e e1) (substituteVariable s e e2)
|
||||||
|
substituteVariable s e (Py.Slice f t) =
|
||||||
|
Py.Slice (substituteVariable s e <$> f) (substituteVariable s e <$> t)
|
||||||
|
|
||||||
|
translateExpr :: Expr -> Translator Py.PyExpr
|
||||||
|
translateExpr (TraverserCall "pop" [Var s]) = do
|
||||||
|
l <- validList <$> requireTraverser s
|
||||||
|
return $ Py.FunctionCall (Py.Member (Py.Var l) "pop") [Py.Var s]
|
||||||
|
translateExpr (TraverserCall "pos" [Var s]) = do
|
||||||
|
requireTraverser s
|
||||||
|
return $ Py.Var s
|
||||||
|
translateExpr (TraverserCall "at" [Var s]) = do
|
||||||
|
l <- validList <$> requireTraverser s
|
||||||
|
return $ Py.Access (Py.Var l) [Py.Var s]
|
||||||
|
translateExpr (TraverserCall "at" [Var s, IntLiteral i]) = do
|
||||||
|
vtd <- requireTraverser s
|
||||||
|
return $ Py.Access (Py.Var $ validList vtd)
|
||||||
|
[traverserIncrement (validRev vtd) (Py.IntLiteral i) (Py.Var s)]
|
||||||
|
translateExpr (TraverserCall "step" [Var s]) = do
|
||||||
|
vtd <- requireTraverser s
|
||||||
|
emitStatement $ traverserStep s vtd
|
||||||
|
return $ Py.IntLiteral 0
|
||||||
|
translateExpr (TraverserCall "canstep" [Var s]) = do
|
||||||
|
vtd <- requireTraverser s
|
||||||
|
return $
|
||||||
|
traverserValid
|
||||||
|
(traverserIncrement (validRev vtd) (Py.IntLiteral 1) (Py.Var s)) vtd
|
||||||
|
translateExpr (TraverserCall "valid" [Var s]) = do
|
||||||
|
vtd <- requireTraverser s
|
||||||
|
return $ traverserValid (Py.Var s) vtd
|
||||||
|
translateExpr (TraverserCall "subset" [Var s1, Var s2]) = do
|
||||||
|
l1 <- validList <$> requireTraverser s1
|
||||||
|
l2 <- validList <$> requireTraverser s2
|
||||||
|
if l1 == l2
|
||||||
|
then return $ Py.Access (Py.Var l1) [Py.Slice (Just $ Py.Var s1) (Just $ Py.Var s2)]
|
||||||
|
else fail "Incompatible traversers!"
|
||||||
|
translateExpr (TraverserCall "bisect" [Var s, Lambda [x] e]) = do
|
||||||
|
vtd <- requireTraverser s
|
||||||
|
newTemp <- freshTemp
|
||||||
|
lambdaExpr <- translateExpr e
|
||||||
|
let access = Py.Access (Py.Var $ validList vtd) [Py.Var s]
|
||||||
|
let translated = substituteVariable x access lambdaExpr
|
||||||
|
let append s = Py.FunctionCall (Py.Member (Py.Var s) "append") [ access ]
|
||||||
|
let bisectStmt = Py.FunctionDef newTemp []
|
||||||
|
[ Py.Nonlocal [s]
|
||||||
|
, Py.Assign (Py.VarPat "l") (Py.ListLiteral [])
|
||||||
|
, Py.Assign (Py.VarPat "r") (Py.ListLiteral [])
|
||||||
|
, Py.While (traverserValid (Py.Var s) vtd)
|
||||||
|
[ Py.IfElse translated
|
||||||
|
[ Py.Standalone $ append "l" ]
|
||||||
|
[]
|
||||||
|
(Just [ Py.Standalone $ append "r" ])
|
||||||
|
, traverserStep s vtd
|
||||||
|
]
|
||||||
|
, Py.Return $ Py.TupleLiteral [Py.Var "l", Py.Var "r"]
|
||||||
|
]
|
||||||
|
emitStatement bisectStmt
|
||||||
|
return $ Py.FunctionCall (Py.Var newTemp) []
|
||||||
|
translateExpr (TraverserCall _ _) = fail "Invalid traverser operation"
|
||||||
|
translateExpr (FunctionCall f ps) = do
|
||||||
|
pes <- mapM translateExpr ps
|
||||||
|
return $ Py.FunctionCall (Py.Var f) pes
|
||||||
|
translateExpr (BinOp o l r) =
|
||||||
|
Py.BinOp (translateOp o) <$> translateExpr l <*> translateExpr r
|
||||||
|
translateExpr (Lambda ps e) =
|
||||||
|
Py.Lambda (map Py.VarPat ps) <$> translateExpr e
|
||||||
|
translateExpr (Var s) = return $ Py.Var s
|
||||||
|
translateExpr (IntLiteral i) = return $ Py.IntLiteral i
|
||||||
|
translateExpr (BoolLiteral b) = return $ Py.BoolLiteral b
|
||||||
|
translateExpr (ListLiteral es) = Py.ListLiteral <$> mapM translateExpr es
|
||||||
|
translateExpr (TupleLiteral es) = Py.TupleLiteral <$> mapM translateExpr es
|
||||||
|
|
||||||
|
applyOption :: TraverserData -> (String, Py.PyExpr) -> Maybe TraverserData
|
||||||
|
applyOption td ("list", Py.Var s) =
|
||||||
|
return $ td { list = Just s }
|
||||||
|
applyOption td ("span", Py.TupleLiteral [f, t]) =
|
||||||
|
return $ td { bounds = Just $ Range f t }
|
||||||
|
applyOption td ("random", Py.BoolLiteral True) =
|
||||||
|
return $ td { bounds = Just Random }
|
||||||
|
applyOption td ("reverse", Py.BoolLiteral b) =
|
||||||
|
return $ td { rev = b }
|
||||||
|
applyOption td _ = Nothing
|
||||||
|
|
||||||
|
translateOption :: (String, Expr) -> Translator (String, Py.PyExpr)
|
||||||
|
translateOption (s, e) = (,) s <$> translateExpr e
|
||||||
|
|
||||||
|
defaultTraverser :: TraverserData
|
||||||
|
defaultTraverser =
|
||||||
|
TraverserData { list = Nothing, bounds = Nothing, rev = False }
|
||||||
|
|
||||||
|
translateBranch :: Branch -> Translator (Py.PyExpr, [Py.PyStmt])
|
||||||
|
translateBranch (e, s) = (,) <$> translateExpr e <*>
|
||||||
|
(concat <$> mapM (withdrawStatements . translateStmt) s)
|
||||||
|
|
||||||
|
translateStmt :: Stmt -> Translator Py.PyStmt
|
||||||
|
translateStmt (IfElse i els e) = uncurry Py.IfElse
|
||||||
|
<$> (translateBranch i) <*> (mapM translateBranch els) <*> convertElse e
|
||||||
|
where
|
||||||
|
convertElse [] = return Nothing
|
||||||
|
convertElse es = Just . concat <$>
|
||||||
|
mapM (withdrawStatements . translateStmt) es
|
||||||
|
translateStmt (While b) = uncurry Py.While <$> translateBranch b
|
||||||
|
translateStmt (Traverser s os) =
|
||||||
|
foldlM applyOption defaultTraverser <$> mapM translateOption os >>= saveTraverser
|
||||||
|
where
|
||||||
|
saveTraverser :: Maybe TraverserData -> Translator Py.PyStmt
|
||||||
|
saveTraverser (Just (td@TraverserData { list = Just l, bounds = Just bs})) =
|
||||||
|
putTraverser s vtd $> translateInitialBounds s vtd
|
||||||
|
where
|
||||||
|
vtd = ValidTraverserData
|
||||||
|
{ validList = l
|
||||||
|
, validBounds = bs
|
||||||
|
, validRev = rev td
|
||||||
|
}
|
||||||
|
saveTraverser _ = fail "Invalid traverser (!)"
|
||||||
|
translateStmt (Let p e) = Py.Assign <$> translatePat p <*> translateExpr e
|
||||||
|
translateStmt (Return e) = Py.Return <$> translateExpr e
|
||||||
|
translateStmt (Standalone e) = Py.Standalone <$> translateExpr e
|
||||||
|
|
||||||
|
translateInitialBounds :: String -> ValidTraverserData -> Py.PyStmt
|
||||||
|
translateInitialBounds s vtd =
|
||||||
|
case (validBounds vtd, validRev vtd) of
|
||||||
|
(Random, _) -> traverserRandom s $ validList vtd
|
||||||
|
(Range l _, False) -> Py.Assign (Py.VarPat s) l
|
||||||
|
(Range _ r, True) -> Py.Assign (Py.VarPat s) r
|
||||||
|
|
||||||
|
translatePat :: Pat -> Translator Py.PyPat
|
||||||
|
translatePat (VarPat s) = clearTraverser s $> Py.VarPat s
|
||||||
|
translatePat (TuplePat ts) = Py.TuplePat <$> mapM translatePat ts
|
||||||
|
|
||||||
|
translateOp :: Op -> Py.PyBinOp
|
||||||
|
translateOp Add = Py.Add
|
||||||
|
translateOp Subtract = Py.Subtract
|
||||||
|
translateOp Multiply = Py.Multiply
|
||||||
|
translateOp Divide = Py.Divide
|
||||||
|
translateOp LessThan = Py.LessThan
|
||||||
|
translateOp LessThanEqual = Py.LessThanEq
|
||||||
|
translateOp GreaterThan = Py.GreaterThan
|
||||||
|
translateOp GreaterThanEqual = Py.GreaterThanEq
|
||||||
|
translateOp Equal = Py.Equal
|
||||||
|
translateOp NotEqual = Py.NotEqual
|
||||||
|
translateOp And = Py.And
|
||||||
|
translateOp Or = Py.Or
|
||||||
|
|
||||||
|
translateFunction :: Function -> [Py.PyStmt]
|
||||||
|
translateFunction (Function m s ps ss) = return $ Py.FunctionDef s ps $
|
||||||
|
[ Py.Standalone $ Py.FunctionCall (Py.Member (Py.Var p) "sort") []
|
||||||
|
| p <- take 1 ps, m == Sorted ] ++ stmts
|
||||||
|
where
|
||||||
|
stmts = concat $ evalState
|
||||||
|
(mapM (withdrawStatements . translateStmt) ss) (Map.empty, [], 0)
|
||||||
|
|
||||||
|
translate :: Prog -> [Py.PyStmt]
|
||||||
|
translate (Prog fs) =
|
||||||
|
(Py.FromImport "bisect" ["bisect"]) :
|
||||||
|
(Py.Import "random") : concatMap translateFunction fs
|
||||||
198
code/cs325-langs/src/LanguageTwo.hs
Normal file
198
code/cs325-langs/src/LanguageTwo.hs
Normal file
@@ -0,0 +1,198 @@
|
|||||||
|
module LanguageTwo where
|
||||||
|
import qualified PythonAst as Py
|
||||||
|
import qualified CommonParsing as P
|
||||||
|
import Data.Char
|
||||||
|
import Data.Functor
|
||||||
|
import Text.Parsec
|
||||||
|
import Text.Parsec.Char
|
||||||
|
import Text.Parsec.Combinator
|
||||||
|
|
||||||
|
{- Data Types -}
|
||||||
|
data Op
|
||||||
|
= Add
|
||||||
|
| Subtract
|
||||||
|
| Multiply
|
||||||
|
| Divide
|
||||||
|
| Equal
|
||||||
|
| NotEqual
|
||||||
|
| And
|
||||||
|
| Or
|
||||||
|
|
||||||
|
data Expr
|
||||||
|
= IntLiteral Int
|
||||||
|
| BinOp Op Expr Expr
|
||||||
|
| Var String
|
||||||
|
| Length Expr
|
||||||
|
|
||||||
|
data Stmt
|
||||||
|
= IfElse Expr Stmt (Maybe Stmt)
|
||||||
|
| Assign String Expr
|
||||||
|
| Block [Stmt]
|
||||||
|
|
||||||
|
data Prog = Prog Expr [Stmt] [Stmt]
|
||||||
|
|
||||||
|
{- Parser -}
|
||||||
|
type Parser = Parsec String ()
|
||||||
|
|
||||||
|
parseVar :: Parser String
|
||||||
|
parseVar = P.var [ "if", "else", "state", "effect", "combine" ]
|
||||||
|
|
||||||
|
parseLength :: Parser Expr
|
||||||
|
parseLength = Length <$> P.surround '|' '|' parseExpr
|
||||||
|
|
||||||
|
parseParenthesized :: Parser Expr
|
||||||
|
parseParenthesized = P.surround '(' ')' parseExpr
|
||||||
|
|
||||||
|
parseBasic :: Parser Expr
|
||||||
|
parseBasic = choice
|
||||||
|
[ IntLiteral <$> P.int
|
||||||
|
, Var <$> parseVar
|
||||||
|
, parseLength
|
||||||
|
, parseParenthesized
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
parseExpr :: Parser Expr
|
||||||
|
parseExpr = P.precedence BinOp parseBasic
|
||||||
|
[ P.op "*" Multiply <|> P.op "/" Divide
|
||||||
|
, P.op "+" Add <|> P.op "-" Subtract
|
||||||
|
, P.op "==" Equal <|> P.op "!=" NotEqual
|
||||||
|
, P.op "&&" And
|
||||||
|
, try $ P.op "||" Or
|
||||||
|
]
|
||||||
|
|
||||||
|
parseIf :: Parser Stmt
|
||||||
|
parseIf = do
|
||||||
|
P.kwIf >> spaces
|
||||||
|
c <- parseParenthesized
|
||||||
|
t <- parseStmt <* spaces
|
||||||
|
e <- (Just <$> (P.kwElse >> spaces *> parseStmt)) <|> return Nothing
|
||||||
|
return $ IfElse c t e
|
||||||
|
|
||||||
|
parseBlockStmts :: Parser [Stmt]
|
||||||
|
parseBlockStmts = P.surround '{' '}' (many parseStmt)
|
||||||
|
|
||||||
|
parseBlock :: Parser Stmt
|
||||||
|
parseBlock = Block <$> parseBlockStmts
|
||||||
|
|
||||||
|
parseAssign :: Parser Stmt
|
||||||
|
parseAssign = Assign <$>
|
||||||
|
(parseVar <* char '=' <* spaces) <*>
|
||||||
|
parseExpr <* (char ';' >> spaces)
|
||||||
|
|
||||||
|
parseStmt :: Parser Stmt
|
||||||
|
parseStmt = choice
|
||||||
|
[ parseIf
|
||||||
|
, parseAssign
|
||||||
|
, parseBlock
|
||||||
|
]
|
||||||
|
|
||||||
|
parseProgram :: Parser Prog
|
||||||
|
parseProgram = do
|
||||||
|
state <- P.kwState >> spaces *> parseExpr <* char ';' <* spaces
|
||||||
|
effect <- P.kwEffect >> spaces *> parseBlockStmts <* spaces
|
||||||
|
combined <- P.kwCombine >> spaces *> parseBlockStmts <* spaces
|
||||||
|
return $ Prog state effect combined
|
||||||
|
|
||||||
|
parse :: String -> String -> Either ParseError Prog
|
||||||
|
parse = runParser parseProgram ()
|
||||||
|
|
||||||
|
{- Translation -}
|
||||||
|
baseFunction :: Py.PyExpr -> [Py.PyStmt] -> [Py.PyStmt] -> Py.PyStmt
|
||||||
|
baseFunction s e c = Py.FunctionDef "prog" ["xs"] $
|
||||||
|
[Py.IfElse
|
||||||
|
(Py.BinOp Py.LessThan
|
||||||
|
(Py.FunctionCall (Py.Var "len") [Py.Var "xs"])
|
||||||
|
(Py.IntLiteral 2))
|
||||||
|
[Py.Return $ Py.TupleLiteral [s, Py.Var "xs"]]
|
||||||
|
[]
|
||||||
|
Nothing
|
||||||
|
, Py.Assign (Py.VarPat "leng")
|
||||||
|
(Py.BinOp Py.FloorDiv
|
||||||
|
(Py.FunctionCall (Py.Var "len") [Py.Var "xs"])
|
||||||
|
(Py.IntLiteral 2))
|
||||||
|
, Py.Assign (Py.VarPat "left")
|
||||||
|
(Py.Access
|
||||||
|
(Py.Var "xs")
|
||||||
|
[Py.Slice Nothing $ Just (Py.Var "leng")])
|
||||||
|
, Py.Assign (Py.VarPat "right")
|
||||||
|
(Py.Access
|
||||||
|
(Py.Var "xs")
|
||||||
|
[Py.Slice (Just (Py.Var "leng")) Nothing])
|
||||||
|
, Py.Assign (Py.TuplePat [Py.VarPat "ls", Py.VarPat "left"])
|
||||||
|
(Py.FunctionCall (Py.Var "prog") [Py.Var "left"])
|
||||||
|
, Py.Assign (Py.TuplePat [Py.VarPat "rs", Py.VarPat "right"])
|
||||||
|
(Py.FunctionCall (Py.Var "prog") [Py.Var "right"])
|
||||||
|
, Py.Standalone $
|
||||||
|
Py.FunctionCall (Py.Member (Py.Var "left") "reverse") []
|
||||||
|
, Py.Standalone $
|
||||||
|
Py.FunctionCall (Py.Member (Py.Var "right") "reverse") []
|
||||||
|
, Py.Assign (Py.VarPat "state") s
|
||||||
|
, Py.Assign (Py.VarPat "source") (Py.IntLiteral 0)
|
||||||
|
, Py.Assign (Py.VarPat "total") (Py.ListLiteral [])
|
||||||
|
, Py.While
|
||||||
|
(Py.BinOp Py.And
|
||||||
|
(Py.BinOp Py.NotEqual (Py.Var "left") (Py.ListLiteral []))
|
||||||
|
(Py.BinOp Py.NotEqual (Py.Var "right") (Py.ListLiteral []))) $
|
||||||
|
[ Py.IfElse
|
||||||
|
(Py.BinOp Py.LessThanEq
|
||||||
|
(Py.Access (Py.Var "left") [Py.IntLiteral $ -1])
|
||||||
|
(Py.Access (Py.Var "right") [Py.IntLiteral $ -1]))
|
||||||
|
[ Py.Standalone $
|
||||||
|
Py.FunctionCall (Py.Member (Py.Var "total") "append")
|
||||||
|
[Py.FunctionCall (Py.Member (Py.Var "left") "pop") []]
|
||||||
|
, Py.Assign (Py.VarPat "source") (Py.IntLiteral 1)
|
||||||
|
]
|
||||||
|
[] $
|
||||||
|
Just
|
||||||
|
[ Py.Standalone $
|
||||||
|
Py.FunctionCall (Py.Member (Py.Var "total") "append")
|
||||||
|
[Py.FunctionCall (Py.Member (Py.Var "right") "pop") []]
|
||||||
|
, Py.Assign (Py.VarPat "source") (Py.IntLiteral 2)
|
||||||
|
]
|
||||||
|
] ++ e
|
||||||
|
] ++ c ++
|
||||||
|
[ Py.Standalone $ Py.FunctionCall (Py.Member (Py.Var "left") "reverse") []
|
||||||
|
, Py.Standalone $ Py.FunctionCall (Py.Member (Py.Var "right") "reverse") []
|
||||||
|
, Py.Return $ Py.TupleLiteral
|
||||||
|
[ Py.Var "state"
|
||||||
|
, foldl (Py.BinOp Py.Add) (Py.Var "total") [Py.Var "left", Py.Var "right"]
|
||||||
|
]
|
||||||
|
]
|
||||||
|
|
||||||
|
translateExpr :: Expr -> Py.PyExpr
|
||||||
|
translateExpr (IntLiteral i) = Py.IntLiteral i
|
||||||
|
translateExpr (BinOp op l r) =
|
||||||
|
Py.BinOp (translateOp op) (translateExpr l) (translateExpr r)
|
||||||
|
translateExpr (Var s)
|
||||||
|
| s == "SOURCE" = Py.Var "source"
|
||||||
|
| s == "LEFT" = Py.Var "left"
|
||||||
|
| s == "RIGHT" = Py.Var "right"
|
||||||
|
| s == "STATE" = Py.Var "state"
|
||||||
|
| s == "LSTATE" = Py.Var "ls"
|
||||||
|
| s == "RSTATE" = Py.Var "rs"
|
||||||
|
| s == "L" = Py.IntLiteral 1
|
||||||
|
| s == "R" = Py.IntLiteral 2
|
||||||
|
| otherwise = Py.Var s
|
||||||
|
translateExpr (Length e) = Py.FunctionCall (Py.Var "len") [translateExpr e]
|
||||||
|
|
||||||
|
translateOp :: Op -> Py.PyBinOp
|
||||||
|
translateOp Add = Py.Add
|
||||||
|
translateOp Subtract = Py.Subtract
|
||||||
|
translateOp Multiply = Py.Multiply
|
||||||
|
translateOp Divide = Py.Divide
|
||||||
|
translateOp Equal = Py.Equal
|
||||||
|
translateOp NotEqual = Py.NotEqual
|
||||||
|
translateOp And = Py.And
|
||||||
|
translateOp Or = Py.Or
|
||||||
|
|
||||||
|
translateStmt :: Stmt -> [Py.PyStmt]
|
||||||
|
translateStmt (IfElse c t e) =
|
||||||
|
[Py.IfElse (translateExpr c) (translateStmt t) [] (translateStmt <$> e)]
|
||||||
|
translateStmt (Assign "STATE" e) = [Py.Assign (Py.VarPat "state") (translateExpr e)]
|
||||||
|
translateStmt (Assign v e) = [Py.Assign (Py.VarPat v) (translateExpr e)]
|
||||||
|
translateStmt (Block s) = concatMap translateStmt s
|
||||||
|
|
||||||
|
translate :: Prog -> [Py.PyStmt]
|
||||||
|
translate (Prog s e c) =
|
||||||
|
[baseFunction (translateExpr s) (concatMap translateStmt e) (concatMap translateStmt c)]
|
||||||
52
code/cs325-langs/src/PythonAst.hs
Normal file
52
code/cs325-langs/src/PythonAst.hs
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
module PythonAst where
|
||||||
|
|
||||||
|
data PyBinOp
|
||||||
|
= Add
|
||||||
|
| Subtract
|
||||||
|
| Multiply
|
||||||
|
| Divide
|
||||||
|
| FloorDiv
|
||||||
|
| LessThan
|
||||||
|
| LessThanEq
|
||||||
|
| GreaterThan
|
||||||
|
| GreaterThanEq
|
||||||
|
| Equal
|
||||||
|
| NotEqual
|
||||||
|
| And
|
||||||
|
| Or
|
||||||
|
|
||||||
|
data PyExpr
|
||||||
|
= BinOp PyBinOp PyExpr PyExpr
|
||||||
|
| IntLiteral Int
|
||||||
|
| StrLiteral String
|
||||||
|
| BoolLiteral Bool
|
||||||
|
| ListLiteral [PyExpr]
|
||||||
|
| DictLiteral [(PyExpr, PyExpr)]
|
||||||
|
| Lambda [PyPat] PyExpr
|
||||||
|
| Var String
|
||||||
|
| TupleLiteral [PyExpr]
|
||||||
|
| FunctionCall PyExpr [PyExpr]
|
||||||
|
| Access PyExpr [PyExpr]
|
||||||
|
| Ternary PyExpr PyExpr PyExpr
|
||||||
|
| Member PyExpr String
|
||||||
|
| In PyExpr PyExpr
|
||||||
|
| NotIn PyExpr PyExpr
|
||||||
|
| Slice (Maybe PyExpr) (Maybe PyExpr)
|
||||||
|
|
||||||
|
data PyPat
|
||||||
|
= VarPat String
|
||||||
|
| IgnorePat
|
||||||
|
| TuplePat [PyPat]
|
||||||
|
| AccessPat PyExpr [PyExpr]
|
||||||
|
|
||||||
|
data PyStmt
|
||||||
|
= Assign PyPat PyExpr
|
||||||
|
| IfElse PyExpr [PyStmt] [(PyExpr, [PyStmt])] (Maybe [PyStmt])
|
||||||
|
| While PyExpr [PyStmt]
|
||||||
|
| For PyPat PyExpr [PyStmt]
|
||||||
|
| FunctionDef String [String] [PyStmt]
|
||||||
|
| Return PyExpr
|
||||||
|
| Standalone PyExpr
|
||||||
|
| Import String
|
||||||
|
| FromImport String [String]
|
||||||
|
| Nonlocal [String]
|
||||||
142
code/cs325-langs/src/PythonGen.hs
Normal file
142
code/cs325-langs/src/PythonGen.hs
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
module PythonGen where
|
||||||
|
import PythonAst
|
||||||
|
import Data.List
|
||||||
|
import Data.Bifunctor
|
||||||
|
import Data.Maybe
|
||||||
|
|
||||||
|
indent :: String -> String
|
||||||
|
indent = (" " ++)
|
||||||
|
|
||||||
|
stmtBlock :: [PyStmt] -> [String]
|
||||||
|
stmtBlock = concatMap translateStmt
|
||||||
|
|
||||||
|
block :: String -> [String] -> [String]
|
||||||
|
block s ss = (s ++ ":") : map indent ss
|
||||||
|
|
||||||
|
prefix :: String -> PyExpr -> [PyStmt] -> [String]
|
||||||
|
prefix s e sts = block (s ++ " " ++ translateExpr e) $ stmtBlock sts
|
||||||
|
|
||||||
|
if_ :: PyExpr -> [PyStmt] -> [String]
|
||||||
|
if_ = prefix "if"
|
||||||
|
|
||||||
|
elif :: PyExpr -> [PyStmt] -> [String]
|
||||||
|
elif = prefix "elif"
|
||||||
|
|
||||||
|
else_ :: [PyStmt] -> [String]
|
||||||
|
else_ = block "else" . stmtBlock
|
||||||
|
|
||||||
|
while :: PyExpr -> [PyStmt] -> [String]
|
||||||
|
while = prefix "while"
|
||||||
|
|
||||||
|
parenth :: String -> String
|
||||||
|
parenth s = "(" ++ s ++ ")"
|
||||||
|
|
||||||
|
translateStmt :: PyStmt -> [String]
|
||||||
|
translateStmt (Assign p e) = [translatePat p ++ " = " ++ translateExpr e]
|
||||||
|
translateStmt (IfElse i t es e) =
|
||||||
|
if_ i t ++ concatMap (uncurry elif) es ++ maybe [] else_ e
|
||||||
|
translateStmt (While c t) = while c t
|
||||||
|
translateStmt (For x in_ b) = block head body
|
||||||
|
where
|
||||||
|
head = "for " ++ translatePat x ++ " in " ++ translateExpr in_
|
||||||
|
body = stmtBlock b
|
||||||
|
translateStmt (FunctionDef s ps b) = block head body
|
||||||
|
where
|
||||||
|
head = "def " ++ s ++ "(" ++ intercalate "," ps ++ ")"
|
||||||
|
body = stmtBlock b
|
||||||
|
translateStmt (Return e) = ["return " ++ translateExpr e]
|
||||||
|
translateStmt (Standalone e) = [translateExpr e]
|
||||||
|
translateStmt (Import s) = ["import " ++ s]
|
||||||
|
translateStmt (FromImport s ss) =
|
||||||
|
["from " ++ s ++ " import " ++ intercalate "," ss]
|
||||||
|
translateStmt (Nonlocal vs) =
|
||||||
|
["nonlocal " ++ intercalate "," vs]
|
||||||
|
|
||||||
|
precedence :: PyBinOp -> Int
|
||||||
|
precedence Add = 3
|
||||||
|
precedence Subtract = 3
|
||||||
|
precedence Multiply = 4
|
||||||
|
precedence Divide = 4
|
||||||
|
precedence FloorDiv = 4
|
||||||
|
precedence LessThan = 2
|
||||||
|
precedence LessThanEq = 2
|
||||||
|
precedence GreaterThan = 2
|
||||||
|
precedence GreaterThanEq = 2
|
||||||
|
precedence Equal = 2
|
||||||
|
precedence NotEqual = 2
|
||||||
|
precedence And = 1
|
||||||
|
precedence Or = 0
|
||||||
|
|
||||||
|
opString :: PyBinOp -> String
|
||||||
|
opString Add = "+"
|
||||||
|
opString Subtract = "-"
|
||||||
|
opString Multiply = "*"
|
||||||
|
opString Divide = "/"
|
||||||
|
opString FloorDiv = "//"
|
||||||
|
opString LessThan = "<"
|
||||||
|
opString LessThanEq = "<="
|
||||||
|
opString GreaterThan = ">"
|
||||||
|
opString GreaterThanEq = ">="
|
||||||
|
opString Equal = "=="
|
||||||
|
opString NotEqual = "!="
|
||||||
|
opString And = " and "
|
||||||
|
opString Or = " or "
|
||||||
|
|
||||||
|
translateOp :: PyBinOp -> PyBinOp -> PyExpr -> String
|
||||||
|
translateOp o o' =
|
||||||
|
if precedence o > precedence o'
|
||||||
|
then parenth . translateExpr
|
||||||
|
else translateExpr
|
||||||
|
|
||||||
|
dictMapping :: PyExpr -> PyExpr -> String
|
||||||
|
dictMapping f t = translateExpr f ++ ": " ++ translateExpr t
|
||||||
|
|
||||||
|
list :: String -> String -> [PyExpr] -> String
|
||||||
|
list o c es = o ++ intercalate ", " (map translateExpr es) ++ c
|
||||||
|
|
||||||
|
translateExpr :: PyExpr -> String
|
||||||
|
translateExpr (BinOp o l@(BinOp o1 _ _) r@(BinOp o2 _ _)) =
|
||||||
|
translateOp o o1 l ++ opString o ++ translateOp o o2 r
|
||||||
|
translateExpr (BinOp o l@(BinOp o1 _ _) r) =
|
||||||
|
translateOp o o1 l ++ opString o ++ translateExpr r
|
||||||
|
translateExpr (BinOp o l r@(BinOp o2 _ _)) =
|
||||||
|
translateExpr l ++ opString o ++ translateOp o o2 r
|
||||||
|
translateExpr (BinOp o l r) =
|
||||||
|
translateExpr l ++ opString o ++ translateExpr r
|
||||||
|
translateExpr (IntLiteral i) = show i
|
||||||
|
translateExpr (StrLiteral s) = "\"" ++ s ++ "\""
|
||||||
|
translateExpr (BoolLiteral b) = if b then "True" else "False"
|
||||||
|
translateExpr (ListLiteral l) = list "[" "]" l
|
||||||
|
translateExpr (DictLiteral l) =
|
||||||
|
"{" ++ intercalate ", " (map (uncurry dictMapping) l) ++ "}"
|
||||||
|
translateExpr (Lambda ps e) = parenth (head ++ ": " ++ body)
|
||||||
|
where
|
||||||
|
head = "lambda " ++ intercalate ", " (map translatePat ps)
|
||||||
|
body = translateExpr e
|
||||||
|
translateExpr (Var s) = s
|
||||||
|
translateExpr (TupleLiteral es) = list "(" ")" es
|
||||||
|
translateExpr (FunctionCall f ps) = translateExpr f ++ list "(" ")" ps
|
||||||
|
translateExpr (Access (Var s) e) = s ++ list "[" "]" e
|
||||||
|
translateExpr (Access e@Access{} i) = translateExpr e ++ list "[" "]" i
|
||||||
|
translateExpr (Access e i) = "(" ++ translateExpr e ++ ")" ++ list "[" "]" i
|
||||||
|
translateExpr (Ternary c t e) =
|
||||||
|
translateExpr t ++ " if " ++ translateExpr c ++ " else " ++ translateExpr e
|
||||||
|
translateExpr (Member (Var s) m) = s ++ "." ++ m
|
||||||
|
translateExpr (Member e@Member{} m) = translateExpr e ++ "." ++ m
|
||||||
|
translateExpr (Member e m) = "(" ++ translateExpr e ++ ")." ++ m
|
||||||
|
translateExpr (In m c) =
|
||||||
|
"(" ++ translateExpr m ++ ") in (" ++ translateExpr c ++ ")"
|
||||||
|
translateExpr (NotIn m c) =
|
||||||
|
"(" ++ translateExpr m ++ ") not in (" ++ translateExpr c ++ ")"
|
||||||
|
translateExpr (Slice l r) =
|
||||||
|
maybe [] (parenth . translateExpr) l ++ ":" ++ maybe [] (parenth . translateExpr) r
|
||||||
|
|
||||||
|
translatePat :: PyPat -> String
|
||||||
|
translatePat (VarPat s) = s
|
||||||
|
translatePat IgnorePat = "_"
|
||||||
|
translatePat (TuplePat ps) =
|
||||||
|
"(" ++ intercalate "," (map translatePat ps) ++ ")"
|
||||||
|
translatePat (AccessPat e es) = translateExpr (Access e es)
|
||||||
|
|
||||||
|
translate :: [PyStmt] -> String
|
||||||
|
translate = intercalate "\n" . concatMap translateStmt
|
||||||
@@ -2,5 +2,5 @@
|
|||||||
title: Daniel's Blog
|
title: Daniel's Blog
|
||||||
---
|
---
|
||||||
## Hello!
|
## Hello!
|
||||||
Welcome to my blog. Here, I write abour various subjects, including (but not limited to)
|
Welcome to my blog. Here, I write about various subjects, including (but not limited to)
|
||||||
functional programming, compiler development, programming language theory, and occasionally video games. I hope you find something useful here!
|
functional programming, compiler development, programming language theory, and occasionally video games. I hope you find something useful here!
|
||||||
|
|||||||
@@ -140,3 +140,4 @@ Here are the posts that I've written so far for this series:
|
|||||||
* [Compilation]({{< relref "06_compiler_compilation.md" >}})
|
* [Compilation]({{< relref "06_compiler_compilation.md" >}})
|
||||||
* [Runtime]({{< relref "07_compiler_runtime.md" >}})
|
* [Runtime]({{< relref "07_compiler_runtime.md" >}})
|
||||||
* [LLVM]({{< relref "08_compiler_llvm.md" >}})
|
* [LLVM]({{< relref "08_compiler_llvm.md" >}})
|
||||||
|
* [Garbage Collection]({{< relref "09_compiler_garbage_collection.md" >}})
|
||||||
|
|||||||
511
content/blog/00_cs325_languages_hw1.md
Normal file
511
content/blog/00_cs325_languages_hw1.md
Normal file
@@ -0,0 +1,511 @@
|
|||||||
|
---
|
||||||
|
title: A Language for an Assignment - Homework 1
|
||||||
|
date: 2019-12-27T23:27:09-08:00
|
||||||
|
tags: ["Haskell", "Python", "Algorithms"]
|
||||||
|
---
|
||||||
|
|
||||||
|
On a rainy Oregon day, I was walking between classes with a group of friends.
|
||||||
|
We were discussing the various ways to obfuscate solutions to the weekly
|
||||||
|
homework assignments in our Algorithms course: replace every `if` with
|
||||||
|
a ternary expression, use single-letter variable names, put everything on one line.
|
||||||
|
I said:
|
||||||
|
|
||||||
|
> The
|
||||||
|
{{< sidenote "right" "chad-note" "chad" >}}
|
||||||
|
This is in reference to a meme, <a href="https://knowyourmeme.com/memes/virgin-vs-chad">Virgin vs Chad</a>.
|
||||||
|
A "chad" characteristic is masculine or "alpha" to the point of absurdity.
|
||||||
|
{{< /sidenote >}} move would be to make your own, different language for every homework assignment.
|
||||||
|
|
||||||
|
It was required of us to use
|
||||||
|
{{< sidenote "left" "python-note" "Python" >}}
|
||||||
|
A friend suggested making a Haskell program
|
||||||
|
that generates Python-based interpreters for languages. While that would be truly
|
||||||
|
absurd, I'll leave <em>this</em> challenge for another day.
|
||||||
|
{{< /sidenote >}} for our solutions, so that was the first limitation on this challenge.
|
||||||
|
Someone suggested writing the languages in Haskell, since that's what we used
|
||||||
|
in our Programming Languages class. So the final goal ended up being:
|
||||||
|
|
||||||
|
* For each of the 10 homework assignments in CS325 - Analysis of Algorithms,
|
||||||
|
* Create a Haskell program that translates a language into,
|
||||||
|
* A valid Python program that works (nearly) out of the box and passes all the test cases.
|
||||||
|
|
||||||
|
It may not be worth it to create a whole
|
||||||
|
{{< sidenote "right" "general-purpose-note" "general-purpose" >}}
|
||||||
|
A general purpose language is one that's designed to be used in various
|
||||||
|
domains. For instance, C++ is a general-purpose language because it can
|
||||||
|
be used for embedded systems, GUI programs, and pretty much anything else.
|
||||||
|
This is in contrast to a domain-specific language, such as Game Maker Language,
|
||||||
|
which is aimed at a much narrower set of uses.
|
||||||
|
{{< /sidenote >}} language for each problem,
|
||||||
|
but nowhere in the challenge did we say that it had to be general-purpose. In
|
||||||
|
fact, some interesting design thinking can go into designing a domain-specific
|
||||||
|
language for a particular assignment. So let's jump right into it, and make
|
||||||
|
a language for the first homework assignment.
|
||||||
|
|
||||||
|
### Homework 1
|
||||||
|
There are two problems in Homework 1. Here they are, verbatim:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/hws/hw1.txt" 32 38 >}}
|
||||||
|
|
||||||
|
And the second:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/hws/hw1.txt" 47 68 >}}
|
||||||
|
|
||||||
|
We want to make a language __specifically__ for these two tasks (one of which
|
||||||
|
is split into many tasks). What common things can we isolate? I see two:
|
||||||
|
|
||||||
|
First, __all the problems deal with lists__. This may seem like a trivial observation,
|
||||||
|
but these two problems are the __only__ thing we use our language for. We have
|
||||||
|
list access,
|
||||||
|
{{< sidenote "right" "filterting-note" "list filtering" >}}
|
||||||
|
Quickselect is a variation on quicksort, which itself
|
||||||
|
finds all the "lesser" and "greater" elements in the input array.
|
||||||
|
{{< /sidenote >}} and list creation. That should serve as a good base!
|
||||||
|
|
||||||
|
If you squint a little bit, __all the problems are recursive with the same base case__.
|
||||||
|
Consider the first few lines of `search`, implemented naively:
|
||||||
|
|
||||||
|
```Python
|
||||||
|
def search(xs, k):
|
||||||
|
if xs == []:
|
||||||
|
return False
|
||||||
|
```
|
||||||
|
|
||||||
|
How about `sorted`? Take a look:
|
||||||
|
|
||||||
|
```Python
|
||||||
|
def sorted(xs):
|
||||||
|
if xs == []:
|
||||||
|
return []
|
||||||
|
```
|
||||||
|
|
||||||
|
I'm sure you see the picture. But it will take some real mental gymnastics to twist the
|
||||||
|
rest of the problems into this shape. What about `qselect`, for instance? There are two
|
||||||
|
cases for what it may return:
|
||||||
|
|
||||||
|
* `None` or equivalent if the index is out of bounds (we give it `4` and a list `[1, 2]`).
|
||||||
|
* A number if `qselect` worked.
|
||||||
|
|
||||||
|
The test cases never provide a concrete example of what should be returned from
|
||||||
|
`qselect` in the first case, so we'll interpret it like
|
||||||
|
{{< sidenote "right" "undefined-note" "undefined behavior" >}}
|
||||||
|
For a quick sidenote about undefined behavior, check out how
|
||||||
|
C++ optimizes the <a href="https://godbolt.org/z/3skK9j">Collatz Conjecture function</a>.
|
||||||
|
Clang doesn't know whether or not the function will terminate (whether the Collatz Conjecture
|
||||||
|
function terminates is an <a href="https://en.wikipedia.org/wiki/Collatz_conjecture">unsolved problem</a>),
|
||||||
|
but functions that don't terminate are undefined behavior. There's only one other way the function
|
||||||
|
returns, and that's with "1". Thus, clang optimizes the entire function to a single "return 1" call.
|
||||||
|
{{< /sidenote >}} in C++:
|
||||||
|
we can do whatever we want. So, let's allow it to return `[]` in the `None` case.
|
||||||
|
This makes this base case valid:
|
||||||
|
|
||||||
|
```Python
|
||||||
|
def qselect(xs, k):
|
||||||
|
if xs == []:
|
||||||
|
return []
|
||||||
|
```
|
||||||
|
|
||||||
|
"Oh yeah, now it's all coming together." With one more observation (which will come
|
||||||
|
from a piece I haven't yet shown you!), we'll be able to generalize this base case.
|
||||||
|
|
||||||
|
The observation is this section in the assignment:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/hws/hw1.txt" 83 98 >}}
|
||||||
|
|
||||||
|
The real key is the part about "returning the `[]` where x should be inserted". It so
|
||||||
|
happens that when the list given to the function is empty, the number should be inserted
|
||||||
|
precisely into that list. Thus:
|
||||||
|
|
||||||
|
```Python
|
||||||
|
def _search(xs, k):
|
||||||
|
if xs == []:
|
||||||
|
return xs
|
||||||
|
```
|
||||||
|
|
||||||
|
The same works for `qselect`:
|
||||||
|
|
||||||
|
```Python
|
||||||
|
def qselect(xs, k):
|
||||||
|
if xs == []:
|
||||||
|
return xs
|
||||||
|
```
|
||||||
|
|
||||||
|
And for sorted, too:
|
||||||
|
|
||||||
|
```Python
|
||||||
|
def sorted(xs):
|
||||||
|
if xs == []:
|
||||||
|
return xs
|
||||||
|
```
|
||||||
|
|
||||||
|
There are some functions that are exceptions, though:
|
||||||
|
|
||||||
|
```Python
|
||||||
|
def insert(xs, k):
|
||||||
|
# We can't return early here!
|
||||||
|
# If we do, we'll never insert anything.
|
||||||
|
```
|
||||||
|
|
||||||
|
Also:
|
||||||
|
|
||||||
|
```Python
|
||||||
|
def search(xs, k):
|
||||||
|
# We have to return true or false, never
|
||||||
|
# an empty list.
|
||||||
|
```
|
||||||
|
|
||||||
|
So, whenever we __don't__ return a list, we don't want to add a special case.
|
||||||
|
We arrive at the following common base case: __whenever a function returns a list, if its first argument
|
||||||
|
is the empty list, the first argument is immediately returned__.
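
To picture where this rule ends up, here's a hand-written sketch (not the compiler's actual output; the body below the guard is just an illustrative quicksort):

```Python
def sorted(xs):
    # The shared base case: a list-returning function immediately
    # returns its first argument when that argument is empty.
    if xs == []:
        return xs
    # Illustrative body only: an ordinary quicksort over the rest.
    pivot = xs[0]
    left = sorted([x for x in xs[1:] if x <= pivot])
    right = sorted([x for x in xs[1:] if x > pivot])
    return left + [pivot] + right

print(sorted([3, 1, 2]))  # [1, 2, 3]
```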
|
||||||
|
|
||||||
|
We've largely exhausted the conclusions we can draw from these problems. Let's get to designing a language.
|
||||||
|
|
||||||
|
### A Silly Language
|
||||||
|
Let's start by visualizing our goals. Without base cases, the solution to `_search`
|
||||||
|
would be something like this:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/sols/hw1.lang" 11 14 >}}
|
||||||
|
|
||||||
|
Here we have an __`if`-expression__. It has to have an `else`, and evaluates to the value
|
||||||
|
of the chosen branch. That is, `if true then 0 else 1` evaluates to `0`, while
|
||||||
|
`if false then 0 else 1` evaluates to `1`. Otherwise, we follow the binary tree search
|
||||||
|
algorithm faithfully.
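
Since Python's `if` is a statement rather than an expression, one way to picture the eventual translation (a rough sketch with made-up names, not the compiler's literal output) is to assign each branch's value to a temporary:

```Python
def choose(xs, k):
    # Sketch: the if-expression's value lands in a temporary variable,
    # one assignment per branch, and the temporary is what gets returned.
    if xs == []:
        temp0 = False
    else:
        temp0 = k in xs
    return temp0

print(choose([], 1), choose([1, 2], 2))  # False True
```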
|
||||||
|
|
||||||
|
Using this definition of `_search`, we can define `search` pretty easily:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/sols/hw1.lang" 17 17 >}}
|
||||||
|
|
||||||
|
Let's use Haskell's `(++)` operator for concatenation. This will help us understand
|
||||||
|
when the user is operating on lists, and when they're not. With this, `sorted` becomes:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/sols/hw1.lang" 16 16 >}}
|
||||||
|
|
||||||
|
Let's go for `qselect` now. We'll introduce a very silly language feature for this
|
||||||
|
problem:
|
||||||
|
{{< sidenote "right" "selector-note" "list selectors" >}}
|
||||||
|
You've probably never heard of list selectors, and for a good reason:
|
||||||
|
this is a <em>terrible</em> language feature. I'll go into more detail
|
||||||
|
later, but I wanted to make this clear right away.
|
||||||
|
{{< /sidenote >}}. We observe that `qselect` aims to partition the list into
|
||||||
|
other lists. We thus add the following pieces of syntax:
|
||||||
|
|
||||||
|
```
|
||||||
|
~xs -> {
|
||||||
|
pivot <- xs[rand]!
|
||||||
|
left <- xs[#0 <= pivot]
|
||||||
|
...
|
||||||
|
} -> ...
|
||||||
|
```
|
||||||
|
|
||||||
|
There are three new things here.
|
||||||
|
|
||||||
|
1. The actual "list selector": `~xs -> { .. } -> ...`. Between the curly braces
|
||||||
|
are branches which select parts of the list and assign them to new variables.
|
||||||
|
Thus, `pivot <- xs[rand]!` assigns the element at a random index to the variable `pivot`.
|
||||||
|
The `!` at the end means "after taking this out of `xs`, delete it from `xs`". The
|
||||||
|
syntax {{< sidenote "right" "curly-note" "starts with \"~\"" >}}
|
||||||
|
An observant reader will note that there's no need for the "xs" after the "~".
|
||||||
|
The idea was to add a special case syntax to reference the "selected list", but
|
||||||
|
I ended up not bothering. So in fact, this part of the syntax is useless.
|
||||||
|
{{< /sidenote >}} to make it easier to parse.
|
||||||
|
2. The `rand` list access syntax. `xs[rand]` is a special case that picks a random
|
||||||
|
element from `xs`.
|
||||||
|
3. The `xs[#0 <= pivot]` syntax. This is another special case that selects all elements
|
||||||
|
from `xs` that match the given predicate (where `#0` is replaced with each element in `xs`).
|
||||||
|
|
||||||
|
The big part of qselect is to not evaluate `right` unless you have to. So, we shouldn't
|
||||||
|
eagerly evaluate the list selector. We also don't want something like `right[|right|-1]` to evaluate
|
||||||
|
`right` twice. So we settle on
|
||||||
|
{{< sidenote "right" "lazy-note" "lazy evaluation" >}}
|
||||||
|
Lazy evaluation means only evaluating an expression when we need to. Thus,
|
||||||
|
although we might encounter the expression for <code>right</code>, we
|
||||||
|
only evaluate it when the time comes. Lazy evaluation, at least
|
||||||
|
the way that Haskell has it, is more specific: an expression is evaluated only
|
||||||
|
once, or not at all.
|
||||||
|
{{</ sidenote >}}.
|
||||||
|
Ah, but the `!` marker introduces
|
||||||
|
{{< sidenote "left" "side-effect-note" "side effects" >}}
|
||||||
|
A side effect is a term frequently used when talking about functional programming.
|
||||||
|
Evaluating the expression <code>xs[rand]!</code> doesn't just get a random element,
|
||||||
|
it also changes <em>something else</em>. In this case, that something else is
|
||||||
|
the <code>xs</code> list.
|
||||||
|
{{< /sidenote >}}. So we can't just evaluate these things all willy-nilly.
|
||||||
|
So, let's make it so that each expression in the selector list requires the ones above it. Thus,
|
||||||
|
`left` will require `pivot`, and `right` will require `left` and `pivot`. So,
|
||||||
|
lazily evaluated, ordered expressions. The whole `qselect` becomes:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/sols/hw1.lang" 1 9 >}}
|
||||||
|
|
||||||
|
We've now figured out all the language constructs. Let's start working on
|
||||||
|
some implementation!
|
||||||
|
|
||||||
|
#### Implementation
|
||||||
|
It would be silly of me to explain every detail of creating a language in Haskell
|
||||||
|
in this post; this is neither the purpose of the post, nor is it plausible
|
||||||
|
to do this without covering monads, parser combinators, grammars, abstract syntax
|
||||||
|
trees, and more. So, instead, I'll discuss the _interesting_ parts of the
|
||||||
|
implementation.
|
||||||
|
|
||||||
|
##### Temporary Variables
|
||||||
|
Our language is expression-based, yes. A function is a single,
|
||||||
|
arbitrarily complex expression (involving `if/else`, list
|
||||||
|
selectors, and more). So it would make sense to translate
|
||||||
|
a function to a single, arbitrarily complex Python expression.
|
||||||
|
However, the way we've designed our language makes it
|
||||||
|
not-so-suitable for converting to a single expression! For
|
||||||
|
instance, consider `xs[rand]`. We need to compute the list,
|
||||||
|
get its length, generate a random number, and then access
|
||||||
|
the corresponding element in the list. We use the list
|
||||||
|
here twice, and simply repeating the expression would not
|
||||||
|
be very smart: we'd be evaluating twice. So instead,
|
||||||
|
we'll use a variable, assign the list to that variable,
|
||||||
|
and then access that variable multiple times.
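
As a rough illustration (hypothetical expressions, not generated output), compare repeating the list expression with stashing it in a temporary first:

```Python
from random import randint

def make_list():
    # Stand-in for an arbitrarily expensive list expression.
    return [3, 1, 4, 1, 5]

# Naive: the list expression is evaluated twice.
elem = make_list()[randint(0, len(make_list()) - 1)]

# With a temporary: evaluate once, reuse the variable.
temp0 = make_list()
elem = temp0[randint(0, len(temp0) - 1)]
print(elem)
```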
|
||||||
|
|
||||||
|
To be extra safe, let's use a fresh temporary variable
|
||||||
|
every time we need to store something. The simplest
|
||||||
|
way is to simply maintain a counter of how many temporary
|
||||||
|
variables we've already used, and generate a new variable
|
||||||
|
by prepending the word "temp" to that number. We start
|
||||||
|
with `temp0`, then `temp1`, and so on. To keep a counter,
|
||||||
|
we can use a state monad:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 230 230 >}}
|
||||||
|
|
||||||
|
Don't worry about the `Map.Map String [String]`, we'll get to that in a bit.
|
||||||
|
For now, all we have to worry about is the second element of the tuple,
|
||||||
|
the integer counting how many temporary variables we've used. We can
|
||||||
|
get the current temporary variable as follows:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 232 235 >}}
|
||||||
|
|
||||||
|
We can also get a fresh temporary variable like this:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 237 240 >}}
|
||||||
|
|
||||||
|
Now, the
|
||||||
|
{{< sidenote "left" "code-note" "code" >}}
|
||||||
|
Since we are translating an expression, we must have the result of
|
||||||
|
the translation yield a Python expression we can use in generating
|
||||||
|
larger Python expressions. However, as we've seen, we occasionally
|
||||||
|
have to use statements. Thus, the <code>translateExpr</code> function
|
||||||
|
returns a <code>Translator ([Py.PyStmt], Py.PyExpr)</code>.
|
||||||
|
{{< /sidenote >}}for generating a random list access looks like
|
||||||
|
{{< sidenote "right" "ast-note" "this:" >}}
|
||||||
|
The <code>Py.*</code> constructors are a part of a Python AST module I quickly
|
||||||
|
threw together. I won't showcase it here, but you can always look at the
|
||||||
|
source code for the blog (which includes this project)
|
||||||
|
<a href="https://dev.danilafe.com/Web-Projects/blog-static">here</a>.
|
||||||
|
{{< /sidenote >}}
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 325 330 >}}
|
||||||
|
|
||||||
|
##### Implementing "lazy evaluation"
|
||||||
|
Lazy evaluation in functional programs usually arises from
|
||||||
|
{{< sidenote "right" "graph-note" "graph reduction" >}}
|
||||||
|
Graph reduction, more specifically the <em>Spineless,
|
||||||
|
Tagless G-machine</em> is at the core of the Glasgow Haskell
|
||||||
|
Compiler (GHC). Simon Peyton Jones' earlier book,
|
||||||
|
<em>Implementing Functional Languages: a tutorial</em>
|
||||||
|
details an earlier version of the G-machine.
|
||||||
|
{{< /sidenote >}}. However, Python is neither
|
||||||
|
functional nor graph-based, and we only lazily
|
||||||
|
evaluate list selectors. Thus, we'll have to do
|
||||||
|
some work to get our lazy evaluation to work as we desire.
|
||||||
|
Here's what I came up with:
|
||||||
|
|
||||||
|
1. It's difficult to insert Python statements where they are
|
||||||
|
needed: we'd have to figure out in which scope each variable
|
||||||
|
has already been declared, and in which scope it's yet
|
||||||
|
to be assigned.
|
||||||
|
2. Instead, we can use a Python dictionary, called `cache`,
|
||||||
|
and store computed versions of each variable in the cache.
|
||||||
|
3. It's pretty difficult to check if a variable
|
||||||
|
is in the cache, compute it if not, and then return the
|
||||||
|
result of the computation, in one expression. This is
|
||||||
|
true, unless that single expression is a function call, and we have a dedicated
|
||||||
|
function that takes no arguments, computes the expression if needed,
|
||||||
|
and uses the cache otherwise. We choose this route (see the sketch after this list).
|
||||||
|
4. We have already promised that we'd evaluate all the selected
|
||||||
|
variables above a given variable before evaluating the variable
|
||||||
|
itself. So, each function will first call (and therefore
|
||||||
|
{{< sidenote "right" "force-note" "force" >}}
|
||||||
|
Forcing, in this case, comes from the context of lazy evaluation. To
|
||||||
|
force a variable or an expression is to tell the program to compute its
|
||||||
|
value, even though it may have been putting it off.
|
||||||
|
{{< /sidenote >}}) the functions
|
||||||
|
generated for variables declared above the function's own variable.
|
||||||
|
5. To keep track of all of this, we use the already-existing state monad
|
||||||
|
as a reader monad (that is, we clear the changes we make to the monad
|
||||||
|
after we're done translating the list selector). This is where the `Map.Map String [String]`
|
||||||
|
comes from.
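
Before we get to the map bookkeeping, here is roughly the shape of Python we are
aiming to generate for a single lazily-computed variable. This is a simplified,
hand-written sketch with illustrative names; the real generated code appears in
the output section below.

```Python
# Sketch: one cached, zero-argument function per lazy variable.
xs = [3, 1, 2]
cache = {}

def pivot():
    if "pivot" not in cache:
        cache["pivot"] = xs.pop(0)   # compute once...
    return cache["pivot"]            # ...and reuse afterwards

def left():
    pivot()                          # force the variables declared above first
    if "left" not in cache:
        cache["left"] = [x for x in xs if x <= pivot()]
    return cache["left"]

print(left())   # [1, 2]
```
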
The `Map.Map String [String]` keeps track of variables that will be lazily computed,
|
||||||
|
and also of the dependencies of each variable (the variables that need
|
||||||
|
to be accessed before the variable itself). We compute such a map for
|
||||||
|
each selector as follows:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 298 298 >}}
|
||||||
|
|
||||||
|
We update the existing map using `Map.union`:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 299 299 >}}
|
||||||
|
|
||||||
|
And, after we're done generating expressions in the body of this selector,
|
||||||
|
we clear it to its previous value `vs`:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 302 302 >}}
|
||||||
|
|
||||||
|
We generate a single selector as follows:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 268 281 >}}
|
||||||
|
|
||||||
|
This generates a function definition statement, which we will examine in
|
||||||
|
generated Python code later on.
|
||||||
|
|
||||||
|
Solving the problem this way also introduces another gotcha: sometimes,
|
||||||
|
a variable is produced by a function call, and other times the variable
|
||||||
|
is just a plain Python variable. We handle this distinction as follows:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 283 288 >}}
|
||||||
|
|
||||||
|
##### Special Case Insertion
|
||||||
|
This is a silly language for a single homework assignment. I'm not
|
||||||
|
planning to implement Hindley-Milner type inference, or anything
|
||||||
|
of that sort. For the purpose of this language, things will be
|
||||||
|
either a list, or not a list. And as long as a function __can__ return
|
||||||
|
a list, it can also return the list from its base case. Thus,
|
||||||
|
that's all we will try to figure out. The checking code is so
|
||||||
|
short that we can include the whole snippet at once:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 219 227 >}}
|
||||||
|
|
||||||
|
`mergePossibleType`
|
||||||
|
{{< sidenote "right" "bool-identity-note" "figures out" >}}
|
||||||
|
An observant reader will note that this is just a logical
|
||||||
|
OR function. It's not, however, good practice to use
|
||||||
|
booleans for types that have two constructors with no arguments.
|
||||||
|
Check out this <a href="https://programming-elm.com/blog/2019-05-20-solving-the-boolean-identity-crisis-part-1/">
|
||||||
|
Elm-based article</a> about this, which the author calls the
|
||||||
|
Boolean Identity Crisis.
|
||||||
|
{{< /sidenote >}}, given two possible types for an
|
||||||
|
expression, the final type for the expression.
|
||||||
|
|
||||||
|
There's only one real trick to this. Sometimes, like in
|
||||||
|
`_search`, the only time we return something _known_ to be a list, that
|
||||||
|
something is `xs`. Since we're making a list manipulation language,
|
||||||
|
let's __assume the first argument to the function is a list__, and
|
||||||
|
__use this information to determine expression types__. We guess
|
||||||
|
types in a very basic manner otherwise: If you use the concatenation
|
||||||
|
operator, or a list literal, then obviously we're working on a list.
|
||||||
|
If you're returning the first argument of the function, that's also
|
||||||
|
a list. Otherwise, it could be anything.
|
||||||
|
|
||||||
|
My Haskell linter actually suggested a pretty clever way of writing
|
||||||
|
the whole "add a base case if this function returns a list" code.
|
||||||
|
Check it out:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 260 266 >}}
|
||||||
|
|
||||||
|
Specifically, look at the line with `let fastReturn = ...`. It
|
||||||
|
uses a list comprehension: we take a parameter `p` from the list of
|
||||||
|
parameters `ps`, but only produce the statements for the base case
|
||||||
|
if the possible type computed using `p` is `List`.
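
The same trick reads naturally in Python, too. Below is a rough analogue with
hypothetical names: the comprehension yields the base-case statements only when
the first parameter might be a list, and an empty list of statements otherwise.

```Python
def possible_type(p):
    # Stand-in for the compiler's type guess; assume only "xs" looks like a list.
    return "List" if p == "xs" else "Unknown"

params = ["xs", "k"]
fast_return = [("return-if-empty", p) for p in params[:1] if possible_type(p) == "List"]
print(fast_return)   # [('return-if-empty', 'xs')]
```
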
### The Output
|
||||||
|
What kind of beast have we created? Take a look for yourself:
|
||||||
|
```Python
|
||||||
|
def qselect(xs,k):
|
||||||
|
if xs==[]:
|
||||||
|
return xs
|
||||||
|
cache = {}
|
||||||
|
def pivot():
|
||||||
|
if ("pivot") not in (cache):
|
||||||
|
cache["pivot"] = xs.pop(0)
|
||||||
|
return cache["pivot"]
|
||||||
|
def left():
|
||||||
|
def temp2(arg):
|
||||||
|
out = []
|
||||||
|
for arg0 in arg:
|
||||||
|
if arg0<=pivot():
|
||||||
|
out.append(arg0)
|
||||||
|
return out
|
||||||
|
pivot()
|
||||||
|
if ("left") not in (cache):
|
||||||
|
cache["left"] = temp2(xs)
|
||||||
|
return cache["left"]
|
||||||
|
def right():
|
||||||
|
def temp3(arg):
|
||||||
|
out = []
|
||||||
|
for arg0 in arg:
|
||||||
|
if arg0>pivot():
|
||||||
|
out.append(arg0)
|
||||||
|
return out
|
||||||
|
left()
|
||||||
|
pivot()
|
||||||
|
if ("right") not in (cache):
|
||||||
|
cache["right"] = temp3(xs)
|
||||||
|
return cache["right"]
|
||||||
|
if k>(len(left())+1):
|
||||||
|
temp4 = qselect(right(), k-len(left())-1)
|
||||||
|
else:
|
||||||
|
if k==(len(left())+1):
|
||||||
|
temp5 = [pivot()]
|
||||||
|
else:
|
||||||
|
temp5 = qselect(left(), k)
|
||||||
|
temp4 = temp5
|
||||||
|
return temp4
|
||||||
|
def _search(xs,k):
|
||||||
|
if xs==[]:
|
||||||
|
return xs
|
||||||
|
if xs[1]==k:
|
||||||
|
temp6 = xs
|
||||||
|
else:
|
||||||
|
if xs[1]>k:
|
||||||
|
temp8 = _search(xs[0], k)
|
||||||
|
else:
|
||||||
|
temp8 = _search(xs[2], k)
|
||||||
|
temp6 = temp8
|
||||||
|
return temp6
|
||||||
|
def sorted(xs):
|
||||||
|
if xs==[]:
|
||||||
|
return xs
|
||||||
|
return sorted(xs[0])+[xs[1]]+sorted(xs[2])
|
||||||
|
def search(xs,k):
|
||||||
|
return len(_search(xs, k))!=0
|
||||||
|
def insert(xs,k):
|
||||||
|
return _insert(k, _search(xs, k))
|
||||||
|
def _insert(k,xs):
|
||||||
|
if k==[]:
|
||||||
|
return k
|
||||||
|
if len(xs)==0:
|
||||||
|
temp16 = xs
|
||||||
|
temp16.append([])
|
||||||
|
temp17 = temp16
|
||||||
|
temp17.append(k)
|
||||||
|
temp18 = temp17
|
||||||
|
temp18.append([])
|
||||||
|
temp15 = temp18
|
||||||
|
else:
|
||||||
|
temp15 = xs
|
||||||
|
return temp15
|
||||||
|
```
|
||||||
|
It's...horrible! All the `tempX` variables, __three layers of nested function declarations__, hardcoded cache access. This is not something you'd ever want to write.
|
||||||
|
Even to get this code, I had to come up with hacks __in a language I created__.
|
||||||
|
The first hack is to make the `qselect` function use the `xs == []` base
|
||||||
|
case. This doesn't happen by default, because `qselect` doesn't return a list!
|
||||||
|
To "fix" this, I made `qselect` return the number it found, wrapped in a
|
||||||
|
list literal. This is not up to spec, and would require another function
|
||||||
|
to unwrap this list.
|
||||||
|
|
||||||
|
While `qselect` was struggling with not having the base case, `insert` had
|
||||||
|
a base case it didn't need: `insert` shouldn't return the list itself
|
||||||
|
when it's empty, it should insert into it! However, when we use the `<<`
|
||||||
|
list insertion operator, the language infers `insert` to be a list-returning
|
||||||
|
function itself, and so inserting into an empty list will always fail. To work around this, we
|
||||||
|
make a function `_insert`, which __takes the arguments in reverse__.
|
||||||
|
The base case will still be generated, but the first argument (against
|
||||||
|
which the base case is checked) will be a number, so the `k == []` check
|
||||||
|
will always fail.
|
||||||
|
|
||||||
|
That concludes this post. I'll be working on more solutions to homework
|
||||||
|
assignments in self-made languages, so keep an eye out!
|
||||||
218
content/blog/01_cs325_languages_hw2.md
Normal file
218
content/blog/01_cs325_languages_hw2.md
Normal file
@@ -0,0 +1,218 @@
|
|||||||
|
---
|
||||||
|
title: A Language for an Assignment - Homework 2
|
||||||
|
date: 2019-12-30T20:05:10-08:00
|
||||||
|
tags: ["Haskell", "Python", "Algorithms"]
|
||||||
|
---
|
||||||
|
|
||||||
|
After the madness of the
|
||||||
|
[language for homework 1]({{< relref "00_cs325_languages_hw1.md" >}}),
|
||||||
|
the solution to the second homework offers a moment of respite.
|
||||||
|
Let's get right into the problems, shall we?
|
||||||
|
|
||||||
|
### Homework 2
|
||||||
|
Besides some free-response questions, the homework contains
|
||||||
|
two problems. The first:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/hws/hw2.txt" 29 34 >}}
|
||||||
|
|
||||||
|
And the second:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/hws/hw2.txt" 36 44 >}}
|
||||||
|
|
||||||
|
At first glance, it's not obvious why these problems are good for
|
||||||
|
us. However, there's one key observation: __`num_inversions` can be implemented
|
||||||
|
using a slightly-modified `mergesort`__. The trick is to maintain a counter
|
||||||
|
of inversions in every recursive call to `mergesort`, updating
|
||||||
|
it every time we take an element from the
|
||||||
|
{{< sidenote "right" "right-note" "right list" >}}
|
||||||
|
If this nomenclature is not clear to you, recall that
|
||||||
|
mergesort divides a list into two smaller lists. The
|
||||||
|
"right list" refers to the second of the two, because
|
||||||
|
if you visualize the original list as a rectangle, and cut
|
||||||
|
it in half (vertically, down the middle), then the second list
|
||||||
|
(from the left) is on the right.
|
||||||
|
{{< /sidenote >}} while there are still elements in the
|
||||||
|
{{< sidenote "left" "left-note" "left list" >}}
|
||||||
|
Why this is the case is left as an exercise to the reader.
|
||||||
|
{{< /sidenote >}}.
|
||||||
|
When we return from the call,
|
||||||
|
we add up the number of inversions from running `num_inversions`
|
||||||
|
on the smaller lists, and the number of inversions that we counted
|
||||||
|
as I described. We then return both the total number
|
||||||
|
of inversions and the sorted list.
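
To make the counting step concrete, here is a rough sketch of the merge phase
alone, in plain Python. This is illustrative only; the actual solution will be
written in the new language below.

```Python
def merge_count(left, right):
    merged, inversions = [], 0
    i = j = 0
    while i < len(left) and j < len(right):
        if left[i] <= right[j]:
            merged.append(left[i])
            i += 1
        else:
            merged.append(right[j])
            j += 1
            inversions += len(left) - i   # every remaining left element is inverted
    merged += left[i:] + right[j:]
    return inversions, merged

print(merge_count([1, 3, 5], [2, 4]))   # (3, [1, 2, 3, 4, 5])
```
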
So, we either perform the standard mergesort, or we perform mergesort
|
||||||
|
with additional steps added on. The additional steps can be divided into
|
||||||
|
three general categories:
|
||||||
|
|
||||||
|
1. __Initialization__: We create / set some initial state. This state
|
||||||
|
doesn't depend on the lists or anything else.
|
||||||
|
2. __Effect__: Each time that an element is moved from one of the two smaller
|
||||||
|
lists into the output list, we may change the state in some way (create
|
||||||
|
an effect).
|
||||||
|
3. __Combination__: The final state, and the results of the two
|
||||||
|
sub-problem states, are combined into the output of the function.
|
||||||
|
|
||||||
|
This is all very abstract. In the concrete case of inversions,
|
||||||
|
these steps are as follows:
|
||||||
|
|
||||||
|
1. __Initialization__: The initial state, which is just the counter, is set to 0.
|
||||||
|
2. __Effect__: Each time an element is moved, if it comes from the right list,
|
||||||
|
the number of inversions is updated.
|
||||||
|
3. __Combination__: We update the state, simply adding the left and right
|
||||||
|
inversion counts.
|
||||||
|
|
||||||
|
We can make a language out of this!
|
||||||
|
|
||||||
|
### A Language
|
||||||
|
Again, let's start by visualizing what the solution will look like. How about this:
|
||||||
|
|
||||||
|
{{< rawblock "cs325-langs/sols/hw2.lang" >}}
|
||||||
|
|
||||||
|
We divide the code into the same three steps that we described above. The first
|
||||||
|
section is the initial state. Since it doesn't depend on anything, we expect
|
||||||
|
it to be some kind of literal, like an integer. Next, we have the effect section,
|
||||||
|
which has access to the variables below:
|
||||||
|
|
||||||
|
* `STATE`, to manipulate or check the current state.
|
||||||
|
* `LEFT` and `RIGHT`, to access the two lists being merged.
|
||||||
|
* `L` and `R`, constants that are used to compare against the `SOURCE` variable.
|
||||||
|
* `SOURCE`, to denote which list a number came from.
|
||||||
|
* `LSTATE` and `RSTATE`, to denote the final states from the two subproblems.
|
||||||
|
|
||||||
|
We use an `if`-statement to check if the element that was popped came
|
||||||
|
from the right list (by checking `SOURCE == R`). If it did, we increment the counter
|
||||||
|
(state) by the proper amount. In the combine step, which has access to the
|
||||||
|
same variables, we simply increment the state by the counters from the left
|
||||||
|
and right solutions, stored in `LSTATE` and `RSTATE`. That's it!
|
||||||
|
|
||||||
|
#### Implementation
|
||||||
|
The implementation is not tricky at all. We don't need to use monads like we did last
|
||||||
|
time, nor do we have to perform any fancy nested Python function declarations.
|
||||||
|
|
||||||
|
To keep with the Python convention of lowercase variables, we'll translate the
|
||||||
|
uppercase "global" variables to lowercase. We'll do it like so:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 167 176 >}}
|
||||||
|
|
||||||
|
Note that we translated `L` and `R` to integer literals. We'll indicate the source of
|
||||||
|
each element with an integer, since there's no real point to representing it with
|
||||||
|
a string or a variable. We'll need to be aware of this when we implement the actual, generic
|
||||||
|
mergesort code. Let's do that now:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 101 161 >}}
|
||||||
|
|
||||||
|
This is probably the ugliest part of this assignment: we handwrote a Python
|
||||||
|
AST in Haskell that implements mergesort with our augmentations. Note that
|
||||||
|
this is a function, which takes a `Py.PyExpr` (the initial state expression),
|
||||||
|
and two lists of `Py.PyStmt`, which are the "effect" and "combination" code,
|
||||||
|
respectively. We simply splice them into our regular mergesort function.
|
||||||
|
The translation is otherwise pretty trivial, so there's no real reason
|
||||||
|
to show it here.
|
||||||
|
|
||||||
|
### The Output
|
||||||
|
What's the output of our solution to `num_inversions`? Take a look for yourself:
|
||||||
|
|
||||||
|
```Python
|
||||||
|
def prog(xs):
|
||||||
|
if len(xs)<2:
|
||||||
|
return (0, xs)
|
||||||
|
leng = len(xs)//2
|
||||||
|
left = xs[:(leng)]
|
||||||
|
right = xs[(leng):]
|
||||||
|
(ls,left) = prog(left)
|
||||||
|
(rs,right) = prog(right)
|
||||||
|
left.reverse()
|
||||||
|
right.reverse()
|
||||||
|
state = 0
|
||||||
|
source = 0
|
||||||
|
total = []
|
||||||
|
while (left!=[])and(right!=[]):
|
||||||
|
if left[-1]<=right[-1]:
|
||||||
|
total.append(left.pop())
|
||||||
|
source = 1
|
||||||
|
else:
|
||||||
|
total.append(right.pop())
|
||||||
|
source = 2
|
||||||
|
if source==2:
|
||||||
|
state = state+len(left)
|
||||||
|
state = state+ls+rs
|
||||||
|
left.reverse()
|
||||||
|
right.reverse()
|
||||||
|
return (state, total+left+right)
|
||||||
|
```
|
||||||
|
|
||||||
|
Honestly, that's pretty clean. As clean as `left.reverse()` to allow for \\(O(1)\\) pop is.
|
||||||
|
What's really clean, however, is the implementation of mergesort in our language.
|
||||||
|
It goes as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
state 0;
|
||||||
|
effect {}
|
||||||
|
combine {}
|
||||||
|
```
|
||||||
|
|
||||||
|
To implement mergesort in our language, which describes mergesort variants, all
|
||||||
|
we have to do is not specify any additional behavior. Cool, huh?
|
||||||
|
|
||||||
|
That's the end of this post. If you liked this one (and the previous one!),
|
||||||
|
keep an eye out for more!
|
||||||
|
|
||||||
|
### Appendix (Missing Homework Question)
|
||||||
|
I should not view homework assignments on a small-screen device. There __was__ a third problem
|
||||||
|
on homework 2:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/hws/hw2.txt" 46 65 >}}
|
||||||
|
|
||||||
|
This is not a mergesort variant, and adding support for it into our second language
|
||||||
|
would prevent it from being the neat, specialized
|
||||||
|
{{< sidenote "right" "dsl-note" "DSL" >}}
|
||||||
|
DSL is a shortened form of "domain specific language", which was briefly
|
||||||
|
described in another sidenote while solving homework 1.
|
||||||
|
{{< /sidenote >}} that we just saw. We'll do something else instead:
|
||||||
|
we'll use the language we defined in homework 1 to solve this
|
||||||
|
problem:
|
||||||
|
|
||||||
|
```
|
||||||
|
empty() = [0, 0];
|
||||||
|
longest(xs) =
|
||||||
|
if |xs| != 0
|
||||||
|
then _longest(longest(xs[0]), longest(xs[2]))
|
||||||
|
else empty();
|
||||||
|
_longest(l, r) = [max(l[0], r[0]) + 1, max(l[0]+r[0], max(l[1], r[1]))];
|
||||||
|
```
|
||||||
|
|
||||||
|
{{< sidenote "right" "terrible-note" "This is quite terrible." >}}
|
||||||
|
This is probably true with any program written in our first
|
||||||
|
language.
|
||||||
|
{{< /sidenote >}} In these 6 lines of code, there are two hacks
|
||||||
|
to work around the peculiarities of the language.
|
||||||
|
|
||||||
|
At each recursive call, we want to keep track of both the depth
|
||||||
|
of the tree and the existing longest path. This is because
|
||||||
|
the longest path could be found either somewhere down
|
||||||
|
a subtree, or from combining the largest depths of
|
||||||
|
two subtrees. To return two values from a function in Python,
|
||||||
|
we'd use a tuple. Here, we use a list.
|
||||||
|
|
||||||
|
Alarm bells should be going off here. There's no reason why we should
|
||||||
|
ever return an empty list from the recursive call: at the very least, we
|
||||||
|
want to return `[0,0]`. But placing such a list literal in a function
|
||||||
|
will trigger the special case insertion. So, we have to hide this literal
|
||||||
|
from the compiler. Fortunately, that's not too hard to do - the compiler
|
||||||
|
is pretty halfhearted in its inference of types. Simply putting
|
||||||
|
the literal behind a constant function (`empty`) does the trick.
|
||||||
|
|
||||||
|
The program uses the subproblem depths multiple times in the
|
||||||
|
final computation. We thus probably want to assign these values
|
||||||
|
to names so we don't have to perform any repeated work. Since
|
||||||
|
the only two mechanisms for
|
||||||
|
{{< sidenote "right" "binding-note" "binding variables" >}}
|
||||||
|
To bind a variable means to assign a value to it.
|
||||||
|
{{< /sidenote >}} in this language are function calls
|
||||||
|
and list selectors, we use a helper function `_longest`,
|
||||||
|
which takes two subproblem solutions and combines them
|
||||||
|
into a new solution. It's pretty obvious that `_longest`
|
||||||
|
returns a list, so the compiler will try to insert a base
|
||||||
|
case. Fortunately, subproblem solutions are always
|
||||||
|
lists of two numbers, so this doesn't affect us too much.
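
For reference, a direct hand translation of this solution into ordinary Python
(hypothetical, not compiler output) would read something like this:

```Python
def longest(t):
    # A tree is [left, value, right]; an empty tree is [].
    if len(t) == 0:
        return [0, 0]                      # [depth, longest path]
    l, r = longest(t[0]), longest(t[2])
    return [max(l[0], r[0]) + 1,           # depth of this subtree
            max(l[0] + r[0], l[1], r[1])]  # best path: through the root, or below

print(longest([[[], 1, []], 2, [[], 3, [[], 4, []]]]))   # [3, 3]
```
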
429
content/blog/02_cs325_languages_hw3.md
Normal file
429
content/blog/02_cs325_languages_hw3.md
Normal file
@@ -0,0 +1,429 @@
|
|||||||
|
---
|
||||||
|
title: A Language for an Assignment - Homework 3
|
||||||
|
date: 2020-01-02T22:17:43-08:00
|
||||||
|
tags: ["Haskell", "Python", "Algorithms"]
|
||||||
|
---
|
||||||
|
|
||||||
|
It rained in Sunriver on New Year's Eve, and it continued to rain
|
||||||
|
for the next couple of days. So, instead of going skiing as planned,
|
||||||
|
to the dismay of my family and friends, I spent the majority of
|
||||||
|
those days working on the third language for homework 3. It
|
||||||
|
was quite the language, too - the homework has three problems, each of
|
||||||
|
which has a solution independent of the others. I invite you
|
||||||
|
to join me in my descent into madness as we construct another language.
|
||||||
|
|
||||||
|
### Homework 3
|
||||||
|
Let's take a look at the three homework problems. The first two are
|
||||||
|
related, but are solved using a different technique:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/hws/hw3.txt" 18 30 >}}
|
||||||
|
|
||||||
|
This problem requires us to find the `k` numbers closest to some
|
||||||
|
query (which I will call `n`) from a list `xs`. The list isn't sorted, and the
|
||||||
|
problem must run in linear time. Sorting the list would require
|
||||||
|
the standard
|
||||||
|
{{< sidenote "right" "n-note" "\(O(n\log n)\) time." >}}
|
||||||
|
The \(n\) in this expression is not the same as the query <code>n</code>,
|
||||||
|
but rather the length of the list. In fact, I have not yet assigned
|
||||||
|
the length of the input <code>xs</code> to any variable. If we say that
|
||||||
|
\(m\) is a number that denotes that length, the proper expression
|
||||||
|
for the complexity is \(O(m \log m)\).
|
||||||
|
{{< /sidenote >}} Thus, we have to take another route, which should
|
||||||
|
already be familiar: quickselect. Using quickselect, we can find the `k`th
|
||||||
|
closest number, and then collect all the numbers that are closer than the `k`th
|
||||||
|
closest number. So, we need a language that:
|
||||||
|
|
||||||
|
* Supports quickselect (and thus, list partitioning and recursion).
|
||||||
|
* Supports iteration, {{< sidenote "left" "iteration-note" "multiple times." >}}
|
||||||
|
Why would we need to iterate multiple times? Note that we could have a list
|
||||||
|
of numbers that are all the same, <code>[1,1,1,1,1]</code>. Then, we'll need
|
||||||
|
to know how many of the numbers <em>equally close</em> as the <code>k</code>th
|
||||||
|
element we need to include, which will require another pass through the list.
|
||||||
|
{{< /sidenote >}}
|
||||||
|
|
||||||
|
That's a good start. Let's take a look at the second problem:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/hws/hw3.txt" 33 47 >}}
|
||||||
|
|
||||||
|
This problem really is easier. We have to find the position of _the_ closest
|
||||||
|
element, and then try to expand towards either the left or right, depending on
|
||||||
|
which end is better. This expansion will take several steps, and will
|
||||||
|
likely require a way to "look" at a given part of the list. So let's add two more
|
||||||
|
rules. We need a language that also:
|
||||||
|
|
||||||
|
* Supports looping control flow, such as `while`.
|
||||||
|
* {{< sidenote "right" "view-note" "Allows for a \"view\" into the list" >}}
|
||||||
|
We could, of course, simply use list indexing. But then, we'd just be making
|
||||||
|
a simple imperative language, and that's boring. So let's play around
|
||||||
|
with our design a little, and experimentally add such a "list view" component.
|
||||||
|
{{< /sidenote >}}
|
||||||
|
(like an abstraction over indexing).
|
||||||
|
|
||||||
|
This is shaping up to be a fun language. Let's take a look at the last problem:
|
||||||
|
{{< codelines "text" "cs325-langs/hws/hw3.txt" 50 64 >}}
|
||||||
|
|
||||||
|
This problem requires more iterations of a list. We have several
|
||||||
|
{{< sidenote "right" "cursor-note" "\"cursors\"" >}}
|
||||||
|
I always make the language before I write the post, since a lot of
|
||||||
|
design decisions change mid-implementation. I realize now that
|
||||||
|
"cursors" would've been a better name for this language feature,
|
||||||
|
but alas, it is too late.
|
||||||
|
{{< /sidenote >}} looking into the list, and depending on whether the values
|
||||||
|
at each of the cursors add up, we do or do not add a new tuple to a list. So,
|
||||||
|
two more requirements:
|
||||||
|
|
||||||
|
* The "cursors" must be able to interact.
|
||||||
|
* The language can represent {{< sidenote "left" "tuple-note" "tuples." >}}
|
||||||
|
We could, of course, hack some other way to return a list of tuples, but
|
||||||
|
it turns out tuples are pretty simple to implement, and help make for nicer
|
||||||
|
programming in our language.
|
||||||
|
{{< /sidenote >}}
|
||||||
|
|
||||||
|
I think we've gathered what we want from the homework. Let's move on to the
|
||||||
|
language!
|
||||||
|
|
||||||
|
### A Language
|
||||||
|
As is now usual, let's envision a solution to the problems in our language. There
|
||||||
|
are actually quite a lot of functions to look at, so let's see them one by one.
|
||||||
|
First, let's look at `qselect`.
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/sols/hw3.lang" 1 19 >}}
|
||||||
|
|
||||||
|
After the early return, the first interesting part of the language is the
|
||||||
|
use of what I have decided to call a __list traverser__. The list
|
||||||
|
traverser is a __generalization of a list index__. Whenever we use a list
|
||||||
|
index variable, we generally use the following operations:
|
||||||
|
|
||||||
|
* __Initialize__: we set the list index to some initial value, such as 0.
|
||||||
|
* __Step__: If we're walking the list from left to right, we increment the index.
|
||||||
|
If we're walking the list from right to left, we decrement the index.
|
||||||
|
* __Validity Check__: We check if the index is still valid (that is, we haven't
|
||||||
|
gone past the edge of the list).
|
||||||
|
* __Access__: Get the element the cursor is pointing to.
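
In the generated Python, these four operations end up as nothing more than plain
index bookkeeping, as in this illustrative sketch (compare the compiled output at
the end of the post):

```Python
xs = [3, 1, 4, 1, 5]
total = 0
it = 0                    # initialize
while it < len(xs):       # validity check
    total += xs[it]       # access
    it = it + 1           # step
print(total)              # 14
```
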
A {{< sidenote "right" "cpp-note" "traverser declaration" >}}
|
||||||
|
A fun fact is that we've just rediscovered C++
|
||||||
|
<a href="http://www.cplusplus.com/reference/iterator/">iterators</a>. C++
|
||||||
|
containers and their iterators provide us with the operations I described:
|
||||||
|
|
||||||
|
We can initialize an iterator like <code>auto it = list.begin()</code>. We
|
||||||
|
can step the iterator using <code>it++</code>. We can check its validity
|
||||||
|
using <code>it != list.end()</code>, and access what it's pointing to using
|
||||||
|
<code>*it</code>. While C++ uses templates and inheritance for this,
|
||||||
|
we define a language feature specifically for lists.
|
||||||
|
|
||||||
|
{{< /sidenote >}} describes these operations. The declaration for the `bisector`
|
||||||
|
traverser creates a "cursor" over the list `xs` that goes between the 0th
|
||||||
|
and last elements of `xs`. The declaration for the `pivot` traverser creates
|
||||||
|
a "cursor" over the list `xs` that jumps around random locations in the list.
|
||||||
|
|
||||||
|
The next interesting part of the language is a __traverser macro__. This thing,
|
||||||
|
which looks like a function call (but isn't), performs an operation on the
|
||||||
|
cursor. For instance, `pop!` removes the element at the cursor from the list,
|
||||||
|
whereas `bisect!` categorizes the remaining elements in the cursor's list
|
||||||
|
into two lists, using a boolean-returning lambda (written in Java syntax).
|
||||||
|
|
||||||
|
Note that this implementation of `qselect` takes a function `c`, which it
|
||||||
|
uses to judge the actual value of the number. This is because our `qselect`
|
||||||
|
won't be finding _the_ smallest number, but the number with the smallest difference
|
||||||
|
with `n`. `n` will be factored in via the function.
|
||||||
|
|
||||||
|
Next up, let's take a look at the function that uses `qselect`, `closestUnsorted`:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/sols/hw3.lang" 21 46 >}}
|
||||||
|
|
||||||
|
Like we discussed, it finds the `k`th closest element (calling it `min`),
|
||||||
|
and counts how many elements that are __equal__ need to be included,
|
||||||
|
by setting the number to `k` at first, and subtracting 1 for every number
|
||||||
|
it encounters that's closer than `min`. Notice that we use the `valid!` and
|
||||||
|
`step!` macros, which implement the operations we described above. Notice
|
||||||
|
that the user doesn't deal with adding and subtracting numbers, and doing
|
||||||
|
comparisons. All they have to do is ask "am I still good to iterate?"
|
||||||
|
|
||||||
|
Next, let's take a look at `closestSorted`, which will require more
|
||||||
|
traverser macros.
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/sols/hw3.lang" 48 70 >}}
|
||||||
|
|
||||||
|
The first new macro is `canstep!`. This macro just verifies that
|
||||||
|
the traverser can make another step. We need this for the "reverse" iterator,
|
||||||
|
which indicates the lower bound of the range of numbers we want to return,
|
||||||
|
because `subset!` (which itself is just Python's slice, like `xs[a:b]`) uses an inclusive bottom
|
||||||
|
index, and thus, we can't afford to step it before knowing that we can, and that
|
||||||
|
it's a better choice after the step.
|
||||||
|
|
||||||
|
Similarly, we have the `at!(t, i)` macro, which looks at the
|
||||||
|
traverser `t`, with offset `i`.
|
||||||
|
|
||||||
|
We have two loops. The first loop runs as long as we can expand the range in both
|
||||||
|
directions, and picks the better direction at each iteration. The second loop
|
||||||
|
runs as long as we still want more numbers, but have already hit the edge
|
||||||
|
of the list on the left or on the right.
|
||||||
|
|
||||||
|
Finally, let's look at the solution to `xyz`:
|
||||||
|
|
||||||
|
{{< codelines "text" "cs325-langs/sols/hw3.lang" 72 95 >}}
|
||||||
|
|
||||||
|
I won't go in depth, but notice that the expression in the `span` part
|
||||||
|
of the `traverser` declaration can access another traverser. We treat
|
||||||
|
as a feature the fact that this expression isn't immediately evaluated at the place
|
||||||
|
of the traverser declaration. Rather, every time that a comparison for a traverser
|
||||||
|
operation is performed, this expression is re-evaluated. This allows us to put
|
||||||
|
dynamic bounds on traversers `y` and `z`, one of which must not exceed the other.
|
||||||
|
|
||||||
|
Note also a new keyword that was just used: `sorted`. This is a harmless little
|
||||||
|
language feature that automatically calls `.sort()` on the first argument of
|
||||||
|
the function.
|
||||||
|
|
||||||
|
This is more than enough to work with. Let's move on to the implementation.
|
||||||
|
|
||||||
|
#### Implementation
|
||||||
|
Again, let's not go too far into the details of implementing the language from scratch.
|
||||||
|
Instead, let's take a look into specific parts of the language that deserve attention.
|
||||||
|
|
||||||
|
##### Revenge of the State Monad
|
||||||
|
Our previous language was, indeed, a respite from complexity. Translation was
|
||||||
|
straightforward, and the resulting expressions and statements were plugged straight
|
||||||
|
into a handwritten AST. We cannot get away with this here; the language is powerful
|
||||||
|
enough to implement three list-based problems, which comes at the cost of increased
|
||||||
|
complexity.
|
||||||
|
|
||||||
|
We need, once again, to generate temporary variables. We also need to keep track of
|
||||||
|
which variables are traversers, and the properties of these traversers, throughout
|
||||||
|
each function of the language. We thus fall back to using `Control.Monad.State`:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 198 198 >}}
|
||||||
|
|
||||||
|
There's one part of the state tuple that we haven't yet explained: the list of
|
||||||
|
statements.
|
||||||
|
|
||||||
|
##### Generating Statements
|
||||||
|
Recall that our translation function for expressions in the first homework had the type:
|
||||||
|
|
||||||
|
```Haskell
|
||||||
|
translateExpr :: Expr -> Translator ([Py.PyStmt], Py.PyExpr)
|
||||||
|
```
|
||||||
|
|
||||||
|
We then had to use `do`-notation, and explicitly concatenate lists
|
||||||
|
of emitted statements. In this language, I took an alternative route: I made
|
||||||
|
the statements part of the state. They are thus implicitly generated and
|
||||||
|
stored in the monad, and expression generators don't have to worry about
|
||||||
|
concatenating them. When the program is ready to use the generated statements
|
||||||
|
(say, when an `if`-statement needs to use the statements emitted by the condition
|
||||||
|
expression), we retrieve them from the monad:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 228 234 >}}
|
||||||
|
|
||||||
|
I should note, for transparency, that there's a bug in my use of this function.
|
||||||
|
When I compile `if`-statements, I accidentally place statements generated by
|
||||||
|
the condition into the body of the `if`. This bug doesn't manifest
|
||||||
|
in the solutions to the homework problems, and so I decided not to spend any more
|
||||||
|
time on fixing it.
|
||||||
|
|
||||||
|
##### Validating Traverser Declarations
|
||||||
|
We declare two separate types that hold traverser data. The first is a kind of "draft"
|
||||||
|
type, `TraverserData`:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 184 190 >}}
|
||||||
|
|
||||||
|
This record holds all possible configurations of a traverser
|
||||||
|
that occur as the program is iterating through the various `key: value` pairs in
|
||||||
|
the declaration. For instance, at the very beginning of processing a traverser declaration,
|
||||||
|
our program will use a "default" `TraverserData`, with all fields set to `Nothing` or
|
||||||
|
their default value. This value will then be modified by the first key/value pair,
|
||||||
|
changing, for instance, the list that the traverser operates on. This new modified
|
||||||
|
`TraverserData` will then be modified by the next key/value pair, and so on. Doing
|
||||||
|
this with every key/value pair (called an option in the below snippet)
|
||||||
|
is effectively a foldl operation.
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 378 387 >}}
|
||||||
|
|
||||||
|
The data may not have all the required fields until the very end, and its type
|
||||||
|
reflects that: `Maybe String` here, `Maybe TraverserBounds` there. We don't
|
||||||
|
want to deal with unwrapping the `Maybe a` values every time we use the traverser,
|
||||||
|
especially if we've done so before. So, we define a `ValidTraverserData` record
|
||||||
|
that does not have `Maybe` arguments, and thus, has all the required data. At the
|
||||||
|
end of a traverser declaration, we attempt to translate a `TraverserData` into
|
||||||
|
a `ValidTraverserData`, invoking `fail` if we can't, and storing the `ValidTraverserData`
|
||||||
|
into the state otherwise:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 408 420 >}}
|
||||||
|
|
||||||
|
Then, every time we retrieve a traverser from the state,
|
||||||
|
we use a lookup monadic operation defined like this:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 240 244 >}}
|
||||||
|
|
||||||
|
##### Compiling Macros
|
||||||
|
I didn't call them macros for no reason. Clearly, we don't want to generate
|
||||||
|
code that
|
||||||
|
{{< sidenote "right" "increment-note" "calls functions only to increment an index." >}}
|
||||||
|
In fact, there's no easy way to do this at all. Python's integers (if we choose to
|
||||||
|
represent our traversers using integers), are immutable. Furthermore, unlike C++,
|
||||||
|
where passing by reference allows a function to change its parameters "outside"
|
||||||
|
the call, Python offers no way to reassign a different value to a variable given
|
||||||
|
to a function.
|
||||||
|
<br><br>
|
||||||
|
For an example use of C++'s pass-by-reference mechanic, consider <code>std::swap</code>:
|
||||||
|
it's a function, but it modifies the two variables given to it. There's no
|
||||||
|
way to generically implement such a function in Python.
|
||||||
|
{{< /sidenote >}} We also can't allow arbitrary expressions to serve as traversers:
|
||||||
|
our translator keeps some context about which variables are traversers, what their
|
||||||
|
bounds are, and how they behave. Thus, __calls to traverser macros are very much macros__:
|
||||||
|
they operate on AST nodes, and __require__ that their first argument is a variable,
|
||||||
|
named like the traverser. We use the `requireTraverser` monadic operation
|
||||||
|
to get the traverser associated with the given variable name, and then perform
|
||||||
|
the operation as intended. The `at!(t)` operation is straightforward:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 317 319 >}}
|
||||||
|
|
||||||
|
The `at!(t,i)` is less so, since it deals with the intricacies of accessing
|
||||||
|
the list at either a positive or negative offset, depending on the direction
|
||||||
|
of the traverser. We implement a function to properly generate an expression for the offset:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 246 249 >}}
|
||||||
|
|
||||||
|
We then implement `at!(t,i)` as follows:
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 320 323 >}}
|
||||||
|
|
||||||
|
The most complicated macro is `bisect!`. It must be able to step the traverser,
|
||||||
|
and also return a tuple of two lists that the bisection yields. We also
|
||||||
|
prefer that it not pollute the environment with extra variables. To
|
||||||
|
achieve this, we want `bisect!` to be a function call. We want this
|
||||||
|
function to implement the iteration and list construction.
|
||||||
|
|
||||||
|
`bisect!`, by definition, takes a lambda. This lambda, in our language, is declared
|
||||||
|
in the lexical scope in which `bisect!` is called. Thus, to guarantee correct translation,
|
||||||
|
we must do one of two things:
|
||||||
|
|
||||||
|
1. Translate 1-to-1, and create a lambda, passing it to a fixed `bisect` function declared
|
||||||
|
elsewhere.
|
||||||
|
2. Translate to a nested function declaration,
|
||||||
|
{{< sidenote "right" "inline-note" "inlining the lambda." >}}
|
||||||
|
Inlining, in this case, means replacing a call to a function with the function's body.
|
||||||
|
We do this to prevent the overhead of calling a function, which typically involves pushing
|
||||||
|
on a stack and other extraneous work. If our function is simple, like a simple
|
||||||
|
comparison, it doesn't make sense to spend the effort calling it.
|
||||||
|
{{< /sidenote >}}
|
||||||
|
|
||||||
|
Since I quite like the idea of inlining a lambda, let's settle for that. To do this,
|
||||||
|
we pull a fresh temporary variable and declare a function, into which we place
|
||||||
|
the traverser iteration code, as well as the body of the lambda, with the variable
|
||||||
|
substituted for the list access expression.
|
||||||
|
{{< sidenote "left" "nonlocal-note" "Here's the code:" >}}
|
||||||
|
Reading the lexical scope is one thing, but modifying it is another. To prevent
|
||||||
|
accidental changes to the variables outside a nested function, Python assumes
|
||||||
|
that variables assigned inside the function body are local to the function. Thus, to make
|
||||||
|
sure changing our variable (the traverser index) has an effect outside the function
|
||||||
|
(as it should) we must include the <code>nonlocal</code> keyword, telling
|
||||||
|
Python that we're not declaring a new, local variable, but mutating the old one.
|
||||||
|
{{< /sidenote >}}
|
||||||
|
|
||||||
|
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 342 363 >}}
|
||||||
|
|
||||||
|
### The Output
|
||||||
|
Let's see what the compiler spits out:
|
||||||
|
|
||||||
|
```Python
|
||||||
|
from bisect import bisect
|
||||||
|
import random
|
||||||
|
def qselect(xs,k,c):
|
||||||
|
if xs==[]:
|
||||||
|
return 0
|
||||||
|
bisector = 0
|
||||||
|
pivot = random.randrange(len(xs))
|
||||||
|
pivotE = xs.pop(pivot)
|
||||||
|
def temp1():
|
||||||
|
nonlocal bisector
|
||||||
|
l = []
|
||||||
|
r = []
|
||||||
|
while bisector<len(xs):
|
||||||
|
if c(xs[bisector])<c(pivotE):
|
||||||
|
l.append(xs[bisector])
|
||||||
|
else:
|
||||||
|
r.append(xs[bisector])
|
||||||
|
bisector = bisector+1
|
||||||
|
return (l, r)
|
||||||
|
(leftList,rightList) = temp1()
|
||||||
|
if k>len(leftList)+1:
|
||||||
|
return qselect(rightList, k-len(leftList)-1, c)
|
||||||
|
elif k==len(leftList)+1:
|
||||||
|
return pivotE
|
||||||
|
else:
|
||||||
|
return qselect(leftList, k, c)
|
||||||
|
def closestUnsorted(xs,k,n):
|
||||||
|
min = qselect(list(xs), k, (lambda x: abs(x-n)))
|
||||||
|
out = []
|
||||||
|
countEqual = k
|
||||||
|
iter = 0
|
||||||
|
while iter<len(xs):
|
||||||
|
if abs(xs[iter]-n)<abs(min-n):
|
||||||
|
countEqual = countEqual-1
|
||||||
|
iter = iter+1
|
||||||
|
0
|
||||||
|
iter = 0
|
||||||
|
while iter<len(xs):
|
||||||
|
if abs(xs[iter]-n)==abs(min-n) and countEqual>0:
|
||||||
|
countEqual = countEqual-1
|
||||||
|
out = out+[xs[iter]]
|
||||||
|
elif abs(xs[iter]-n)<abs(min-n):
|
||||||
|
out = out+[xs[iter]]
|
||||||
|
iter = iter+1
|
||||||
|
0
|
||||||
|
return out
|
||||||
|
def closestSorted(xs,k,n):
|
||||||
|
start = bisect(xs, n)
|
||||||
|
counter = 0
|
||||||
|
left = start
|
||||||
|
right = start
|
||||||
|
while counter!=k and left-1*1>=0 and right<len(xs):
|
||||||
|
if abs(xs[left-1*1]-n)<abs(xs[right]-n):
|
||||||
|
left = left-1
|
||||||
|
0
|
||||||
|
else:
|
||||||
|
right = right+1
|
||||||
|
0
|
||||||
|
counter = counter+1
|
||||||
|
while counter!=k and (left-1*1>=0 or right<len(xs)):
|
||||||
|
if left-1*1>=0:
|
||||||
|
left = left-1
|
||||||
|
0
|
||||||
|
else:
|
||||||
|
right = right+1
|
||||||
|
0
|
||||||
|
counter = counter+1
|
||||||
|
return xs[(left):(right)]
|
||||||
|
def xyz(xs,k):
|
||||||
|
xs.sort()
|
||||||
|
x = 0
|
||||||
|
dest = []
|
||||||
|
while x<len(xs):
|
||||||
|
z = x+2
|
||||||
|
y = x+1
|
||||||
|
while y<z and z<len(xs):
|
||||||
|
if xs[x]+xs[y]==xs[z]:
|
||||||
|
dest = dest+[(xs[x], xs[y], xs[z])]
|
||||||
|
z = z+1
|
||||||
|
0
|
||||||
|
elif xs[x]+xs[y]>xs[z]:
|
||||||
|
z = z+1
|
||||||
|
0
|
||||||
|
else:
|
||||||
|
y = y+1
|
||||||
|
0
|
||||||
|
x = x+1
|
||||||
|
0
|
||||||
|
return dest
|
||||||
|
```
|
||||||
|
|
||||||
|
Observe that the generated code just uses indices, `+`, `-`, and various comparison operators.
|
||||||
|
Our traverser is an example of a __zero cost abstraction__, a feature that, conceptually,
|
||||||
|
operates at a higher level, making us no longer worry about adding, subtracting, and
|
||||||
|
comparing numbers, while, in the final output, not damaging the performance or safety
|
||||||
|
of the code. Also observe the various `0` standalone statements. This is an issue
|
||||||
|
with the translator: traverser macros may not always yield an expression, but
|
||||||
|
the type of `translateExpr` and `translateStmt` effectively requires one. Thus,
|
||||||
|
when a macro doesn't generate anything useful, we give it the placeholder expression `0`.
|
||||||
|
|
||||||
|
That concludes this third post in the series. I hope to see you in the next one!
|
||||||
@@ -574,5 +574,6 @@ In the next several posts, we will improve
|
|||||||
our compiler to properly free unused memory
|
our compiler to properly free unused memory
|
||||||
using a __garbage collector__, implement
|
using a __garbage collector__, implement
|
||||||
lambda functions using __lambda lifting__,
|
lambda functions using __lambda lifting__,
|
||||||
and use our Alloc instruction to implement `let/in` expressions. See
|
and use our Alloc instruction to implement `let/in` expressions.
|
||||||
you there!
|
We get started on the first of these tasks in
|
||||||
|
[Part 9 - Garbage Collection]({{< relref "09_compiler_garbage_collection.md" >}}).
|
||||||
|
|||||||
558
content/blog/09_compiler_garbage_collection.md
Normal file
558
content/blog/09_compiler_garbage_collection.md
Normal file
@@ -0,0 +1,558 @@
|
|||||||
|
---
|
||||||
|
title: Compiling a Functional Language Using C++, Part 9 - Garbage Collection
|
||||||
|
date: 2020-02-10T19:22:41-08:00
|
||||||
|
tags: ["C and C++", "Functional Languages", "Compilers"]
|
||||||
|
---
|
||||||
|
|
||||||
|
> "When will you learn? When will you learn that __your actions have consequences?__"
|
||||||
|
|
||||||
|
So far, we've entirely ignored the problem of memory management. Every time
|
||||||
|
that we need a new node for our growing graph, we simply ask for more memory
|
||||||
|
from the runtime with `malloc`. But selfishly, even when we no longer require
|
||||||
|
the memory allocated for a particular node (because the node is no longer in use),
|
||||||
|
we do not `free` it. In fact, our runtime currently has no idea about
|
||||||
|
which nodes are needed and which ones are ready to be discarded.
|
||||||
|
|
||||||
|
To convince ourselves that this is a problem, let's first assess the extent of the damage.
|
||||||
|
Consider the program from `works3.txt`:
|
||||||
|
|
||||||
|
{{< rawblock "compiler/09/examples/works3.txt" >}}
|
||||||
|
|
||||||
|
Compiling and running this program through `valgrind`, we get the following output:
|
||||||
|
|
||||||
|
```
|
||||||
|
==XXXX== LEAK SUMMARY:
|
||||||
|
==XXXX== definitely lost: 288 bytes in 12 blocks
|
||||||
|
==XXXX== indirectly lost: 768 bytes in 34 blocks
|
||||||
|
==XXXX== possibly lost: 0 bytes in 0 blocks
|
||||||
|
==XXXX== still reachable: 0 bytes in 0 blocks
|
||||||
|
==XXXX== suppressed: 0 bytes in 0 blocks
|
||||||
|
```
|
||||||
|
|
||||||
|
We lost 1056 bytes of memory, just to return the length of a list
|
||||||
|
with 3 elements. The problem of leaking memory is very real.
|
||||||
|
|
||||||
|
How do we solve this issue? We can't embed memory management into our language;
|
||||||
|
we want to keep it pure, and managing memory is typically pretty far from
|
||||||
|
that goal. Instead, we will make our runtime do the work of freeing memory.
|
||||||
|
Even then, this is a nontrivial goal: our runtime manipulates graphs, each
|
||||||
|
of which can be combined with others in arbitrary ways. In general, there
|
||||||
|
will not always be a _single_ node that, when freed, will guarantee that
|
||||||
|
another node can be freed as well. Instead, it's very possible in our
|
||||||
|
graphs that two parent nodes both refer to a third, and only when both
|
||||||
|
parents are freed can we free that third node itself. Consider,
|
||||||
|
for instance, the function `square` as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
defn square x = {
|
||||||
|
x * x
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
This function will receive, on top of the stack, a single graph representing `x`.
|
||||||
|
It will then create two applications of a global `(+)` function, each time
|
||||||
|
to the graph of `x`. Thus, it will construct a tree with two `App` nodes, both
|
||||||
|
of which
|
||||||
|
{{< sidenote "right" "lazy-note" "must keep track of a reference to x.">}}
|
||||||
|
We later take advantage of this, by replacing the graph of <code>x</code> with the
|
||||||
|
result of evaluating it. Since both <code>App</code> nodes point to the same
|
||||||
|
graph, when we evaluate it once, each node observes this update, and is not
|
||||||
|
required to evaluate <code>x</code> again. With this, we achieve lazy evaluation.
|
||||||
|
{{< /sidenote >}} The runtime will have to wait until both `App` nodes
|
||||||
|
are freed before it can free the graph of `x`.
|
||||||
|
|
||||||
|
This seems simple enough! If there are multiple things that may reference a node
|
||||||
|
in the graph, why don't we just keep track of how many there are? Once we know
|
||||||
|
that no more things are still referencing a node, we can free it. This is
|
||||||
|
called [reference counting](https://en.wikipedia.org/wiki/Reference_counting).
|
||||||
|
Reference counting is a valid technique, but unfortunately, it will not suit us.
|
||||||
|
The reason for this is that our language may produce
|
||||||
|
[cyclic graphs](https://en.wikipedia.org/wiki/Cycle_(graph_theory)). Consider,
|
||||||
|
for example, this definition of an infinite list of the number 1:
|
||||||
|
|
||||||
|
```
|
||||||
|
defn ones = { Cons 1 ones }
|
||||||
|
```
|
||||||
|
|
||||||
|
Envisioning the graph of the tree, we can see `ones` as an application
|
||||||
|
of the constructor `Cons` to two arguments, one of which is `ones` again.
|
||||||
|
{{< sidenote "right" "recursive-note" "It refers to itself!" >}}
|
||||||
|
Things are actually more complicated than this. In our current language,
|
||||||
|
recursive definitions are only possible in function definitions (like
|
||||||
|
<code>ones</code>). In our runtime, each time there is a reference
|
||||||
|
to a function, this is done through a <em>new node</em>, which
|
||||||
|
means that functions with recursive definitions are <em>not</em> represented cyclically.
|
||||||
|
Therefore, reference counting would work. However, in the future,
|
||||||
|
our language will have more ways of creating circular definitions,
|
||||||
|
some of which will indeed create cycles in our graphs. So, to
|
||||||
|
prepare for this, we will avoid the use of reference counting.
|
||||||
|
{{< /sidenote >}} In this case, when we compute the number of nodes
|
||||||
|
that require `ones`, we will always find the number to be at least 1: `ones`
|
||||||
|
needs `ones`, which needs `ones`, and so on. It will not be possible for
|
||||||
|
us to free `ones`, then, by simply counting the number of references to it.
|
||||||
|
|
||||||
|
There's a more powerful technique than reference counting for freeing
|
||||||
|
unused memory: __mark-and-sweep garbage collection__. This technique
|
||||||
|
is conceptually pretty simple to grasp, yet will allow us to handle
|
||||||
|
cycles in our graphs. Unsurprisingly, we implement this type
|
||||||
|
of garbage collection in two stages:
|
||||||
|
|
||||||
|
1. __Mark__: We go through every node that is still needed by
|
||||||
|
the runtime, and recursively mark it, its children, and so on as "to keep".
|
||||||
|
2. __Sweep__: We go through every node we haven't yet freed, and,
|
||||||
|
if it hasn't been marked as "to keep", we free it.
|
||||||
|
|
||||||
|
This also seems simple enough. There are two main things for us
|
||||||
|
to figure out:
|
||||||
|
|
||||||
|
1. For __Mark__, what are the "nodes still needed by the runtime"?
|
||||||
|
These are just the nodes on the various G-machine stacks. If
|
||||||
|
a node is not on the stack, nor is it a child of a node
|
||||||
|
that is on the stack, why should we keep it around?
|
||||||
|
2. For __Sweep__, how do we keep track of all the nodes we haven't
|
||||||
|
yet freed? In our case, the solution is a global list of allocated
|
||||||
|
nodes, which is updated every time that a node is allocated.
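
Putting the two phases together with these two answers, the core idea fits in a
few lines. Here is an illustrative sketch in Python (the real runtime, discussed
below, is written in C, and it actually frees the unmarked nodes rather than
filtering them out):

```Python
def mark(node):
    if node is None or node.get("marked"):
        return
    node["marked"] = True
    for child in node.get("children", []):
        mark(child)

def collect(allocated, stacks):
    # Mark: everything reachable from a G-machine stack is "to keep".
    for stack in stacks:
        for node in stack:
            mark(node)
    # Sweep: anything left unmarked would be freed here.
    survivors = [n for n in allocated if n.pop("marked", False)]
    return survivors
```
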
Wait a minute, though. Inside of `unwind` in C, we only have
|
||||||
|
a reference to the most recent stack. Our execution model allows
|
||||||
|
for an arbitrary number of stacks: we can keep using `Eval`,
|
||||||
|
placing the current stack on the dump, and starting a new stack
|
||||||
|
from scratch to evaluate a node. How can we traverse these stacks
|
||||||
|
from inside unwind? One solution could be to have each stack
|
||||||
|
point to the "parent" stack. To find all the nodes on the
|
||||||
|
stack, then, we'd start with the current stack, mark all the
|
||||||
|
nodes on it as "required", then move on to the parent stack,
|
||||||
|
rinse and repeat. This is plausible and pretty simple, but
|
||||||
|
there's another way.
|
||||||
|
|
||||||
|
We clean up after ourselves.
|
||||||
|
|
||||||
|
### Towards a Cleaner Stack
|
||||||
|
Simon Peyton Jones wrote his G-machine semantics in a particular way. Every time
|
||||||
|
that a function is called, all it leaves behind on the stack is the graph node
|
||||||
|
that represents the function's output. Our own internal functions, however, have been less
|
||||||
|
careful. Consider, for instance, the "binary operator" function I showed you.
|
||||||
|
Its body is given by the following G-machine instructions:
|
||||||
|
|
||||||
|
```C++
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_push(1)));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_eval()));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_push(1)));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_eval()));
|
||||||
|
instructions.push_back(instruction_ptr(new instruction_binop(op)));
|
||||||
|
```
|
||||||
|
|
||||||
|
When the function is called, there are at least 3 things on the stack:
|
||||||
|
|
||||||
|
1. The "outermost" application node, to be replaced with an indirection (to enable laziness).
|
||||||
|
2. The second argument to the binary operator.
|
||||||
|
3. The first argument to the binary operator.
|
||||||
|
|
||||||
|
Then, __Push__ adds another node to the stack, and an __Eval__ forces
|
||||||
|
its evaluation (and leaves it on the stack). This happens again with the second argument.
|
||||||
|
Finally, we call __BinOp__, popping two values off the stack and combining them
|
||||||
|
according to the binary operator. This leaves the stack with 4 things: the 3 I described
|
||||||
|
above, and the newly computed value. This is fine as far as `eval` is concerned: its
|
||||||
|
implementation only asks for the top value on the stack after `unwind` finishes. But
|
||||||
|
for anything more complicated, this is a very bad side effect. We want to leave the
|
||||||
|
stack as clean as we found it - with one node and no garbage.
|
||||||
|
|
||||||
|
Fortunately, the way we compile functions is a good guide for how we should
|
||||||
|
compile internal operators and constructors. The idea is captured
|
||||||
|
by the two instructions we insert at the end of a user-defined
|
||||||
|
function:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/09/definition.cpp" 56 57 >}}
|
||||||
|
|
||||||
|
Once a result is computed, we turn the node that represented the application
|
||||||
|
into an indirection, and point it to the computed result (as I said before,
|
||||||
|
this enables lazy evaluation). We also pop the arguments given to the function
|
||||||
|
off the stack. Let's add these two things to the `gen_llvm_internal_op` function:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/09/main.cpp" 70 85 >}}
|
||||||
|
|
||||||
|
Notice, in particular, the `instruction_update(2)` and `instruction_pop(2)`
|
||||||
|
instructions that were recently added. A similar thing has to be done for data
|
||||||
|
type constructors. The difference, though, is that __Pack__ removes the data
|
||||||
|
it packs from the stack, and thus, __Pop__ is not needed:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/09/definition.cpp" 102 117 >}}
|
||||||
|
|
||||||
|
With this done, let's run a quick test: let's print the number of things
|
||||||
|
on the stack at the end of an `eval` call (before the stack is freed,
|
||||||
|
of course). We can compare the output of runtime without the fix (`old`)
|
||||||
|
and with the fix (`current`):
|
||||||
|
|
||||||
|
```
|
||||||
|
current old
|
||||||
|
|
||||||
|
Current stack size is 0 | Current stack size: 1
|
||||||
|
Current stack size is 0 | Current stack size: 1
|
||||||
|
Current stack size is 0 | Current stack size: 1
|
||||||
|
Current stack size is 0 | Current stack size: 1
|
||||||
|
Current stack size is 0 | Current stack size: 0
|
||||||
|
Current stack size is 0 | Current stack size: 0
|
||||||
|
Current stack size is 0 | Current stack size: 3
|
||||||
|
Current stack size is 0 | Current stack size: 0
|
||||||
|
Current stack size is 0 | Current stack size: 3
|
||||||
|
Current stack size is 0 | Current stack size: 0
|
||||||
|
Current stack size is 0 | Current stack size: 3
|
||||||
|
Result: 3 | Result: 3
|
||||||
|
```
|
||||||
|
|
||||||
|
The stack is now much cleaner! Every time `eval` is called, it starts
|
||||||
|
with one node, and ends with one node (which is then popped).
|
||||||
|
|
||||||
|
### One Stack to Rule Them All
|
||||||
|
|
||||||
|
Wait a minute. If the stack is really always empty at the end, do we really need to construct
|
||||||
|
a new stack every time?
|
||||||
|
{{< sidenote "right" "arity-note" "I think not" >}}
|
||||||
|
There's some nuance to this. While it is true that for the most
|
||||||
|
part, we can get rid of the new stacks in favor of a single
|
||||||
|
one, our runtime will experience a change. The change lies
|
||||||
|
in the Unwind-Global rule, which <em>requires that the
|
||||||
|
stack has as many children as the function needs
|
||||||
|
arguments</em>. Until now, there was no way
|
||||||
|
for this condition to be accidentally satisfied: the function
|
||||||
|
we were unwinding was the only thing on the stack. Now,
|
||||||
|
though, things are different: the function being
|
||||||
|
unwound may share a stack with something else,
|
||||||
|
and just checking the stack size will not be sufficient.
|
||||||
|
<em>I believe</em> that this is not a problem for us,
|
||||||
|
since the compiler will only emit <strong>Eval</strong>
|
||||||
|
instructions for things it knows are data types or numbers,
|
||||||
|
meaning their type is not a partially applied function
|
||||||
|
that is missing arguments. However, this is a nontrivial
|
||||||
|
observation.
|
||||||
|
{{< /sidenote >}}, and Simon Peyton Jones seems to
|
||||||
|
agree. In _Implementing Functional Languages: a tutorial_, he mentions
|
||||||
|
that the dump does not need to be implemented as a real stack of stacks.
|
||||||
|
So let's try this out: instead of starting a new stack using `eval`,
|
||||||
|
let's use an existing one, by just calling `unwind` again. To do so,
|
||||||
|
all we have to do is change our `instruction_eval` instruction. When
|
||||||
|
the G-machine wants something evaluated now, it should just call
|
||||||
|
`unwind` directly!
|
||||||
|
|
||||||
|
To make this change, we have to make `unwind` available to the
|
||||||
|
compiler. We thus declare it in the `llvm_context.cpp` file:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/09/llvm_context.cpp" 158 163 >}}
|
||||||
|
|
||||||
|
And even create a function to construct a call to `unwind`
|
||||||
|
with the following signature:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/09/llvm_context.hpp" 58 58 >}}
|
||||||
|
|
||||||
|
We implement it like so:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/09/llvm_context.cpp" 217 220 >}}
|
||||||
|
|
||||||
|
Finally, the `instruction_eval::gen_llvm` method simply calls
|
||||||
|
`unwind`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/09/instruction.cpp" 157 159 >}}
|
||||||
|
|
||||||
|
After this change, we only call `eval` from `main`. Furthermore,
|
||||||
|
since `eval` releases all the resources it allocates before
|
||||||
|
returning, we won't be able to
|
||||||
|
{{< sidenote "right" "retrieve-note" "easily retrieve" >}}
|
||||||
|
We were able to do this before, but that's because our
|
||||||
|
runtime didn't free the nodes, <em>ever</em>. Now that
|
||||||
|
it does, returning a node violates that node's lifetime.
|
||||||
|
{{< /sidenote >}}the result of the evaluation from it.
|
||||||
|
Thus, we simply merge `eval` with `main` - combining
|
||||||
|
the printing and the initialization / freeing
|
||||||
|
code.
|
||||||
|
|
||||||
|
With this, only one stack will be allocated for the entirety of
|
||||||
|
program execution. This doesn't just help us save on memory
|
||||||
|
allocations, but also __solves the problem of marking
|
||||||
|
valid nodes during garbage collection__! Instead of traversing
|
||||||
|
a dump of stacks, we can now simply traverse a single stack;
|
||||||
|
all that we need is in one place.
|
||||||
|
|
||||||
|
So this takes care, more or less, of the "mark" portion of mark-and-sweep.
|
||||||
|
Using the stack, we can recursively mark the nodes that we need. But
|
||||||
|
what about "sweeping"? How can we possibly know of every node that
|
||||||
|
we've allocated? There's some more bookkeeping for us to do.
|
||||||
|
|
||||||
|
### It's All Connected
|
||||||
|
There exists a simple technique I've previously seen (and used)
|
||||||
|
for keeping track of all the allocated memory. The technique is
|
||||||
|
to __turn all the allocated nodes into elements of a linked list__.
|
||||||
|
The general process of implementing this proceeds as follows:
|
||||||
|
|
||||||
|
1. To each node, add a "next" pointer (sketched after this list).
|
||||||
|
2. Keep a handle to the whole node chain somewhere.
|
||||||
|
3. Add each newly allocated node to the front of the whole chain.
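Concretely, step 1 amounts to giving every node a pointer to the next allocated node. A minimal sketch of the idea (the `gc_next` name, and the exact layout of the struct, are my own; the real `node_base` in `runtime.h` may differ):

```C
struct node_base {
    /* ... the fields the node already had (its tag, and so on) ... */
    struct node_base* gc_next; /* next allocated node in the whole chain */
};
```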
|
||||||
|
|
||||||
|
This "somewhere" could be a global variable. However,
|
||||||
|
since we already pass a stack to almost all of our
|
||||||
|
functions, it makes more sense to make the list handle
|
||||||
|
a part of some data structure that will also contain the stack,
|
||||||
|
and pass that around, instead. This keeps all of the G-machine
|
||||||
|
data in one place, and in principle could allow for concurrent
|
||||||
|
execution of more than one G-machine in a single program. Let's
|
||||||
|
call our new data structure `gmachine`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/09/runtime.h" 69 74 >}}
|
||||||
|
|
||||||
|
Here, the `stack` field holds the G-machine stack,
|
||||||
|
and `gc_nodes` is the handle to the list of all the nodes
|
||||||
|
we've allocated and not yet freed. Don't worry about the `gc_node_count`
|
||||||
|
and `gc_threshold` fields - we'll get to them a little later.
|
||||||
|
|
||||||
|
This is going to be a significant change. First of all, since
|
||||||
|
the handle won't be global, it can't be accessed from inside the
|
||||||
|
`alloc_*` functions. Instead, we have to make sure to add
|
||||||
|
nodes allocated through `alloc_*` to a G-machine
|
||||||
|
wherever we call the allocators. To make it easier to add nodes to a G-machine
|
||||||
|
GC handle, let's make a new function, `track`:
|
||||||
|
|
||||||
|
```C
|
||||||
|
struct node_base* gmachine_track(struct gmachine*, struct node_base*);
|
||||||
|
```
|
||||||
|
|
||||||
|
This function will add the given node to the G-machine's handle,
|
||||||
|
and return that same node. This way, we can wrap nodes in
|
||||||
|
a call to `gmachine_track`. We will talk about this
|
||||||
|
function's implementation later in the post.
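For example, where a generated function previously pushed a freshly allocated node directly, it would now first register it. This is only a sketch: `g` stands for the current `struct gmachine*`, and `alloc_app` with its two children stands in for whichever `alloc_*` call is being made.

```C
/* Allocate as before, then hand the node to the G-machine so that
   the garbage collector knows it exists. */
struct node_base* fresh =
    gmachine_track(g, (struct node_base*) alloc_app(left, right));
```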
|
||||||
|
|
||||||
|
This doesn't get us all the way to a working runtime, though:
|
||||||
|
right now, we still pass around `struct stack*` instead of
|
||||||
|
`struct gmachine*` everywhere. However, the whole point
|
||||||
|
of adding the `gmachine` struct was to store more data in it!
|
||||||
|
Surely we need that new data somewhere, and thus, we need to
|
||||||
|
use the `gmachine` struct for _some_ functions. What functions
|
||||||
|
_do_ need a whole `gmachine*`, and which ones only need
|
||||||
|
a `stack*`?
|
||||||
|
|
||||||
|
1. {{< sidenote "right" "ownership-note" "Clearly," >}}
|
||||||
|
This might not be clear. Maybe <em>pushing</em> onto a stack will
|
||||||
|
add a node to our GC handle, and so, we need to have access
|
||||||
|
to the handle in <code>stack_push</code>. The underlying
|
||||||
|
question is that of <em>ownership</em>: when we allocate
|
||||||
|
a node, which part of the program does it "belong" to?
|
||||||
|
The "owner" of the node should do the work of managing
|
||||||
|
when to free it or keep it. Since we already agreed to
|
||||||
|
create a <code>gmachine</code> struct to house the GC
|
||||||
|
handle, it makes sense that nodes are owned by the
|
||||||
|
G-machine. Thus, the assumption in functions like
|
||||||
|
<code>stack_push</code> is that the "owner" of the node
|
||||||
|
already took care of allocating and tracking it, and
|
||||||
|
<code>stack_push</code> itself shouldn't bother.
|
||||||
|
{{< /sidenote >}} `stack_push`, `stack_pop`, and similar functions
|
||||||
|
do not require a G-machine.
|
||||||
|
2. `stack_alloc` and `stack_pack` __do__ need a G-machine,
|
||||||
|
because they must allocate new nodes. Where the nodes
|
||||||
|
are allocated, we should add them to the GC handle.
|
||||||
|
3. Since they use `stack_alloc` and `stack_pack`,
|
||||||
|
generated functions also need a G-machine.
|
||||||
|
4. Since `unwind` calls the generated functions,
|
||||||
|
it must also receive a G-machine.
|
||||||
|
|
||||||
|
As far as stack functions go, we only _need_ to update
|
||||||
|
`stack_alloc` and `stack_pack`. Everything else
|
||||||
|
doesn't require new node allocations, and thus,
|
||||||
|
does not require the GC handle. However, this makes
|
||||||
|
our code rather ugly: we have a set of mostly `stack_*`
|
||||||
|
functions, followed suddenly by two `gmachine_*` functions.
|
||||||
|
In the interest of cleanliness, let's instead do the following:
|
||||||
|
|
||||||
|
1. Make all functions associated with G-machine rules (like
|
||||||
|
__Alloc__, __Update__, and so on) require a `gmachine*`. This
|
||||||
|
way, there's a correspondence between our code and the theory.
|
||||||
|
2. Leave the rest of the functions (`stack_push`, `stack_pop`,
|
||||||
|
etc.) as is. They are not G-machine specific, and don't
|
||||||
|
require a GC handle, so there's no need to touch them.
|
||||||
|
|
||||||
|
Let's make this change. We end up with the following
|
||||||
|
functions:
|
||||||
|
|
||||||
|
{{< codelines "C" "compiler/09/runtime.h" 56 84 >}}
|
||||||
|
|
||||||
|
For the majority of the changed functions, the
|
||||||
|
updates are
|
||||||
|
{{< sidenote "right" "cosmetic-note" "cosmetic." >}}
|
||||||
|
We must also update the LLVM/C++ declarations of
|
||||||
|
the affected functions: many of them now take a
|
||||||
|
<code>gmachine_ptr_type</code> instead of <code>stack_ptr_type</code>.
|
||||||
|
This change is not shown explicitly here (it is hard to do with our
|
||||||
|
growing code base), but it is nonetheless significant.
|
||||||
|
{{< /sidenote >}} The functions
|
||||||
|
that require more significant modifications are `gmachine_alloc`
|
||||||
|
and `gmachine_pack`. In both, we must now make a call to `gmachine_track`
|
||||||
|
to ensure that a newly allocated node will be garbage collected in the future.
|
||||||
|
The updated code for `gmachine_alloc` is:
|
||||||
|
|
||||||
|
{{< codelines "C" "compiler/09/runtime.c" 140 145 >}}
|
||||||
|
|
||||||
|
Correspondingly, the updated code for `gmachine_pack` is:
|
||||||
|
|
||||||
|
{{< codelines "C" "compiler/09/runtime.c" 147 162 >}}
|
||||||
|
|
||||||
|
Note that we've secretly made one more change. Instead of
|
||||||
|
allocating `sizeof(*data) * n` bytes of memory for
|
||||||
|
the array of packed nodes, we allocate `sizeof(*data) * (n + 1)`,
|
||||||
|
and set the last element to `NULL`. This will allow other
|
||||||
|
functions (which we will soon write) to know how many elements are packed inside
|
||||||
|
a `node_data` (effectively, we've added a `NULL` terminator).
|
||||||
|
|
||||||
|
We must change our compiler to keep it up to date with this change. Importantly,
|
||||||
|
it must know that a G-machine struct exists. To give it
|
||||||
|
this information, we add a new
|
||||||
|
`llvm::StructType*` called `gmachine_type` to the `llvm_context` class,
|
||||||
|
initialize it in the constructor, and set its body as follows:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/09/llvm_context.cpp" 21 26 >}}
|
||||||
|
|
||||||
|
The compiler must also know that generated functions now use the G-machine
|
||||||
|
struct rather than a stack struct:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/09/llvm_context.cpp" 19 19 >}}
|
||||||
|
|
||||||
|
Since we still use some functions that require a stack and not a G-machine,
|
||||||
|
we must have a way to get the stack from a G-machine. To do this,
|
||||||
|
we create a new `unwrap` function, which uses LLVM's GEP instruction
|
||||||
|
to get a pointer to the G-machine's stack field:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/09/llvm_context.cpp" 222 225 >}}
|
||||||
|
|
||||||
|
We use this function elsewhere, such as in `llvm_context::create_pop`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/09/llvm_context.cpp" 176 179 >}}
|
||||||
|
|
||||||
|
Finally, we want to make sure our generated functions don't allocate
|
||||||
|
nodes without tracking them with the G-machine. To do so, we modify
|
||||||
|
all the `create_*` methods to require the G-machine function argument,
|
||||||
|
and update the functions themselves to call `gmachine_track`. For
|
||||||
|
example, here's `llvm_context::create_num`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/09/llvm_context.cpp" 235 239 >}}
|
||||||
|
|
||||||
|
Of course, this requires us to add a new `create_track` method
|
||||||
|
to the `llvm_context`:
|
||||||
|
|
||||||
|
{{< codelines "C++" "compiler/09/llvm_context.cpp" 212 215 >}}
|
||||||
|
|
||||||
|
This is good. Let's now implement the actual mark-and-sweep algorithm
|
||||||
|
in `gmachine_gc`:
|
||||||
|
|
||||||
|
{{< codelines "C" "compiler/09/runtime.c" 186 204 >}}
|
||||||
|
|
||||||
|
In the code above, we first iterate through the stack,
|
||||||
|
calling `gc_visit_node` on every node that we encounter. The
|
||||||
|
assumption is that once `gc_visit_node` is done, every node
|
||||||
|
that _can_ be reached has its `gc_reachable` field set to 1,
|
||||||
|
and all the others have it set to 0.
|
||||||
|
|
||||||
|
Once we reach the end of the stack, we continue to the "sweep" phase,
|
||||||
|
iterating through the linked list of nodes (held in the G-machine
|
||||||
|
GC handle `gc_nodes`). For each node, if its `gc_reachable` flag
|
||||||
|
is not set, we remove it from the linked list, and call `free_node_direct`
|
||||||
|
on it. Otherwise (that is, if the flag __is__ set), we clear it,
|
||||||
|
so that the node can potentially be garbage collected in a future
|
||||||
|
invocation of `gmachine_gc`.
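Putting the two phases together, the collector behaves roughly like this. This is a sketch that follows the description above rather than the verbatim `runtime.c` code; in particular, it assumes the G-machine embeds its stack by value with `data`/`count` fields, and that nodes are chained through a `gc_next` field:

```C
void gmachine_gc(struct gmachine* g) {
    /* mark: everything reachable from the stack stays alive */
    for(size_t i = 0; i < g->stack.count; i++) {
        gc_visit_node(g->stack.data[i]);
    }

    /* sweep: walk the list of all allocations, freeing unmarked nodes */
    struct node_base** link = &g->gc_nodes;
    while(*link != NULL) {
        struct node_base* node = *link;
        if(!node->gc_reachable) {
            *link = node->gc_next;   /* unlink the dead node */
            free_node_direct(node);
            g->gc_node_count--;      /* keep the live-node count accurate */
        } else {
            node->gc_reachable = 0;  /* clear the mark for the next collection */
            link = &node->gc_next;
        }
    }
}
```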
|
||||||
|
|
||||||
|
`gc_visit_node` recursively marks a node and its children as reachable:
|
||||||
|
|
||||||
|
{{< codelines "C" "compiler/09/runtime.c" 51 70 >}}
|
||||||
|
|
||||||
|
This is possible with the `node_data` nodes because of the change we
|
||||||
|
made to the `gmachine_pack` instruction earlier: now, the last element
|
||||||
|
of the "packed" array is `NULL`, telling `gc_visit_node` that it has
|
||||||
|
reached the end of the list of children.
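In code, the loop over a data node's children can therefore simply stop at that terminator. A sketch of just that part of the traversal (the `node_data` field name `array` is an assumption):

```C
/* Mark every child of a packed data node; gmachine_pack allocated the
   child array with one extra slot and terminated it with NULL. */
static void gc_visit_data_children(struct node_data* data) {
    for(struct node_base** child = data->array; *child != NULL; child++) {
        gc_visit_node(*child);
    }
}
```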
|
||||||
|
|
||||||
|
`free_node_direct` performs a non-recursive deallocation of all
|
||||||
|
the resources held by a particular node. So far, this is only
|
||||||
|
needed for `node_data` nodes, since the arrays holding their children
|
||||||
|
are dynamically allocated. Thus, the code for the function is
|
||||||
|
pretty simple:
|
||||||
|
|
||||||
|
{{< codelines "C" "compiler/09/runtime.c" 45 49 >}}
|
||||||
|
|
||||||
|
### When to Collect
|
||||||
|
When should we run garbage collection? Initially, I tried
|
||||||
|
running it after every call to `unwind`. However, this
|
||||||
|
quickly proved impractical: the performance of all
|
||||||
|
the programs in the language decreased by a spectacular
|
||||||
|
amount. Programs like `works1.txt` and `works2.txt`
|
||||||
|
would take tens of seconds to complete.
|
||||||
|
|
||||||
|
Instead of this madness, let's settle for an approach
|
||||||
|
common to many garbage collectors. Let's __perform
|
||||||
|
garbage collection every time the amount of
|
||||||
|
memory we've allocated doubles__. Tracking when the
|
||||||
|
amount of allocated memory doubles is the purpose of
|
||||||
|
the `gc_node_count` and `gc_threshold` fields in the
|
||||||
|
`gmachine` struct. The former field tracks how many
|
||||||
|
nodes have been tracked by the garbage collector, and the
|
||||||
|
latter holds the number of nodes the G-machine must
|
||||||
|
reach before triggering garbage collection.
|
||||||
|
|
||||||
|
Since the G-machine is made aware of allocations
|
||||||
|
by a call to the `gmachine_track` function, this
|
||||||
|
is where we will attempt to perform garbage collection.
|
||||||
|
We end up with the following code:
|
||||||
|
|
||||||
|
{{< codelines "C" "compiler/09/runtime.c" 171 184 >}}
|
||||||
|
|
||||||
|
When a node is added to the GC handle, we increment the `gc_node_count`
|
||||||
|
field. If the new value of this field exceeds the threshold,
|
||||||
|
we perform garbage collection. There are cases in which
|
||||||
|
this is fairly dangerous: for instance, `gmachine_pack` first
|
||||||
|
moves all packed nodes into an array, then allocates a `node_data`
|
||||||
|
node. This means that for a brief moment, the nodes stored
|
||||||
|
into the new data node are inaccessible from the stack,
|
||||||
|
and thus susceptible to garbage collection. To prevent
|
||||||
|
situations like this, we run `gc_visit_node` on the node
|
||||||
|
being tracked, marking it and its children as "reachable".
|
||||||
|
Finally, we set the next "free" threshold to double
|
||||||
|
the number of currently allocated nodes.
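Put together, `gmachine_track` behaves roughly as follows. Again, this is a sketch consistent with the description above, not the verbatim `runtime.c` code (the `gc_next` field name is assumed):

```C
struct node_base* gmachine_track(struct gmachine* g, struct node_base* node) {
    /* prepend the new node to the list of all allocations */
    node->gc_next = g->gc_nodes;
    g->gc_nodes = node;
    g->gc_node_count++;

    if(g->gc_node_count > g->gc_threshold) {
        /* the node may not be reachable from the stack yet, so mark it
           (and its children) before collecting */
        gc_visit_node(node);
        gmachine_gc(g);
        /* collect again only once the number of live nodes doubles */
        g->gc_threshold = g->gc_node_count * 2;
    }
    return node;
}
```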
|
||||||
|
|
||||||
|
This is about as much as we need to do. The change in this
|
||||||
|
post was a major one, and required updating multiple files.
|
||||||
|
As always, you're welcome to check out [the compiler source
|
||||||
|
code for this post](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/09).
|
||||||
|
To wrap up, let's evaluate our change.
|
||||||
|
|
||||||
|
To especially stress the compiler, I came up with a prime number
|
||||||
|
generator. Since booleans are not in the standard library, and
|
||||||
|
since it isn't possible to pattern match on numbers, my
|
||||||
|
only option was to use Peano encoding. This effectively
|
||||||
|
means that numbers are represented as linked lists,
|
||||||
|
which makes garbage collection all the more
|
||||||
|
important. The program is quite long, but you can
|
||||||
|
[find the entire code here](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/09/examples/primes.txt).
|
||||||
|
|
||||||
|
When I ran the `primes` program compiled using the
|
||||||
|
previous version of the compiler under `time`, I
|
||||||
|
got the following output:
|
||||||
|
|
||||||
|
```
|
||||||
|
Maximum resident set size (kbytes): 935764
|
||||||
|
Minor (reclaiming a frame) page faults: 233642
|
||||||
|
```
|
||||||
|
|
||||||
|
In contrast, here is the output of `time` when running
|
||||||
|
the same program compiled with the new version of
|
||||||
|
the compiler:
|
||||||
|
|
||||||
|
```
|
||||||
|
Maximum resident set size (kbytes): 7448
|
||||||
|
Minor (reclaiming a frame) page faults: 1577
|
||||||
|
```
|
||||||
|
|
||||||
|
We have reduced maximum memory usage by a factor of
|
||||||
|
125, and the number of page faults by a factor of 148.
|
||||||
|
That seems pretty good!
|
||||||
|
|
||||||
|
With this success, we end today's post. As I mentioned
|
||||||
|
before, we're not done. The language is still clunky to use,
|
||||||
|
and can benefit from `let/in` expressions and __lambda functions__.
|
||||||
|
Furthermore, our language is currently monomorphic, and would
|
||||||
|
be much better with __polymorphism__. Finally, to make our language
|
||||||
|
capable of more-than-trivial work, we may want to implement
|
||||||
|
__Input/Output__ and __strings__. I hope to see you in future posts,
|
||||||
|
where we will implement these features!
|
||||||
65
content/blog/10_compiler_polymorphism.md
Normal file
65
content/blog/10_compiler_polymorphism.md
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
---
|
||||||
|
title: Compiling a Functional Language Using C++, Part 10 - Polymorphism
|
||||||
|
date: 2019-12-09T23:26:46-08:00
|
||||||
|
tags: ["C and C++", "Functional Languages", "Compilers"]
|
||||||
|
draft: true
|
||||||
|
---
|
||||||
|
|
||||||
|
Last time, we wrote some pretty interesting programs in our little language.
|
||||||
|
We successfully expressed arithmetic and recursion. But there's one thing
|
||||||
|
that we cannot express in our language without further changes: an `if` statement.
|
||||||
|
|
||||||
|
Suppose we didn't want to add a special `if/else` expression into our language.
|
||||||
|
Thanks to lazy evaluation, we can express it using a function:
|
||||||
|
|
||||||
|
```
|
||||||
|
defn if c t e = {
|
||||||
|
case c of {
|
||||||
|
True -> { t }
|
||||||
|
False -> { e }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
But an issue still remains: so far, our compiler is __monomorphic__. That
|
||||||
|
is, a particular function can only have one possible type for each one of its
|
||||||
|
arguments. With our current setup, something like this
|
||||||
|
{{< sidenote "right" "if-note" "would not work:" >}}
|
||||||
|
In a polymorphically typed language, the inner <code>if</code> would just evaluate to
|
||||||
|
<code>False</code>, and the whole expression to 3.
|
||||||
|
{{< /sidenote >}}
|
||||||
|
|
||||||
|
```
|
||||||
|
if (if True False True) 11 3
|
||||||
|
```
|
||||||
|
|
||||||
|
This is because, for this to work, both of the following would need to hold (borrowing
|
||||||
|
some of our notation from the [typechecking]({{< relref "03_compiler_typechecking.md" >}}) post):
|
||||||
|
|
||||||
|
$$
|
||||||
|
\\text{if} : \\text{Int} \\rightarrow \\text{Int}
|
||||||
|
$$
|
||||||
|
$$
|
||||||
|
\\text{if} : \\text{Bool} \\rightarrow \\text{Bool}
|
||||||
|
$$
|
||||||
|
|
||||||
|
But using our rules so far, such a thing is impossible, since there is no way for
|
||||||
|
\\(\text{Int}\\) to be unified with \\(\text{Bool}\\). We need a more powerful
|
||||||
|
set of rules to describe our program's types. One such set of rules is
|
||||||
|
the [Hindley-Milner type system](https://en.wikipedia.org/wiki/Hindley%E2%80%93Milner_type_system),
|
||||||
|
which we have previously alluded to. In fact, the rules we came up
|
||||||
|
with were already very close to Hindley-Milner, with the exception of two:
|
||||||
|
__generalization__ and __instantiation__. Instantiation first:
|
||||||
|
|
||||||
|
$$
|
||||||
|
\\frac
|
||||||
|
{\\Gamma \\vdash e : \\sigma \\quad \\sigma' \\sqsubseteq \\sigma}
|
||||||
|
{\\Gamma \\vdash e : \\sigma'}
|
||||||
|
$$
|
||||||
|
|
||||||
|
Next, generalization:
|
||||||
|
$$
|
||||||
|
\frac
|
||||||
|
{\\Gamma \\vdash e : \\sigma \\quad \\alpha \\not \\in \\text{free}(\\Gamma)}
|
||||||
|
{\\Gamma \\vdash e : \\forall \\alpha . \\sigma}
|
||||||
|
$$
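For instance, these two rules are exactly what the `if` function from the beginning of this post needs. Generalization would let `if` be given a type quantified over the type of its branches (written out in full here):

$$
\\text{if} : \\forall \\alpha . \\text{Bool} \\rightarrow \\alpha \\rightarrow \\alpha \\rightarrow \\alpha
$$

Instantiation would then let \\(\alpha\\) become \\(\text{Int}\\) for the outer `if` and \\(\text{Bool}\\) for the inner one, which is precisely what the nested example above requires.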
|
||||||
116
content/blog/crystal_nix.md
Normal file
116
content/blog/crystal_nix.md
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
---
|
||||||
|
title: Building a Basic Crystal Project with Nix
|
||||||
|
date: 2020-02-16T14:31:42-08:00
|
||||||
|
tags: ["Crystal", "Nix"]
|
||||||
|
---
|
||||||
|
I really like the idea of Nix: you can have reproducible builds, written more or less
|
||||||
|
declaratively. I also really like the programming language [Crystal](https://crystal-lang.org/),
|
||||||
|
which is a compiled Ruby derivative. Recently, I decided to try to learn NixOS as a package author,
|
||||||
|
and decided to make a Crystal project of mine, [pegasus](https://github.com/DanilaFe/pegasus),
|
||||||
|
my guinea pig. In this post, I will document my experience setting up Nix with Crystal.
|
||||||
|
|
||||||
|
### Getting Started
|
||||||
|
Pegasus is a rather simple package in terms of the build process - it has no dependencies, and
|
||||||
|
can be built with nothing but a Crystal compiler. Thus, I didn't have to worry about
|
||||||
|
dependencies. However, the `nixpkgs` repository does have a way to specify build dependencies
|
||||||
|
for a Nix project: [`crystal2nix`](https://github.com/NixOS/nixpkgs/blob/master/pkgs/development/compilers/crystal/crystal2nix.nix).
|
||||||
|
|
||||||
|
`crystal2nix` is another Nix package, which consists of a single Crystal binary program of
|
||||||
|
the same name. It translates a `shards.lock` file, generated by Crystal's `shards` package
|
||||||
|
manager, into a `shards.nix` file, which allows Nix to properly build the dependencies
|
||||||
|
of a Crystal package. If you have a project with a `shards.lock` file, you can use `crystal2nix`
|
||||||
|
inside a `nix-shell` as follows:
|
||||||
|
|
||||||
|
```Bash
|
||||||
|
nix-shell -p crystal2nix --run crystal2nix
|
||||||
|
```
|
||||||
|
|
||||||
|
The above command says, create an environment with the `crystal2nix` package, and run the
|
||||||
|
program. Note that you should run this
|
||||||
|
[inside the project's root](https://github.com/NixOS/nixpkgs/blob/21bfc57dd9eb5c7c58b6ab0bfa707cbc7cf04e98/pkgs/development/compilers/crystal/build-package.nix#L2). Also note that if you
|
||||||
|
don't depend on other Crystal packages, you will not have a `shards.lock`, and running
|
||||||
|
`crystal2nix` is unnecessary.
|
||||||
|
|
||||||
|
The Crystal folder in the `nixpkgs` repository contains one more handy utility:
|
||||||
|
`buildCrystalPackage`. This is a function exported by the `crystal` Nix package, which
|
||||||
|
significantly simplifies the process of building a Crystal binary package. We can
|
||||||
|
look to `crystal2nix.nix` (linked above) for a concrete example. We can observe the following
|
||||||
|
attributes:
|
||||||
|
|
||||||
|
* `pname` - the name of the package.
|
||||||
|
* `version` - the
|
||||||
|
{{< sidenote "right" "version-note" "version" >}}
|
||||||
|
In my example code, I set the Nix package version to the commit hash. Doing this alone
|
||||||
|
is probably not the best idea, since it will prevent version numbers from being ordered.
|
||||||
|
However, version <code>0.1.0</code> didn't make sense either, since the project technically
|
||||||
|
doesn't have a release yet. You should set this to an actual package version if you have
|
||||||
|
one.
|
||||||
|
{{< /sidenote >}} of the package, as usual.
|
||||||
|
* `crystalBinaries.<xxx>.src` - the source Crystal file for binary `xxx`.
|
||||||
|
|
||||||
|
Using these attributes, I concocted the following expression for pegasus and all
|
||||||
|
of its included programs:
|
||||||
|
|
||||||
|
```nix
|
||||||
|
{ stdenv, crystal, fetchFromGitHub }:
|
||||||
|
|
||||||
|
let
|
||||||
|
version = "0489d47b191ecf8501787355b948801506e7c70f";
|
||||||
|
src = fetchFromGitHub {
|
||||||
|
owner = "DanilaFe";
|
||||||
|
repo = "pegasus";
|
||||||
|
rev = version;
|
||||||
|
sha256 = "097m7l16byis07xlg97wn5hdsz9k6c3h1ybzd2i7xhkj24kx230s";
|
||||||
|
};
|
||||||
|
in
|
||||||
|
crystal.buildCrystalPackage {
|
||||||
|
pname = "pegasus";
|
||||||
|
inherit version;
|
||||||
|
inherit src;
|
||||||
|
|
||||||
|
crystalBinaries.pegasus.src = "src/pegasus.cr";
|
||||||
|
crystalBinaries.pegasus-dot.src = "src/tools/dot/pegasus_dot.cr";
|
||||||
|
crystalBinaries.pegasus-sim.src = "src/tools/sim/pegasus_sim.cr";
|
||||||
|
crystalBinaries.pegasus-c.src = "src/generators/c/pegasus_c.cr";
|
||||||
|
crystalBinaries.pegasus-csem.src = "src/generators/csem/pegasus_csem.cr";
|
||||||
|
crystalBinaries.pegasus-crystal.src = "src/generators/crystal/pegasus_crystal.cr";
|
||||||
|
crystalBinaries.pegasus-crystalsem.src = "src/generators/crystalsem/pegasus_crystalsem.cr";
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Here, I used Nix's `fetchFromGitHub` helper function. It clones a Git repository
|
||||||
|
from `https://github.com/<owner>/<repo>`, checks out the `rev` commit or branch,
|
||||||
|
and makes sure that it matches the `sha256` hash. The hash check is required so
|
||||||
|
that Nix can maintain the reproducibility of the build: if the commit is changed,
|
||||||
|
the code to compile may not be the same, and thus, the package would be different. The
|
||||||
|
hash helps detect such changes. To generate the hash, I used `nix-prefetch-git`,
|
||||||
|
which tries to clone the repository and compute its hash.
|
||||||
|
|
||||||
|
In the case that your project has a `shards.nix` file generated as above, you will also
|
||||||
|
need to add the following line inside your `buildCrystalPackage` call:
|
||||||
|
|
||||||
|
```
|
||||||
|
shardsFile = ./shards.nix;
|
||||||
|
```
|
||||||
|
|
||||||
|
The `shards.nix` file will contain all the dependency Git repositories, and the
|
||||||
|
`shardsFile` attribute will forward this list to `buildCrystalPackage`, which
|
||||||
|
will handle their inclusion in the package build.
|
||||||
|
|
||||||
|
That's pretty much it! The `buildCrystalPackage` Nix function does most of the heavy
|
||||||
|
lifting for Crystal binary packages. Please also check out
|
||||||
|
[this web page](https://edef.eu/~qyliss/nixlib/file/nixpkgs/doc/languages-frameworks/crystal.section.md.html):
|
||||||
|
I found out from it that `pname` had to be used instead of `name`, and it also has some information
|
||||||
|
regarding additional compiler options and build inputs.
|
||||||
|
|
||||||
|
### Appendix: A Small Caveat
|
||||||
|
I was running the `crystal2nix` (and doing all of my Nix-related work) in a NixOS virtual
|
||||||
|
machine. However, my version of NixOS was somewhat out of date (`19.04`), and I could
|
||||||
|
not retrieve `crystal2nix`. I had to switch channels to `nixos-19.09`, which is the current
|
||||||
|
stable version of NixOS.
|
||||||
|
|
||||||
|
There was one more difficulty involved in
|
||||||
|
[switching channels](https://nixos.wiki/wiki/Nix_channels): I had to do it as root.
|
||||||
|
It so happens that if you add a channel as a non-root user, your system will still use
|
||||||
|
the channel specified by root, and thus, you will not experience the update. You can spot
|
||||||
|
this issue in the output of `nix-env -u`; it will complain of duplicate packages.
|
||||||
110
content/blog/haskell_language_server_again.md
Normal file
110
content/blog/haskell_language_server_again.md
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
---
|
||||||
|
title: Using GHC IDE for Haskell Error Checking and Autocompletion
|
||||||
|
date: 2020-01-06T17:07:25-08:00
|
||||||
|
tags: ["Haskell", "Language Server Protocol"]
|
||||||
|
---
|
||||||
|
|
||||||
|
Last year, when I took Oregon State University's CS 381 class, I ended up setting
|
||||||
|
up my editor with the Haskell IDE engine. This made it possible
|
||||||
|
to detect errors, view types, and have good autocompletion within the editor itself.
|
||||||
|
Recently, I've found that GHC IDE works better for my projects, so instead
|
||||||
|
of butchering the original article, I'll just quickly write an updated version here,
|
||||||
|
referencing the old one when necessary.
|
||||||
|
|
||||||
|
By the end of the article, your editor should be able to detect errors and
|
||||||
|
properly autocomplete Haskell code, somewhat like in the below screenshot:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
### Downloading and Installing GHC IDE
|
||||||
|
GHC IDE is a Haskell-based program that uses the
|
||||||
|
{{< sidenote "right" "lsp-note" "language server protocol" >}}
|
||||||
|
You don't really need to know what the language server protocol (LSP) is
|
||||||
|
to use GHC IDE. If you are nonetheless interested, I wrote a little
|
||||||
|
bit about it <a href="{{< ref "/blog/haskell_language_server" >}}#prelude-language-server-protocol">in the previous iteration of this post.</a>
|
||||||
|
If you want more information, check out the <a href="https://microsoft.github.io/language-server-protocol/">official Microsoft page on LSP.</a>
|
||||||
|
{{< /sidenote >}} to communicate with any editor that supports it. Editors
|
||||||
|
with support for the LSP include Atom, Visual Studio Code, Emacs, and Vim. Thus,
|
||||||
|
you can get a good Haskell development environment without tying yourself to one
|
||||||
|
application or service.
|
||||||
|
|
||||||
|
We first want to download the GHC IDE. To do this, you need to have
|
||||||
|
[Git](https://git-scm.com/) installed. Once you have that, in your Git bash (on Windows)
|
||||||
|
or in your terminal (macOS, Linux), type the command:
|
||||||
|
|
||||||
|
```
|
||||||
|
git clone https://github.com/digital-asset/ghcide.git
|
||||||
|
```
|
||||||
|
|
||||||
|
To install GHC IDE, you can use either `cabal` (which is typically the `cabal-install` package,
|
||||||
|
and is required normally for this class) or `stack` (a build tool). For `cabal`:
|
||||||
|
|
||||||
|
```
|
||||||
|
cabal install
|
||||||
|
```
|
||||||
|
|
||||||
|
And for `stack`:
|
||||||
|
|
||||||
|
```
|
||||||
|
stack install
|
||||||
|
```
|
||||||
|
|
||||||
|
This will create an executable in your `~/.local/bin` directory. By default, this
|
||||||
|
is not usable from other programs, such as Vim, so you should add this directory
|
||||||
|
to your path. On Linux and macOS, this is done by adding the following line
|
||||||
|
to your `.bashrc` file (or equivalent):
|
||||||
|
|
||||||
|
```
|
||||||
|
export PATH=$PATH:/home/<yourusername>/.local/bin
|
||||||
|
```
|
||||||
|
|
||||||
|
On Windows, this is done by
|
||||||
|
{{< sidenote "right" "path-note" "editing your PATH variable." >}}
|
||||||
|
If you need to know how to change your <code>PATH</code>, I wrote
|
||||||
|
about it briefly in the <a href="{{< ref "/blog/haskell_language_server" >}}
|
||||||
|
#installation-of-v0-5-0-0-windows-systems">previous iteration of this post.</a>
|
||||||
|
{{< /sidenote >}} I don't run Windows,
|
||||||
|
so I don't know where `cabal install` will place the executable, but I do know
|
||||||
|
where the executable will appear if you use `stack install` - in the directory
|
||||||
|
given by:
|
||||||
|
|
||||||
|
```
|
||||||
|
stack path --local-bin
|
||||||
|
```
|
||||||
|
|
||||||
|
Adding that to your path should be sufficient to use GHC IDE.
|
||||||
|
|
||||||
|
### Setting up Your Editor
|
||||||
|
This is where the paths diverge. I personally use (Neo)vim, but for the sake
|
||||||
|
of completeness, I'll go over installation for Atom and VSCode (I'm not including
|
||||||
|
Emacs because I know nothing about configuring Emacs).
|
||||||
|
|
||||||
|
#### Atom
|
||||||
|
There appears to be an Atom extension specifically for GHC IDE:
|
||||||
|
[ide-haskell-ghcide](https://atom.io/packages/ide-haskell-ghcide). It doesn't
|
||||||
|
have a lot of configuration options, and will certainly require GHC IDE to
|
||||||
|
be in your path. However, since both GHC IDE and the Haskell IDE engine
|
||||||
|
use the Language Server Protocol, the more mature [ide-haskell-hie](https://atom.io/packages/ide-haskell-hie) extension may work, as well. In fact, since `ide-haskell-ghcide` is so young,
|
||||||
|
I'd recommend trying `ide-haskell-hie` first, configuring the settings (found under
|
||||||
|
_Settings > Packages > (Search ide-haskell-hie) > Settings_)
|
||||||
|
to use the following full path:
|
||||||
|
|
||||||
|
```
|
||||||
|
<output of stack path --local-bin>/ghcide
|
||||||
|
```
|
||||||
|
|
||||||
|
#### VSCode
|
||||||
|
The team behind GHC IDE maintains an official VSCode extension found
|
||||||
|
[here](https://marketplace.visualstudio.com/items?itemName=DigitalAssetHoldingsLLC.ghcide).
|
||||||
|
Installing it, when you have GHC IDE also installed, should be sufficient to get
|
||||||
|
VSCode to autocomplete and error check.
|
||||||
|
|
||||||
|
#### (Neo)vim
|
||||||
|
My original recommendations for (neo)vim remain unchanged, with the exception
|
||||||
|
of using `ghcide` instead of `hie` in the `serverCommands` variable. You
|
||||||
|
can find the original instructions
|
||||||
|
[here](https://danilafe.com/blog/haskell_language_server/#neovim).
|
||||||
|
|
||||||
|
### Conclusion
|
||||||
|
I hope that using GHC IDE, you'll be able to have a significantly more pleasant
|
||||||
|
Haskell experience in CS 381. Enjoy!
|
||||||
@@ -10,7 +10,7 @@ I found that __sidenotes__ were a feature that I didn't even know I needed.
|
|||||||
A lot of my writing seems to use small parenthesized remarks (like this), which,
|
A lot of my writing seems to use small parenthesized remarks (like this), which,
|
||||||
although it doesn't break the flow in a grammatical sense, lengthens the
|
although it doesn't break the flow in a grammatical sense, lengthens the
|
||||||
sentence, and makes it harder to follow. Since I do my best to write content
|
sentence, and makes it harder to follow. Since I do my best to write content
|
||||||
to help explain stuff (like the [compiler series]({{ relref "00_compiler_intro.md" }})),
|
to help explain stuff (like the [compiler series]({{< relref "00_compiler_intro.md" >}})),
|
||||||
making sentences __more__ difficult to understand is a no-go.
|
making sentences __more__ difficult to understand is a no-go.
|
||||||
|
|
||||||
So, what do they look like?
|
So, what do they look like?
|
||||||
|
|||||||
@@ -1,7 +1,10 @@
|
|||||||
@import "style.scss";
|
@import "style.scss";
|
||||||
|
|
||||||
$sidenote-width: 350px;
|
$sidenote-accommodate-shrink: 10rem;
|
||||||
$sidenote-offset: 15px;
|
$sidenote-width: 30rem;
|
||||||
|
$sidenote-offset: 1.5rem;
|
||||||
|
$sidenote-padding: 1rem;
|
||||||
|
$sidenote-highlight-border-width: .2rem;
|
||||||
|
|
||||||
.sidenote {
|
.sidenote {
|
||||||
&:hover {
|
&:hover {
|
||||||
@@ -11,15 +14,16 @@ $sidenote-offset: 15px;
|
|||||||
}
|
}
|
||||||
|
|
||||||
.sidenote-content {
|
.sidenote-content {
|
||||||
border: 2px dashed;
|
border: $sidenote-highlight-border-width dashed;
|
||||||
padding: 9px;
|
padding: $sidenote-padding -
|
||||||
|
($sidenote-highlight-border-width - $standard-border-width);
|
||||||
border-color: $primary-color;
|
border-color: $primary-color;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
.sidenote-label {
|
.sidenote-label {
|
||||||
border-bottom: 2px solid $primary-color;
|
border-bottom: .2rem solid $primary-color;
|
||||||
}
|
}
|
||||||
|
|
||||||
.sidenote-checkbox {
|
.sidenote-checkbox {
|
||||||
@@ -30,7 +34,7 @@ $sidenote-offset: 15px;
|
|||||||
display: block;
|
display: block;
|
||||||
position: absolute;
|
position: absolute;
|
||||||
width: $sidenote-width;
|
width: $sidenote-width;
|
||||||
margin-top: -1.5em;
|
margin-top: -1.5rem;
|
||||||
|
|
||||||
&.sidenote-right {
|
&.sidenote-right {
|
||||||
right: 0;
|
right: 0;
|
||||||
@@ -42,29 +46,50 @@ $sidenote-offset: 15px;
|
|||||||
margin-left: -($sidenote-width + $sidenote-offset);
|
margin-left: -($sidenote-width + $sidenote-offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
@media screen and
|
|
||||||
(max-width: $container-width + 2 * ($sidenote-width + 2 * $sidenote-offset)) {
|
|
||||||
position: static;
|
|
||||||
margin-top: 10px;
|
|
||||||
margin-bottom: 10px;
|
|
||||||
width: 100%;
|
|
||||||
display: none;
|
|
||||||
|
|
||||||
.sidenote-checkbox:checked ~ & {
|
|
||||||
display: block;
|
|
||||||
}
|
|
||||||
|
|
||||||
&.sidenote-left {
|
|
||||||
margin-left: 0px;
|
|
||||||
}
|
|
||||||
|
|
||||||
&.sidenote-right {
|
|
||||||
margin-right: 0px;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@include bordered-block;
|
@include bordered-block;
|
||||||
padding: 10px;
|
padding: $sidenote-padding;
|
||||||
box-sizing: border-box;
|
box-sizing: border-box;
|
||||||
text-align: left;
|
text-align: left;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@mixin hidden-sidenote {
|
||||||
|
position: static;
|
||||||
|
margin-top: 1rem;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
width: 100%;
|
||||||
|
display: none;
|
||||||
|
|
||||||
|
.sidenote-checkbox:checked ~ & {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@media screen and
|
||||||
|
(max-width: $container-width + 2 * ($sidenote-width + 2 * $sidenote-offset)) {
|
||||||
|
.sidenote-content.sidenote-left {
|
||||||
|
@include hidden-sidenote;
|
||||||
|
margin-left: 0rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.container {
|
||||||
|
position: relative;
|
||||||
|
left: -$sidenote-width/2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@media screen and
|
||||||
|
(max-width: $container-width + ($sidenote-width + 3 * $sidenote-offset)) {
|
||||||
|
.post-content {
|
||||||
|
max-width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sidenote-content.sidenote-right {
|
||||||
|
@include hidden-sidenote;
|
||||||
|
margin-right: 0rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.container {
|
||||||
|
position: initial;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,24 +1,28 @@
|
|||||||
$container-width: 800px;
|
$container-width: 45rem;
|
||||||
|
$standard-border-width: .075rem;
|
||||||
|
|
||||||
$primary-color: #36e281;
|
$primary-color: #36e281;
|
||||||
$primary-color-dark: darken($primary-color, 10%);
|
$primary-color-dark: darken($primary-color, 10%);
|
||||||
$code-color: #f0f0f0;
|
$code-color: #f0f0f0;
|
||||||
$code-color-dark: darken($code-color, 10%);
|
$code-color-dark: darken($code-color, 10%);
|
||||||
$border-color: #bfbfbf;
|
$border-color: #bfbfbf;
|
||||||
|
|
||||||
$font-heading: "Lora", serif;
|
$font-heading: "Lora", serif;
|
||||||
$font-body: "Raleway", serif;
|
$font-body: "Raleway", serif;
|
||||||
$font-code: "Inconsolata", monospace;
|
$font-code: "Inconsolata", monospace;
|
||||||
$standard-border: 1px solid $border-color;
|
|
||||||
|
$standard-border: $standard-border-width solid $border-color;
|
||||||
|
|
||||||
@mixin bordered-block {
|
@mixin bordered-block {
|
||||||
border: $standard-border;
|
border: $standard-border;
|
||||||
border-radius: 2px;
|
border-radius: .2rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
body {
|
body {
|
||||||
font-family: $font-body;
|
font-family: $font-body;
|
||||||
font-size: 1.0em;
|
font-size: 1.0rem;
|
||||||
line-height: 1.5;
|
line-height: 1.5;
|
||||||
margin-bottom: 1em;
|
margin-bottom: 1rem;
|
||||||
text-align: justify;
|
text-align: justify;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -27,8 +31,8 @@ main {
|
|||||||
}
|
}
|
||||||
|
|
||||||
h1, h2, h3, h4, h5, h6 {
|
h1, h2, h3, h4, h5, h6 {
|
||||||
margin-bottom: .1em;
|
margin-bottom: .1rem;
|
||||||
margin-top: .5em;
|
margin-top: .5rem;
|
||||||
font-family: $font-heading;
|
font-family: $font-heading;
|
||||||
font-weight: normal;
|
font-weight: normal;
|
||||||
text-align: left;
|
text-align: left;
|
||||||
@@ -49,7 +53,7 @@ code {
|
|||||||
|
|
||||||
pre code {
|
pre code {
|
||||||
display: block;
|
display: block;
|
||||||
padding: 0.5em;
|
padding: 0.5rem;
|
||||||
overflow-x: auto;
|
overflow-x: auto;
|
||||||
background-color: $code-color;
|
background-color: $code-color;
|
||||||
}
|
}
|
||||||
@@ -61,12 +65,12 @@ pre code {
|
|||||||
box-sizing: border-box;
|
box-sizing: border-box;
|
||||||
|
|
||||||
@media screen and (max-width: $container-width){
|
@media screen and (max-width: $container-width){
|
||||||
padding: 0em 1em 0em 1em;
|
padding: 0rem 1rem 0rem 1rem;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
.button, input[type="submit"] {
|
.button, input[type="submit"] {
|
||||||
padding: 0.5em;
|
padding: 0.5rem;
|
||||||
background-color: $primary-color;
|
background-color: $primary-color;
|
||||||
border: none;
|
border: none;
|
||||||
color: white;
|
color: white;
|
||||||
@@ -87,7 +91,7 @@ pre code {
|
|||||||
nav {
|
nav {
|
||||||
background-color: $primary-color;
|
background-color: $primary-color;
|
||||||
width: 100%;
|
width: 100%;
|
||||||
margin: 1em 0px 1em 0px;
|
margin: 1rem 0rem 1rem 0rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
nav a {
|
nav a {
|
||||||
@@ -110,7 +114,7 @@ nav a {
|
|||||||
}
|
}
|
||||||
|
|
||||||
.post-content {
|
.post-content {
|
||||||
margin-top: .5em;
|
margin-top: .5rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
h1 {
|
h1 {
|
||||||
|
|||||||
@@ -6,9 +6,11 @@
|
|||||||
<link href="https://fonts.googleapis.com/css?family=Inconsolata|Lora|Raleway" rel="stylesheet">
|
<link href="https://fonts.googleapis.com/css?family=Inconsolata|Lora|Raleway" rel="stylesheet">
|
||||||
<link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/normalize/5.0.0/normalize.min.css">
|
<link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/normalize/5.0.0/normalize.min.css">
|
||||||
{{ $style := resources.Get "scss/style.scss" | resources.ToCSS | resources.Minify }}
|
{{ $style := resources.Get "scss/style.scss" | resources.ToCSS | resources.Minify }}
|
||||||
{{ $sidenotes:= resources.Get "scss/sidenotes.scss" | resources.ToCSS | resources.Minify }}
|
{{ $sidenotes := resources.Get "scss/sidenotes.scss" | resources.ToCSS | resources.Minify }}
|
||||||
|
{{ $icon := resources.Get "img/favicon.png" }}
|
||||||
<link rel="stylesheet" href="{{ $style.Permalink }}">
|
<link rel="stylesheet" href="{{ $style.Permalink }}">
|
||||||
<link rel="stylesheet" href="{{ $sidenotes.Permalink }}">
|
<link rel="stylesheet" href="{{ $sidenotes.Permalink }}">
|
||||||
|
<link rel="icon" type="image/png" href="{{ $icon.Permalink }}">
|
||||||
|
|
||||||
<script src='https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-MML-AM_CHTML' async></script>
|
<script src='https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-MML-AM_CHTML' async></script>
|
||||||
{{ template "_internal/google_analytics.html" . }}
|
{{ template "_internal/google_analytics.html" . }}
|
||||||
|
|||||||
9
themes/vanilla/layouts/shortcodes/numberedsidenote
Normal file
9
themes/vanilla/layouts/shortcodes/numberedsidenote
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{{ .Page.Scratch.Add "numbernote-id" 1 }}
|
||||||
|
{{ $id := .Page.Scratch.Get "numbernote-id" }}
|
||||||
|
<span class="sidenote">
|
||||||
|
<label class="sidenote-label" for="numbernote-{{ $id }}">({{ $id }})</label>
|
||||||
|
<input class="sidenote-checkbox" type="checkbox" id="numbernote-{{ $id }}"></input>
|
||||||
|
<span class="sidenote-content sidenote-{{ .Get 0 }}">
|
||||||
|
{{ .Inner }}
|
||||||
|
</span>
|
||||||
|
</span>
|
||||||
Block a user