63 Commits

Author SHA1 Message Date
eac1151616 Do not attribute G-machine to SPJ specifically [drone: build passing] 2020-02-23 21:29:21 -08:00
f7a7100fea Reword explanation of Update + Pop 2020-02-23 21:26:56 -08:00
c207d1dfcf Remove unused line at the end of works1.txt 2020-02-23 21:26:37 -08:00
df051fd643 Fix n vs n-1 mistake 2020-02-23 21:20:32 -08:00
419ab937b6 Switch to full text RSS 2020-02-23 21:01:41 -08:00
7ff919c31b Make the shortcodes HTML-specific 2020-02-23 20:24:40 -08:00
ee90351c17 Add Crystal Nix post [drone: build passing] 2020-02-16 22:55:25 -08:00
fbdbf67ce3 Add gettext to build requirements to satisfy flex [drone: build passing] 2020-02-11 19:28:24 -08:00
a7e32d300a Add bison and flex to build requirements [drone: build failing] 2020-02-11 19:17:31 -08:00
56387cb936 Add make to pacman command [drone: build failing] 2020-02-11 18:56:55 -08:00
df965816ac Update pacman command to download database files. [drone: build failing] 2020-02-11 18:53:39 -08:00
c7341c9b15 Try again to include all the required tooling [drone: build failing] 2020-02-11 18:52:06 -08:00
00322d7e9f Try switch to an image with CMake. [drone: build failing] 2020-02-11 18:47:40 -08:00
ef93632130 Add garbage collection post to main link [drone: build failing] 2020-02-11 18:40:59 -08:00
0f744888ef Move testing code into a script 2020-02-11 18:37:21 -08:00
a5b84bab69 Revert to YAML 2020-02-11 14:25:44 -08:00
12725500a8 Try wrap pipeline in array 2020-02-11 14:20:47 -08:00
1917c08e51 Messing around to try get the syntax right, part 2 2020-02-11 14:03:24 -08:00
b304057560 Messing around to try get the syntax right 2020-02-11 14:02:13 -08:00
e5a39d8dfb Replace ints with strings 2020-02-11 13:50:05 -08:00
54ccef9c72 Remove string interpolation for a bit 2020-02-11 13:48:30 -08:00
c103c6acbf Try to build multiple compiler versions 2020-02-11 13:40:00 -08:00
d6f53076c0 Switch to jsonnet syntax [drone: build passing] 2020-02-11 13:35:36 -08:00
b07ea8fe9c Add initial drone configuration file [drone: build passing] 2020-02-11 13:25:10 -08:00
9a7441779f Fix typo in compiler series 2020-02-10 19:38:46 -08:00
a6f27e446d Remove draft flag and update date on 9th compiler post 2020-02-10 19:23:15 -08:00
e7f0ccfa16 Finish compiler series 2020-02-10 19:18:55 -08:00
e5d01a4e19 Add the primes program from compiler series 2020-02-10 18:13:04 -08:00
b7d72f2fbf Implement garbage collection in runtime 2020-02-06 11:32:19 -08:00
281dbbd174 Track allocated nodes using a G-machine struct in compiler series. 2020-02-06 10:05:26 -08:00
153349f3d5 Add intermediate style to fit right sidenotes 2020-02-03 13:38:21 -08:00
8d22acfe78 Switch to single stack in runtime for Compiler Series 2020-01-31 15:29:12 -08:00
c1b030ee97 Bump polymorphism compiler post up one spot 2020-01-27 20:34:04 -08:00
803f52b2d0 Update the compiler to leave the stack clean 2020-01-27 20:29:01 -08:00
2f96abeef6 Instantiate compiler for garbage collection 2020-01-27 14:52:25 -08:00
163fcd2b2e Revert "Update style to use inches". It sucks! 2020-01-15 18:32:34 -08:00
    This reverts commit 133979218a.
9ddcb1b3f2 Merge branch 'master' of dev.danilafe.com:Web-Projects/blog-static 2020-01-15 13:06:23 -08:00
133979218a Update style to use inches 2020-01-15 13:05:03 -08:00
ef545be03c Fix warnings from Hugo 2020-01-06 19:19:55 -08:00
c534dc7508 Remove draft tag from GHC IDE post 2020-01-06 18:49:28 -08:00
263ffe2b8c Add GHC IDE instructions for Haskell 2020-01-06 18:38:44 -08:00
67181fb033 Finish third post in CS325 series. 2020-01-03 23:47:36 -08:00
a026e67a3b Add first draft of Homework 3 (CS325) 2020-01-03 21:09:15 -08:00
d9544398b9 Add homework 3 solution for CS325 2020-01-02 21:20:32 -08:00
1c4bb29fdd Fix minor grammar mistake 2020-01-01 11:18:49 -08:00
765d497724 Address missing problem and make some other improvements in CS325HW2 2020-01-01 11:12:44 -08:00
80410c9200 Extract common parsing code 2019-12-31 21:59:13 -08:00
4e918db5cb Add the post for the second homework assignment. 2019-12-30 23:28:22 -08:00
382102f071 Add solution to CS325 hw2 2019-12-30 20:04:39 -08:00
6e88780f8b Add favicon to HTML 2019-12-30 14:56:09 -08:00
e3035b9d66 Make G-machine CSS use rem 2019-12-30 14:50:00 -08:00
8765626898 Fix typo on index page 2019-12-30 14:42:36 -08:00
c38247df9e Add ID to broken sidenote 2019-12-30 14:32:09 -08:00
baf44f8627 Fix todo 2019-12-29 22:51:59 -08:00
19aa126025 Add the first post in CS325 series 2019-12-29 22:47:36 -08:00
a406fb0846 Add first draft of Language 1 for CS325 2019-12-28 23:12:15 -08:00
75664e90bb Add solutions for HW1 for CS325 madness 2019-12-27 23:20:37 -08:00
f74209c970 Add common code for CS325 madness 2019-12-27 23:20:18 -08:00
c7ce8a3107 Add homework assignments 2019-12-27 23:18:00 -08:00
b3b906dd90 Add polymorphism draft 2019-12-27 23:13:23 -08:00
b8e0e0b4ce Change CSS to use rems 2019-12-27 23:12:35 -08:00
eb02e1e6b0 Fix broken link 2019-12-24 15:30:12 -08:00
b2fc6ea5a8 Add numbered sidenotes 2019-12-09 23:11:28 -08:00
84 changed files with 7343 additions and 56 deletions

.drone.yml (new file, +11 lines)

@@ -0,0 +1,11 @@
kind: pipeline
type: docker
name: default
steps:
- name: test-compiler
  image: archlinux
  commands:
  - pacman -Sy cmake gcc make llvm bison flex gettext --noconfirm
  - cd code/compiler
  - ./test.sh

@@ -6,7 +6,7 @@
 }
 .gmachine-instruction-name {
-  padding: 10px;
+  padding: .8rem;
   border-right: $standard-border;
   flex-grow: 1;
   flex-basis: 20%;
@@ -28,12 +28,12 @@
 }
 .gmachine-inner-label {
-  padding: 10px;
+  padding: .8rem;
   font-weight: bold;
 }
 .gmachine-inner-text {
-  padding: 10px;
+  padding: .8rem;
   text-align: right;
   flex-grow: 1;
 }

@@ -1,3 +1,2 @@
 defn main = { sum 320 6 }
 defn sum x y = { x + y }

@@ -0,0 +1,42 @@
cmake_minimum_required(VERSION 3.1)
project(compiler)
# Find all the required packages
find_package(BISON)
find_package(FLEX)
find_package(LLVM REQUIRED CONFIG)
# Set up the flex and bison targets
bison_target(parser
${CMAKE_CURRENT_SOURCE_DIR}/parser.y
${CMAKE_CURRENT_BINARY_DIR}/parser.cpp
COMPILE_FLAGS "-d")
flex_target(scanner
${CMAKE_CURRENT_SOURCE_DIR}/scanner.l
${CMAKE_CURRENT_BINARY_DIR}/scanner.cpp)
add_flex_bison_dependency(scanner parser)
# Find all the relevant LLVM components
llvm_map_components_to_libnames(LLVM_LIBS core x86asmparser x86codegen)
# Create compiler executable
add_executable(compiler
ast.cpp ast.hpp definition.cpp
llvm_context.cpp llvm_context.hpp
type_env.cpp type_env.hpp
env.cpp env.hpp
type.cpp type.hpp
error.cpp error.hpp
binop.cpp binop.hpp
instruction.cpp instruction.hpp
${BISON_parser_OUTPUTS}
${FLEX_scanner_OUTPUTS}
main.cpp
)
# Configure compiler executable
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_include_directories(compiler PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
target_include_directories(compiler PUBLIC ${LLVM_INCLUDE_DIRS})
target_compile_definitions(compiler PUBLIC ${LLVM_DEFINITIONS})
target_link_libraries(compiler ${LLVM_LIBS})

code/compiler/09/ast.cpp (new file, +264 lines)

@@ -0,0 +1,264 @@
#include "ast.hpp"
#include <ostream>
#include "binop.hpp"
#include "error.hpp"
static void print_indent(int n, std::ostream& to) {
while(n--) to << " ";
}
type_ptr ast::typecheck_common(type_mgr& mgr, const type_env& env) {
node_type = typecheck(mgr, env);
return node_type;
}
void ast::resolve_common(const type_mgr& mgr) {
type_var* var;
type_ptr resolved_type = mgr.resolve(node_type, var);
if(var) throw type_error("ambiguously typed program");
resolve(mgr);
node_type = std::move(resolved_type);
}
void ast_int::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "INT: " << value << std::endl;
}
type_ptr ast_int::typecheck(type_mgr& mgr, const type_env& env) const {
return type_ptr(new type_base("Int"));
}
void ast_int::resolve(const type_mgr& mgr) const {
}
void ast_int::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(new instruction_pushint(value)));
}
void ast_lid::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "LID: " << id << std::endl;
}
type_ptr ast_lid::typecheck(type_mgr& mgr, const type_env& env) const {
return env.lookup(id);
}
void ast_lid::resolve(const type_mgr& mgr) const {
}
void ast_lid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(
env->has_variable(id) ?
(instruction*) new instruction_push(env->get_offset(id)) :
(instruction*) new instruction_pushglobal(id)));
}
void ast_uid::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "UID: " << id << std::endl;
}
type_ptr ast_uid::typecheck(type_mgr& mgr, const type_env& env) const {
return env.lookup(id);
}
void ast_uid::resolve(const type_mgr& mgr) const {
}
void ast_uid::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
into.push_back(instruction_ptr(new instruction_pushglobal(id)));
}
void ast_binop::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "BINOP: " << op_name(op) << std::endl;
left->print(indent + 1, to);
right->print(indent + 1, to);
}
type_ptr ast_binop::typecheck(type_mgr& mgr, const type_env& env) const {
type_ptr ltype = left->typecheck_common(mgr, env);
type_ptr rtype = right->typecheck_common(mgr, env);
type_ptr ftype = env.lookup(op_name(op));
if(!ftype) throw type_error(std::string("unknown binary operator ") + op_name(op));
type_ptr return_type = mgr.new_type();
type_ptr arrow_one = type_ptr(new type_arr(rtype, return_type));
type_ptr arrow_two = type_ptr(new type_arr(ltype, arrow_one));
mgr.unify(arrow_two, ftype);
return return_type;
}
void ast_binop::resolve(const type_mgr& mgr) const {
left->resolve_common(mgr);
right->resolve_common(mgr);
}
void ast_binop::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
right->compile(env, into);
left->compile(env_ptr(new env_offset(1, env)), into);
into.push_back(instruction_ptr(new instruction_pushglobal(op_action(op))));
into.push_back(instruction_ptr(new instruction_mkapp()));
into.push_back(instruction_ptr(new instruction_mkapp()));
}
void ast_app::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "APP:" << std::endl;
left->print(indent + 1, to);
right->print(indent + 1, to);
}
type_ptr ast_app::typecheck(type_mgr& mgr, const type_env& env) const {
type_ptr ltype = left->typecheck_common(mgr, env);
type_ptr rtype = right->typecheck_common(mgr, env);
type_ptr return_type = mgr.new_type();
type_ptr arrow = type_ptr(new type_arr(rtype, return_type));
mgr.unify(arrow, ltype);
return return_type;
}
void ast_app::resolve(const type_mgr& mgr) const {
left->resolve_common(mgr);
right->resolve_common(mgr);
}
void ast_app::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
right->compile(env, into);
left->compile(env_ptr(new env_offset(1, env)), into);
into.push_back(instruction_ptr(new instruction_mkapp()));
}
void ast_case::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "CASE: " << std::endl;
for(auto& branch : branches) {
print_indent(indent + 1, to);
branch->pat->print(to);
to << std::endl;
branch->expr->print(indent + 2, to);
}
}
type_ptr ast_case::typecheck(type_mgr& mgr, const type_env& env) const {
type_var* var;
type_ptr case_type = mgr.resolve(of->typecheck_common(mgr, env), var);
type_ptr branch_type = mgr.new_type();
for(auto& branch : branches) {
type_env new_env = env.scope();
branch->pat->match(case_type, mgr, new_env);
type_ptr curr_branch_type = branch->expr->typecheck_common(mgr, new_env);
mgr.unify(branch_type, curr_branch_type);
}
case_type = mgr.resolve(case_type, var);
if(!dynamic_cast<type_data*>(case_type.get())) {
throw type_error("attempting case analysis of non-data type");
}
return branch_type;
}
void ast_case::resolve(const type_mgr& mgr) const {
of->resolve_common(mgr);
for(auto& branch : branches) {
branch->expr->resolve_common(mgr);
}
}
void ast_case::compile(const env_ptr& env, std::vector<instruction_ptr>& into) const {
type_data* type = dynamic_cast<type_data*>(of->node_type.get());
of->compile(env, into);
into.push_back(instruction_ptr(new instruction_eval()));
instruction_jump* jump_instruction = new instruction_jump();
into.push_back(instruction_ptr(jump_instruction));
for(auto& branch : branches) {
std::vector<instruction_ptr> branch_instructions;
pattern_var* vpat;
pattern_constr* cpat;
if((vpat = dynamic_cast<pattern_var*>(branch->pat.get()))) {
branch->expr->compile(env_ptr(new env_offset(1, env)), branch_instructions);
for(auto& constr_pair : type->constructors) {
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) !=
jump_instruction->tag_mappings.end())
break;
jump_instruction->tag_mappings[constr_pair.second.tag] =
jump_instruction->branches.size();
}
jump_instruction->branches.push_back(std::move(branch_instructions));
} else if((cpat = dynamic_cast<pattern_constr*>(branch->pat.get()))) {
env_ptr new_env = env;
for(auto it = cpat->params.rbegin(); it != cpat->params.rend(); it++) {
new_env = env_ptr(new env_var(*it, new_env));
}
branch_instructions.push_back(instruction_ptr(new instruction_split(
cpat->params.size())));
branch->expr->compile(new_env, branch_instructions);
branch_instructions.push_back(instruction_ptr(new instruction_slide(
cpat->params.size())));
int new_tag = type->constructors[cpat->constr].tag;
if(jump_instruction->tag_mappings.find(new_tag) !=
jump_instruction->tag_mappings.end())
throw type_error("technically not a type error: duplicate pattern");
jump_instruction->tag_mappings[new_tag] =
jump_instruction->branches.size();
jump_instruction->branches.push_back(std::move(branch_instructions));
}
}
for(auto& constr_pair : type->constructors) {
if(jump_instruction->tag_mappings.find(constr_pair.second.tag) ==
jump_instruction->tag_mappings.end())
throw type_error("non-total pattern");
}
}
void pattern_var::print(std::ostream& to) const {
to << var;
}
void pattern_var::match(type_ptr t, type_mgr& mgr, type_env& env) const {
env.bind(var, t);
}
void pattern_constr::print(std::ostream& to) const {
to << constr;
for(auto& param : params) {
to << " " << param;
}
}
void pattern_constr::match(type_ptr t, type_mgr& mgr, type_env& env) const {
type_ptr constructor_type = env.lookup(constr);
if(!constructor_type) {
throw type_error(std::string("pattern using unknown constructor ") + constr);
}
for(int i = 0; i < params.size(); i++) {
type_arr* arr = dynamic_cast<type_arr*>(constructor_type.get());
if(!arr) throw type_error("too many parameters in constructor pattern");
env.bind(params[i], arr->left);
constructor_type = arr->right;
}
mgr.unify(t, constructor_type);
}

code/compiler/09/ast.hpp (new file, +141 lines)

@@ -0,0 +1,141 @@
#pragma once
#include <memory>
#include <vector>
#include "type.hpp"
#include "type_env.hpp"
#include "binop.hpp"
#include "instruction.hpp"
#include "env.hpp"
struct ast {
type_ptr node_type;
virtual ~ast() = default;
virtual void print(int indent, std::ostream& to) const = 0;
virtual type_ptr typecheck(type_mgr& mgr, const type_env& env) const = 0;
virtual void resolve(const type_mgr& mgr) const = 0;
virtual void compile(const env_ptr& env,
std::vector<instruction_ptr>& into) const = 0;
type_ptr typecheck_common(type_mgr& mgr, const type_env& env);
void resolve_common(const type_mgr& mgr);
};
using ast_ptr = std::unique_ptr<ast>;
struct pattern {
virtual ~pattern() = default;
virtual void print(std::ostream& to) const = 0;
virtual void match(type_ptr t, type_mgr& mgr, type_env& env) const = 0;
};
using pattern_ptr = std::unique_ptr<pattern>;
struct branch {
pattern_ptr pat;
ast_ptr expr;
branch(pattern_ptr p, ast_ptr a)
: pat(std::move(p)), expr(std::move(a)) {}
};
using branch_ptr = std::unique_ptr<branch>;
struct ast_int : public ast {
int value;
explicit ast_int(int v)
: value(v) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
struct ast_lid : public ast {
std::string id;
explicit ast_lid(std::string i)
: id(std::move(i)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
struct ast_uid : public ast {
std::string id;
explicit ast_uid(std::string i)
: id(std::move(i)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
struct ast_binop : public ast {
binop op;
ast_ptr left;
ast_ptr right;
ast_binop(binop o, ast_ptr l, ast_ptr r)
: op(o), left(std::move(l)), right(std::move(r)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
struct ast_app : public ast {
ast_ptr left;
ast_ptr right;
ast_app(ast_ptr l, ast_ptr r)
: left(std::move(l)), right(std::move(r)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
struct ast_case : public ast {
ast_ptr of;
std::vector<branch_ptr> branches;
ast_case(ast_ptr o, std::vector<branch_ptr> b)
: of(std::move(o)), branches(std::move(b)) {}
void print(int indent, std::ostream& to) const;
type_ptr typecheck(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr) const;
void compile(const env_ptr& env, std::vector<instruction_ptr>& into) const;
};
struct pattern_var : public pattern {
std::string var;
pattern_var(std::string v)
: var(std::move(v)) {}
void print(std::ostream &to) const;
void match(type_ptr t, type_mgr& mgr, type_env& env) const;
};
struct pattern_constr : public pattern {
std::string constr;
std::vector<std::string> params;
pattern_constr(std::string c, std::vector<std::string> p)
: constr(std::move(c)), params(std::move(p)) {}
void print(std::ostream &to) const;
void match(type_ptr t, type_mgr&, type_env& env) const;
};

@@ -0,0 +1,21 @@
#include "binop.hpp"
std::string op_name(binop op) {
switch(op) {
case PLUS: return "+";
case MINUS: return "-";
case TIMES: return "*";
case DIVIDE: return "/";
}
return "??";
}
std::string op_action(binop op) {
switch(op) {
case PLUS: return "plus";
case MINUS: return "minus";
case TIMES: return "times";
case DIVIDE: return "divide";
}
return "??";
}

@@ -0,0 +1,12 @@
#pragma once
#include <string>
enum binop {
PLUS,
MINUS,
TIMES,
DIVIDE
};
std::string op_name(binop op);
std::string op_action(binop op);

@@ -0,0 +1,121 @@
#include "definition.hpp"
#include "error.hpp"
#include "ast.hpp"
#include "instruction.hpp"
#include "llvm_context.hpp"
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/Type.h>
void definition_defn::typecheck_first(type_mgr& mgr, type_env& env) {
return_type = mgr.new_type();
type_ptr full_type = return_type;
for(auto it = params.rbegin(); it != params.rend(); it++) {
type_ptr param_type = mgr.new_type();
full_type = type_ptr(new type_arr(param_type, full_type));
param_types.push_back(param_type);
}
env.bind(name, full_type);
}
void definition_defn::typecheck_second(type_mgr& mgr, const type_env& env) const {
type_env new_env = env.scope();
auto param_it = params.begin();
auto type_it = param_types.rbegin();
while(param_it != params.end() && type_it != param_types.rend()) {
new_env.bind(*param_it, *type_it);
param_it++;
type_it++;
}
type_ptr body_type = body->typecheck_common(mgr, new_env);
mgr.unify(return_type, body_type);
}
void definition_defn::resolve(const type_mgr& mgr) {
type_var* var;
body->resolve_common(mgr);
return_type = mgr.resolve(return_type, var);
if(var) throw type_error("ambiguously typed program");
for(auto& param_type : param_types) {
param_type = mgr.resolve(param_type, var);
if(var) throw type_error("ambiguously typed program");
}
}
void definition_defn::compile() {
env_ptr new_env = env_ptr(new env_offset(0, nullptr));
for(auto it = params.rbegin(); it != params.rend(); it++) {
new_env = env_ptr(new env_var(*it, new_env));
}
body->compile(new_env, instructions);
instructions.push_back(instruction_ptr(new instruction_update(params.size())));
instructions.push_back(instruction_ptr(new instruction_pop(params.size())));
}
void definition_defn::gen_llvm_first(llvm_context& ctx) {
generated_function = ctx.create_custom_function(name, params.size());
}
void definition_defn::gen_llvm_second(llvm_context& ctx) {
ctx.builder.SetInsertPoint(&generated_function->getEntryBlock());
for(auto& instruction : instructions) {
instruction->gen_llvm(ctx, generated_function);
}
ctx.builder.CreateRetVoid();
}
void definition_data::typecheck_first(type_mgr& mgr, type_env& env) {
type_data* this_type = new type_data(name);
type_ptr return_type = type_ptr(this_type);
int next_tag = 0;
for(auto& constructor : constructors) {
constructor->tag = next_tag;
this_type->constructors[constructor->name] = { next_tag++ };
type_ptr full_type = return_type;
for(auto it = constructor->types.rbegin(); it != constructor->types.rend(); it++) {
type_ptr type = type_ptr(new type_base(*it));
full_type = type_ptr(new type_arr(type, full_type));
}
env.bind(constructor->name, full_type);
}
}
void definition_data::typecheck_second(type_mgr& mgr, const type_env& env) const {
// Nothing
}
void definition_data::resolve(const type_mgr& mgr) {
// Nothing
}
void definition_data::compile() {
}
void definition_data::gen_llvm_first(llvm_context& ctx) {
for(auto& constructor : constructors) {
auto new_function =
ctx.create_custom_function(constructor->name, constructor->types.size());
std::vector<instruction_ptr> instructions;
instructions.push_back(instruction_ptr(
new instruction_pack(constructor->tag, constructor->types.size())
));
instructions.push_back(instruction_ptr(new instruction_update(0)));
ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
for (auto& instruction : instructions) {
instruction->gen_llvm(ctx, new_function);
}
ctx.builder.CreateRetVoid();
}
}
void definition_data::gen_llvm_second(llvm_context& ctx) {
// Nothing
}

@@ -0,0 +1,73 @@
#pragma once
#include <memory>
#include <vector>
#include "instruction.hpp"
#include "llvm_context.hpp"
#include "type_env.hpp"
struct ast;
using ast_ptr = std::unique_ptr<ast>;
struct definition {
virtual ~definition() = default;
virtual void typecheck_first(type_mgr& mgr, type_env& env) = 0;
virtual void typecheck_second(type_mgr& mgr, const type_env& env) const = 0;
virtual void resolve(const type_mgr& mgr) = 0;
virtual void compile() = 0;
virtual void gen_llvm_first(llvm_context& ctx) = 0;
virtual void gen_llvm_second(llvm_context& ctx) = 0;
};
using definition_ptr = std::unique_ptr<definition>;
struct constructor {
std::string name;
std::vector<std::string> types;
int8_t tag;
constructor(std::string n, std::vector<std::string> ts)
: name(std::move(n)), types(std::move(ts)) {}
};
using constructor_ptr = std::unique_ptr<constructor>;
struct definition_defn : public definition {
std::string name;
std::vector<std::string> params;
ast_ptr body;
type_ptr return_type;
std::vector<type_ptr> param_types;
std::vector<instruction_ptr> instructions;
llvm::Function* generated_function;
definition_defn(std::string n, std::vector<std::string> p, ast_ptr b)
: name(std::move(n)), params(std::move(p)), body(std::move(b)) {
}
void typecheck_first(type_mgr& mgr, type_env& env);
void typecheck_second(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr);
void compile();
void gen_llvm_first(llvm_context& ctx);
void gen_llvm_second(llvm_context& ctx);
};
struct definition_data : public definition {
std::string name;
std::vector<constructor_ptr> constructors;
definition_data(std::string n, std::vector<constructor_ptr> cs)
: name(std::move(n)), constructors(std::move(cs)) {}
void typecheck_first(type_mgr& mgr, type_env& env);
void typecheck_second(type_mgr& mgr, const type_env& env) const;
void resolve(const type_mgr& mgr);
void compile();
void gen_llvm_first(llvm_context& ctx);
void gen_llvm_second(llvm_context& ctx);
};

code/compiler/09/env.cpp (new file, +23 lines)

@@ -0,0 +1,23 @@
#include "env.hpp"
int env_var::get_offset(const std::string& name) const {
if(name == this->name) return 0;
if(parent) return parent->get_offset(name) + 1;
throw 0;
}
bool env_var::has_variable(const std::string& name) const {
if(name == this->name) return true;
if(parent) return parent->has_variable(name);
return false;
}
int env_offset::get_offset(const std::string& name) const {
if(parent) return parent->get_offset(name) + offset;
throw 0;
}
bool env_offset::has_variable(const std::string& name) const {
if(parent) return parent->has_variable(name);
return false;
}

code/compiler/09/env.hpp (new file, +34 lines)

@@ -0,0 +1,34 @@
#pragma once
#include <memory>
#include <string>
struct env {
virtual ~env() = default;
virtual int get_offset(const std::string& name) const = 0;
virtual bool has_variable(const std::string& name) const = 0;
};
using env_ptr = std::shared_ptr<env>;
struct env_var : public env {
std::string name;
env_ptr parent;
env_var(std::string& n, env_ptr p)
: name(std::move(n)), parent(std::move(p)) {}
int get_offset(const std::string& name) const;
bool has_variable(const std::string& name) const;
};
struct env_offset : public env {
int offset;
env_ptr parent;
env_offset(int o, env_ptr p)
: offset(o), parent(std::move(p)) {}
int get_offset(const std::string& name) const;
bool has_variable(const std::string& name) const;
};

@@ -0,0 +1,5 @@
#include "error.hpp"
const char* type_error::what() const noexcept {
return "an error occured while checking the types of the program";
}

@@ -0,0 +1,21 @@
#pragma once
#include <exception>
#include "type.hpp"
struct type_error : std::exception {
std::string description;
type_error(std::string d)
: description(std::move(d)) {}
const char* what() const noexcept override;
};
struct unification_error : public type_error {
type_ptr left;
type_ptr right;
unification_error(type_ptr l, type_ptr r)
: left(std::move(l)), right(std::move(r)),
type_error("failed to unify types") {}
};

@@ -0,0 +1,2 @@
data Bool = { True, False }
defn main = { 3 + True }

@@ -0,0 +1 @@
defn main = { 1 2 3 4 5 }

@@ -0,0 +1,8 @@
data List = { Nil, Cons Int List }
defn head l = {
case l of {
Nil -> { 0 }
Cons x y z -> { x }
}
}

@@ -0,0 +1,129 @@
data List = { Nil, Cons Nat List }
data Bool = { True, False }
data Nat = { O, S Nat }
defn ifN c t e = {
case c of {
True -> { t }
False -> { e }
}
}
defn ifL c t e = {
case c of {
True -> { t }
False -> { e }
}
}
defn toInt n = {
case n of {
O -> { 0 }
S np -> { 1 + toInt np }
}
}
defn lte n m = {
case m of {
O -> {
case n of {
O -> { True }
S np -> { False }
}
}
S mp -> {
case n of {
O -> { True }
S np -> { lte np mp }
}
}
}
}
defn minus n m = {
case m of {
O -> { n }
S mp -> {
case n of {
O -> { O }
S np -> {
minus np mp
}
}
}
}
}
defn mod n m = {
ifN (lte m n) (mod (minus n m) m) n
}
defn notDivisibleBy n m = {
case (mod m n) of {
O -> { False }
S mp -> { True }
}
}
defn filter f l = {
case l of {
Nil -> { Nil }
Cons x xs -> { ifL (f x) (Cons x (filter f xs)) (filter f xs) }
}
}
defn map f l = {
case l of {
Nil -> { Nil }
Cons x xs -> { Cons (f x) (map f xs) }
}
}
defn nats = {
Cons (S (S O)) (map S nats)
}
defn primesRec l = {
case l of {
Nil -> { Nil }
Cons p xs -> { Cons p (primesRec (filter (notDivisibleBy p) xs)) }
}
}
defn primes = {
primesRec nats
}
defn take n l = {
case l of {
Nil -> { Nil }
Cons x xs -> {
case n of {
O -> { Nil }
S np -> { Cons x (take np xs) }
}
}
}
}
defn head l = {
case l of {
Nil -> { O }
Cons x xs -> { x }
}
}
defn reverseAcc a l = {
case l of {
Nil -> { a }
Cons x xs -> { reverseAcc (Cons x a) xs }
}
}
defn reverse l = {
reverseAcc Nil l
}
defn main = {
toInt (head (reverse (take ((S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S (S O))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) primes)))
}

@@ -0,0 +1,31 @@
#include "../runtime.h"
void f_add(struct stack* s) {
struct node_num* left = (struct node_num*) eval(stack_peek(s, 0));
struct node_num* right = (struct node_num*) eval(stack_peek(s, 1));
stack_push(s, (struct node_base*) alloc_num(left->value + right->value));
}
void f_main(struct stack* s) {
// PushInt 320
stack_push(s, (struct node_base*) alloc_num(320));
// PushInt 6
stack_push(s, (struct node_base*) alloc_num(6));
// PushGlobal f_add (the function for +)
stack_push(s, (struct node_base*) alloc_global(f_add, 2));
struct node_base* left;
struct node_base* right;
// MkApp
left = stack_pop(s);
right = stack_pop(s);
stack_push(s, (struct node_base*) alloc_app(left, right));
// MkApp
left = stack_pop(s);
right = stack_pop(s);
stack_push(s, (struct node_base*) alloc_app(left, right));
}
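The hand-written example above builds the graph for 320 + 6 by calling the C runtime directly, one commented G-machine instruction at a time. The diff does not show how the resulting graph is forced and printed; a minimal, hypothetical driver (not part of this commit) might look like the sketch below, assuming the stack, eval, and node_num declarations from runtime.h and <stdio.h> are available.

/* Hypothetical driver, not part of the diff above. */
int main() {
    struct stack s;
    stack_init(&s);                  /* set up an empty runtime stack */
    f_main(&s);                      /* build the application graph for 320 + 6 */
    struct node_num* result =
        (struct node_num*) eval(stack_pop(&s));  /* force the graph to a number */
    printf("%d\n", result->value);   /* expected output: 326 */
    stack_free(&s);
    return 0;
}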

@@ -0,0 +1,2 @@
defn main = { sum 320 6 }
defn sum x y = { x + y }

@@ -0,0 +1,3 @@
defn add x y = { x + y }
defn double x = { add x x }
defn main = { double 163 }

@@ -0,0 +1,8 @@
data List = { Nil, Cons Int List }
defn length l = {
case l of {
Nil -> { 0 }
Cons x xs -> { 1 + length xs }
}
}
defn main = { length (Cons 1 (Cons 2 (Cons 3 Nil))) }

@@ -0,0 +1,16 @@
data List = { Nil, Cons Int List }
defn add x y = { x + y }
defn mul x y = { x * y }
defn foldr f b l = {
case l of {
Nil -> { b }
Cons x xs -> { f x (foldr f b xs) }
}
}
defn main = {
foldr add 0 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil)))) +
foldr mul 1 (Cons 1 (Cons 2 (Cons 3 (Cons 4 Nil))))
}

@@ -0,0 +1,17 @@
data List = { Nil, Cons Int List }
defn sumZip l m = {
case l of {
Nil -> { 0 }
Cons x xs -> {
case m of {
Nil -> { 0 }
Cons y ys -> { x + y + sumZip xs ys }
}
}
}
}
defn ones = { Cons 1 ones }
defn main = { sumZip ones (Cons 1 (Cons 2 (Cons 3 Nil))) }

@@ -0,0 +1,177 @@
#include "instruction.hpp"
#include "llvm_context.hpp"
#include <llvm/IR/BasicBlock.h>
#include <llvm/IR/Function.h>
using namespace llvm;
static void print_indent(int n, std::ostream& to) {
while(n--) to << " ";
}
void instruction_pushint::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "PushInt(" << value << ")" << std::endl;
}
void instruction_pushint::gen_llvm(llvm_context& ctx, Function* f) const {
ctx.create_push(f, ctx.create_num(f, ctx.create_i32(value)));
}
void instruction_pushglobal::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "PushGlobal(" << name << ")" << std::endl;
}
void instruction_pushglobal::gen_llvm(llvm_context& ctx, Function* f) const {
auto& global_f = ctx.custom_functions.at("f_" + name);
auto arity = ctx.create_i32(global_f->arity);
ctx.create_push(f, ctx.create_global(f, global_f->function, arity));
}
void instruction_push::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "Push(" << offset << ")" << std::endl;
}
void instruction_push::gen_llvm(llvm_context& ctx, Function* f) const {
ctx.create_push(f, ctx.create_peek(f, ctx.create_size(offset)));
}
void instruction_pop::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "Pop(" << count << ")" << std::endl;
}
void instruction_pop::gen_llvm(llvm_context& ctx, Function* f) const {
ctx.create_popn(f, ctx.create_size(count));
}
void instruction_mkapp::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "MkApp()" << std::endl;
}
void instruction_mkapp::gen_llvm(llvm_context& ctx, Function* f) const {
auto left = ctx.create_pop(f);
auto right = ctx.create_pop(f);
ctx.create_push(f, ctx.create_app(f, left, right));
}
void instruction_update::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "Update(" << offset << ")" << std::endl;
}
void instruction_update::gen_llvm(llvm_context& ctx, Function* f) const {
ctx.create_update(f, ctx.create_size(offset));
}
void instruction_pack::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "Pack(" << tag << ", " << size << ")" << std::endl;
}
void instruction_pack::gen_llvm(llvm_context& ctx, Function* f) const {
ctx.create_pack(f, ctx.create_size(size), ctx.create_i8(tag));
}
void instruction_split::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "Split()" << std::endl;
}
void instruction_split::gen_llvm(llvm_context& ctx, Function* f) const {
ctx.create_split(f, ctx.create_size(size));
}
void instruction_jump::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "Jump(" << std::endl;
for(auto& instruction_set : branches) {
for(auto& instruction : instruction_set) {
instruction->print(indent + 2, to);
}
to << std::endl;
}
print_indent(indent, to);
to << ")" << std::endl;
}
void instruction_jump::gen_llvm(llvm_context& ctx, Function* f) const {
auto top_node = ctx.create_peek(f, ctx.create_size(0));
auto tag = ctx.unwrap_data_tag(top_node);
auto safety_block = BasicBlock::Create(ctx.ctx, "safety", f);
auto switch_op = ctx.builder.CreateSwitch(tag, safety_block, tag_mappings.size());
std::vector<BasicBlock*> blocks;
for(auto& branch : branches) {
auto branch_block = BasicBlock::Create(ctx.ctx, "branch", f);
ctx.builder.SetInsertPoint(branch_block);
for(auto& instruction : branch) {
instruction->gen_llvm(ctx, f);
}
ctx.builder.CreateBr(safety_block);
blocks.push_back(branch_block);
}
for(auto& mapping : tag_mappings) {
switch_op->addCase(ctx.create_i8(mapping.first), blocks[mapping.second]);
}
ctx.builder.SetInsertPoint(safety_block);
}
void instruction_slide::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "Slide(" << offset << ")" << std::endl;
}
void instruction_slide::gen_llvm(llvm_context& ctx, Function* f) const {
ctx.create_slide(f, ctx.create_size(offset));
}
void instruction_binop::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "BinOp(" << op_action(op) << ")" << std::endl;
}
void instruction_binop::gen_llvm(llvm_context& ctx, Function* f) const {
auto left_int = ctx.unwrap_num(ctx.create_pop(f));
auto right_int = ctx.unwrap_num(ctx.create_pop(f));
llvm::Value* result;
switch(op) {
case PLUS: result = ctx.builder.CreateAdd(left_int, right_int); break;
case MINUS: result = ctx.builder.CreateSub(left_int, right_int); break;
case TIMES: result = ctx.builder.CreateMul(left_int, right_int); break;
case DIVIDE: result = ctx.builder.CreateSDiv(left_int, right_int); break;
}
ctx.create_push(f, ctx.create_num(f, result));
}
void instruction_eval::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "Eval()" << std::endl;
}
void instruction_eval::gen_llvm(llvm_context& ctx, Function* f) const {
ctx.create_unwind(f);
}
void instruction_alloc::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "Alloc(" << amount << ")" << std::endl;
}
void instruction_alloc::gen_llvm(llvm_context& ctx, Function* f) const {
ctx.create_alloc(f, ctx.create_size(amount));
}
void instruction_unwind::print(int indent, std::ostream& to) const {
print_indent(indent, to);
to << "Unwind()" << std::endl;
}
void instruction_unwind::gen_llvm(llvm_context& ctx, Function* f) const {
// Nothing
}

@@ -0,0 +1,142 @@
#pragma once
#include <llvm/IR/Function.h>
#include <string>
#include <memory>
#include <vector>
#include <map>
#include <ostream>
#include "binop.hpp"
#include "llvm_context.hpp"
struct instruction {
virtual ~instruction() = default;
virtual void print(int indent, std::ostream& to) const = 0;
virtual void gen_llvm(llvm_context& ctx, llvm::Function* f) const = 0;
};
using instruction_ptr = std::unique_ptr<instruction>;
struct instruction_pushint : public instruction {
int value;
instruction_pushint(int v)
: value(v) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_pushglobal : public instruction {
std::string name;
instruction_pushglobal(std::string n)
: name(std::move(n)) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_push : public instruction {
int offset;
instruction_push(int o)
: offset(o) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_pop : public instruction {
int count;
instruction_pop(int c)
: count(c) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_mkapp : public instruction {
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_update : public instruction {
int offset;
instruction_update(int o)
: offset(o) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_pack : public instruction {
int tag;
int size;
instruction_pack(int t, int s)
: tag(t), size(s) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_split : public instruction {
int size;
instruction_split(int s)
: size(s) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_jump : public instruction {
std::vector<std::vector<instruction_ptr>> branches;
std::map<int, int> tag_mappings;
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_slide : public instruction {
int offset;
instruction_slide(int o)
: offset(o) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_binop : public instruction {
binop op;
instruction_binop(binop o)
: op(o) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_eval : public instruction {
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_alloc : public instruction {
int amount;
instruction_alloc(int a)
: amount(a) {}
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};
struct instruction_unwind : public instruction {
void print(int indent, std::ostream& to) const;
void gen_llvm(llvm_context& ctx, llvm::Function* f) const;
};

@@ -0,0 +1,278 @@
#include "llvm_context.hpp"
#include <llvm/IR/DerivedTypes.h>
using namespace llvm;
void llvm_context::create_types() {
stack_type = StructType::create(ctx, "stack");
gmachine_type = StructType::create(ctx, "gmachine");
stack_ptr_type = PointerType::getUnqual(stack_type);
gmachine_ptr_type = PointerType::getUnqual(gmachine_type);
tag_type = IntegerType::getInt8Ty(ctx);
struct_types["node_base"] = StructType::create(ctx, "node_base");
struct_types["node_app"] = StructType::create(ctx, "node_app");
struct_types["node_num"] = StructType::create(ctx, "node_num");
struct_types["node_global"] = StructType::create(ctx, "node_global");
struct_types["node_ind"] = StructType::create(ctx, "node_ind");
struct_types["node_data"] = StructType::create(ctx, "node_data");
node_ptr_type = PointerType::getUnqual(struct_types.at("node_base"));
function_type = FunctionType::get(Type::getVoidTy(ctx), { gmachine_ptr_type }, false);
gmachine_type->setBody(
stack_ptr_type,
node_ptr_type,
IntegerType::getInt64Ty(ctx),
IntegerType::getInt64Ty(ctx)
);
struct_types.at("node_base")->setBody(
IntegerType::getInt32Ty(ctx),
IntegerType::getInt8Ty(ctx),
node_ptr_type
);
struct_types.at("node_app")->setBody(
struct_types.at("node_base"),
node_ptr_type,
node_ptr_type
);
struct_types.at("node_num")->setBody(
struct_types.at("node_base"),
IntegerType::getInt32Ty(ctx)
);
struct_types.at("node_global")->setBody(
struct_types.at("node_base"),
FunctionType::get(Type::getVoidTy(ctx), { stack_ptr_type }, false)
);
struct_types.at("node_ind")->setBody(
struct_types.at("node_base"),
node_ptr_type
);
struct_types.at("node_data")->setBody(
struct_types.at("node_base"),
IntegerType::getInt8Ty(ctx),
PointerType::getUnqual(node_ptr_type)
);
}
void llvm_context::create_functions() {
auto void_type = Type::getVoidTy(ctx);
auto sizet_type = IntegerType::get(ctx, sizeof(size_t) * 8);
functions["stack_init"] = Function::Create(
FunctionType::get(void_type, { stack_ptr_type }, false),
Function::LinkageTypes::ExternalLinkage,
"stack_init",
&module
);
functions["stack_free"] = Function::Create(
FunctionType::get(void_type, { stack_ptr_type }, false),
Function::LinkageTypes::ExternalLinkage,
"stack_free",
&module
);
functions["stack_push"] = Function::Create(
FunctionType::get(void_type, { stack_ptr_type, node_ptr_type }, false),
Function::LinkageTypes::ExternalLinkage,
"stack_push",
&module
);
functions["stack_pop"] = Function::Create(
FunctionType::get(node_ptr_type, { stack_ptr_type }, false),
Function::LinkageTypes::ExternalLinkage,
"stack_pop",
&module
);
functions["stack_peek"] = Function::Create(
FunctionType::get(node_ptr_type, { stack_ptr_type, sizet_type }, false),
Function::LinkageTypes::ExternalLinkage,
"stack_peek",
&module
);
functions["stack_popn"] = Function::Create(
FunctionType::get(void_type, { stack_ptr_type, sizet_type }, false),
Function::LinkageTypes::ExternalLinkage,
"stack_popn",
&module
);
functions["gmachine_slide"] = Function::Create(
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
Function::LinkageTypes::ExternalLinkage,
"gmachine_slide",
&module
);
functions["gmachine_update"] = Function::Create(
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
Function::LinkageTypes::ExternalLinkage,
"gmachine_update",
&module
);
functions["gmachine_alloc"] = Function::Create(
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
Function::LinkageTypes::ExternalLinkage,
"gmachine_alloc",
&module
);
functions["gmachine_pack"] = Function::Create(
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type, tag_type }, false),
Function::LinkageTypes::ExternalLinkage,
"gmachine_pack",
&module
);
functions["gmachine_split"] = Function::Create(
FunctionType::get(void_type, { gmachine_ptr_type, sizet_type }, false),
Function::LinkageTypes::ExternalLinkage,
"gmachine_split",
&module
);
functions["gmachine_track"] = Function::Create(
FunctionType::get(node_ptr_type, { gmachine_ptr_type, node_ptr_type }, false),
Function::LinkageTypes::ExternalLinkage,
"gmachine_track",
&module
);
auto int32_type = IntegerType::getInt32Ty(ctx);
functions["alloc_app"] = Function::Create(
FunctionType::get(node_ptr_type, { node_ptr_type, node_ptr_type }, false),
Function::LinkageTypes::ExternalLinkage,
"alloc_app",
&module
);
functions["alloc_num"] = Function::Create(
FunctionType::get(node_ptr_type, { int32_type }, false),
Function::LinkageTypes::ExternalLinkage,
"alloc_num",
&module
);
functions["alloc_global"] = Function::Create(
FunctionType::get(node_ptr_type, { function_type, int32_type }, false),
Function::LinkageTypes::ExternalLinkage,
"alloc_global",
&module
);
functions["alloc_ind"] = Function::Create(
FunctionType::get(node_ptr_type, { node_ptr_type }, false),
Function::LinkageTypes::ExternalLinkage,
"alloc_ind",
&module
);
functions["unwind"] = Function::Create(
FunctionType::get(void_type, { gmachine_ptr_type }, false),
Function::LinkageTypes::ExternalLinkage,
"unwind",
&module
);
}
ConstantInt* llvm_context::create_i8(int8_t i) {
return ConstantInt::get(ctx, APInt(8, i));
}
ConstantInt* llvm_context::create_i32(int32_t i) {
return ConstantInt::get(ctx, APInt(32, i));
}
ConstantInt* llvm_context::create_size(size_t i) {
return ConstantInt::get(ctx, APInt(sizeof(size_t) * 8, i));
}
Value* llvm_context::create_pop(Function* f) {
auto pop_f = functions.at("stack_pop");
return builder.CreateCall(pop_f, { unwrap_gmachine_stack_ptr(f->arg_begin()) });
}
Value* llvm_context::create_peek(Function* f, Value* off) {
auto peek_f = functions.at("stack_peek");
return builder.CreateCall(peek_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), off });
}
void llvm_context::create_push(Function* f, Value* v) {
auto push_f = functions.at("stack_push");
builder.CreateCall(push_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), v });
}
void llvm_context::create_popn(Function* f, Value* off) {
auto popn_f = functions.at("stack_popn");
builder.CreateCall(popn_f, { unwrap_gmachine_stack_ptr(f->arg_begin()), off });
}
void llvm_context::create_update(Function* f, Value* off) {
auto update_f = functions.at("gmachine_update");
builder.CreateCall(update_f, { f->arg_begin(), off });
}
void llvm_context::create_pack(Function* f, Value* c, Value* t) {
auto pack_f = functions.at("gmachine_pack");
builder.CreateCall(pack_f, { f->arg_begin(), c, t });
}
void llvm_context::create_split(Function* f, Value* c) {
auto split_f = functions.at("gmachine_split");
builder.CreateCall(split_f, { f->arg_begin(), c });
}
void llvm_context::create_slide(Function* f, Value* off) {
auto slide_f = functions.at("gmachine_slide");
builder.CreateCall(slide_f, { f->arg_begin(), off });
}
void llvm_context::create_alloc(Function* f, Value* n) {
auto alloc_f = functions.at("gmachine_alloc");
builder.CreateCall(alloc_f, { f->arg_begin(), n });
}
Value* llvm_context::create_track(Function* f, Value* v) {
auto track_f = functions.at("gmachine_track");
return builder.CreateCall(track_f, { f->arg_begin(), v });
}
void llvm_context::create_unwind(Function* f) {
auto unwind_f = functions.at("unwind");
builder.CreateCall(unwind_f, { f->args().begin() });
}
Value* llvm_context::unwrap_gmachine_stack_ptr(Value* g) {
auto offset_0 = create_i32(0);
return builder.CreateGEP(g, { offset_0, offset_0 });
}
Value* llvm_context::unwrap_num(Value* v) {
auto num_ptr_type = PointerType::getUnqual(struct_types.at("node_num"));
auto cast = builder.CreatePointerCast(v, num_ptr_type);
auto offset_0 = create_i32(0);
auto offset_1 = create_i32(1);
auto int_ptr = builder.CreateGEP(cast, { offset_0, offset_1 });
return builder.CreateLoad(int_ptr);
}
Value* llvm_context::create_num(Function* f, Value* v) {
auto alloc_num_f = functions.at("alloc_num");
auto alloc_num_call = builder.CreateCall(alloc_num_f, { v });
return create_track(f, alloc_num_call);
}
Value* llvm_context::unwrap_data_tag(Value* v) {
auto data_ptr_type = PointerType::getUnqual(struct_types.at("node_data"));
auto cast = builder.CreatePointerCast(v, data_ptr_type);
auto offset_0 = create_i32(0);
auto offset_1 = create_i32(1);
auto tag_ptr = builder.CreateGEP(cast, { offset_0, offset_1 });
return builder.CreateLoad(tag_ptr);
}
Value* llvm_context::create_global(Function* f, Value* gf, Value* a) {
auto alloc_global_f = functions.at("alloc_global");
auto alloc_global_call = builder.CreateCall(alloc_global_f, { gf, a });
return create_track(f, alloc_global_call);
}
Value* llvm_context::create_app(Function* f, Value* l, Value* r) {
auto alloc_app_f = functions.at("alloc_app");
auto alloc_app_call = builder.CreateCall(alloc_app_f, { l, r });
return create_track(f, alloc_app_call);
}
llvm::Function* llvm_context::create_custom_function(std::string name, int32_t arity) {
auto void_type = llvm::Type::getVoidTy(ctx);
auto new_function = llvm::Function::Create(
function_type,
llvm::Function::LinkageTypes::ExternalLinkage,
"f_" + name,
&module
);
auto start_block = llvm::BasicBlock::Create(ctx, "entry", new_function);
auto new_custom_f = custom_function_ptr(new custom_function());
new_custom_f->arity = arity;
new_custom_f->function = new_function;
custom_functions["f_" + name] = std::move(new_custom_f);
return new_function;
}

@@ -0,0 +1,72 @@
#pragma once
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Value.h>
#include <map>
struct llvm_context {
struct custom_function {
llvm::Function* function;
int32_t arity;
};
using custom_function_ptr = std::unique_ptr<custom_function>;
llvm::LLVMContext ctx;
llvm::IRBuilder<> builder;
llvm::Module module;
std::map<std::string, custom_function_ptr> custom_functions;
std::map<std::string, llvm::Function*> functions;
std::map<std::string, llvm::StructType*> struct_types;
llvm::StructType* stack_type;
llvm::StructType* gmachine_type;
llvm::PointerType* stack_ptr_type;
llvm::PointerType* gmachine_ptr_type;
llvm::PointerType* node_ptr_type;
llvm::IntegerType* tag_type;
llvm::FunctionType* function_type;
llvm_context()
: builder(ctx), module("bloglang", ctx) {
create_types();
create_functions();
}
void create_types();
void create_functions();
llvm::ConstantInt* create_i8(int8_t);
llvm::ConstantInt* create_i32(int32_t);
llvm::ConstantInt* create_size(size_t);
llvm::Value* create_pop(llvm::Function*);
llvm::Value* create_peek(llvm::Function*, llvm::Value*);
void create_push(llvm::Function*, llvm::Value*);
void create_popn(llvm::Function*, llvm::Value*);
void create_update(llvm::Function*, llvm::Value*);
void create_pack(llvm::Function*, llvm::Value*, llvm::Value*);
void create_split(llvm::Function*, llvm::Value*);
void create_slide(llvm::Function*, llvm::Value*);
void create_alloc(llvm::Function*, llvm::Value*);
llvm::Value* create_track(llvm::Function*, llvm::Value*);
void create_unwind(llvm::Function*);
llvm::Value* unwrap_gmachine_stack_ptr(llvm::Value*);
llvm::Value* unwrap_num(llvm::Value*);
llvm::Value* create_num(llvm::Function*, llvm::Value*);
llvm::Value* unwrap_data_tag(llvm::Value*);
llvm::Value* create_global(llvm::Function*, llvm::Value*, llvm::Value*);
llvm::Value* create_app(llvm::Function*, llvm::Value*, llvm::Value*);
llvm::Function* create_custom_function(std::string name, int32_t arity);
};

code/compiler/09/main.cpp (new file, +176 lines)

@@ -0,0 +1,176 @@
#include "ast.hpp"
#include <iostream>
#include "binop.hpp"
#include "definition.hpp"
#include "instruction.hpp"
#include "llvm_context.hpp"
#include "parser.hpp"
#include "error.hpp"
#include "type.hpp"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetMachine.h"
void yy::parser::error(const std::string& msg) {
std::cout << "An error occured: " << msg << std::endl;
}
extern std::vector<definition_ptr> program;
void typecheck_program(
const std::vector<definition_ptr>& prog,
type_mgr& mgr, type_env& env) {
type_ptr int_type = type_ptr(new type_base("Int"));
type_ptr binop_type = type_ptr(new type_arr(
int_type,
type_ptr(new type_arr(int_type, int_type))));
env.bind("+", binop_type);
env.bind("-", binop_type);
env.bind("*", binop_type);
env.bind("/", binop_type);
for(auto& def : prog) {
def->typecheck_first(mgr, env);
}
for(auto& def : prog) {
def->typecheck_second(mgr, env);
}
for(auto& pair : env.names) {
std::cout << pair.first << ": ";
pair.second->print(mgr, std::cout);
std::cout << std::endl;
}
for(auto& def : prog) {
def->resolve(mgr);
}
}
void compile_program(const std::vector<definition_ptr>& prog) {
for(auto& def : prog) {
def->compile();
definition_defn* defn = dynamic_cast<definition_defn*>(def.get());
if(!defn) continue;
for(auto& instruction : defn->instructions) {
instruction->print(0, std::cout);
}
std::cout << std::endl;
}
}
void gen_llvm_internal_op(llvm_context& ctx, binop op) {
auto new_function = ctx.create_custom_function(op_action(op), 2);
std::vector<instruction_ptr> instructions;
instructions.push_back(instruction_ptr(new instruction_push(1)));
instructions.push_back(instruction_ptr(new instruction_eval()));
instructions.push_back(instruction_ptr(new instruction_push(1)));
instructions.push_back(instruction_ptr(new instruction_eval()));
instructions.push_back(instruction_ptr(new instruction_binop(op)));
instructions.push_back(instruction_ptr(new instruction_update(2)));
instructions.push_back(instruction_ptr(new instruction_pop(2)));
ctx.builder.SetInsertPoint(&new_function->getEntryBlock());
for(auto& instruction : instructions) {
instruction->gen_llvm(ctx, new_function);
}
ctx.builder.CreateRetVoid();
}
void output_llvm(llvm_context& ctx, const std::string& filename) {
std::string targetTriple = llvm::sys::getDefaultTargetTriple();
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmParser();
llvm::InitializeNativeTargetAsmPrinter();
std::string error;
const llvm::Target* target =
llvm::TargetRegistry::lookupTarget(targetTriple, error);
if (!target) {
std::cerr << error << std::endl;
} else {
std::string cpu = "generic";
std::string features = "";
llvm::TargetOptions options;
llvm::TargetMachine* targetMachine =
target->createTargetMachine(targetTriple, cpu, features,
options, llvm::Optional<llvm::Reloc::Model>());
ctx.module.setDataLayout(targetMachine->createDataLayout());
ctx.module.setTargetTriple(targetTriple);
std::error_code ec;
llvm::raw_fd_ostream file(filename, ec, llvm::sys::fs::F_None);
if (ec) {
throw 0;
} else {
llvm::TargetMachine::CodeGenFileType type = llvm::TargetMachine::CGFT_ObjectFile;
llvm::legacy::PassManager pm;
if (targetMachine->addPassesToEmitFile(pm, file, NULL, type)) {
throw 0;
} else {
pm.run(ctx.module);
file.close();
}
}
}
}
void gen_llvm(const std::vector<definition_ptr>& prog) {
llvm_context ctx;
gen_llvm_internal_op(ctx, PLUS);
gen_llvm_internal_op(ctx, MINUS);
gen_llvm_internal_op(ctx, TIMES);
gen_llvm_internal_op(ctx, DIVIDE);
for(auto& definition : prog) {
definition->gen_llvm_first(ctx);
}
for(auto& definition : prog) {
definition->gen_llvm_second(ctx);
}
ctx.module.print(llvm::outs(), nullptr);
output_llvm(ctx, "program.o");
}
int main() {
yy::parser parser;
type_mgr mgr;
type_env env;
parser.parse();
for(auto& definition : program) {
definition_defn* def = dynamic_cast<definition_defn*>(definition.get());
if(!def) continue;
std::cout << def->name;
for(auto& param : def->params) std::cout << " " << param;
std::cout << ":" << std::endl;
def->body->print(1, std::cout);
}
try {
typecheck_program(program, mgr, env);
compile_program(program);
gen_llvm(program);
} catch(unification_error& err) {
std::cout << "failed to unify types: " << std::endl;
std::cout << " (1) \033[34m";
err.left->print(mgr, std::cout);
std::cout << "\033[0m" << std::endl;
std::cout << " (2) \033[32m";
err.right->print(mgr, std::cout);
std::cout << "\033[0m" << std::endl;
} catch(type_error& err) {
std::cout << "failed to type check program: " << err.description << std::endl;
}
}

code/compiler/09/parser.y (new file, +141 lines)

@@ -0,0 +1,141 @@
%{
#include <string>
#include <iostream>
#include "ast.hpp"
#include "definition.hpp"
#include "parser.hpp"
std::vector<definition_ptr> program;
extern yy::parser::symbol_type yylex();
%}
%token PLUS
%token TIMES
%token MINUS
%token DIVIDE
%token <int> INT
%token DEFN
%token DATA
%token CASE
%token OF
%token OCURLY
%token CCURLY
%token OPAREN
%token CPAREN
%token COMMA
%token ARROW
%token EQUAL
%token <std::string> LID
%token <std::string> UID
%language "c++"
%define api.value.type variant
%define api.token.constructor
%type <std::vector<std::string>> lowercaseParams uppercaseParams
%type <std::vector<definition_ptr>> program definitions
%type <std::vector<branch_ptr>> branches
%type <std::vector<constructor_ptr>> constructors
%type <ast_ptr> aAdd aMul case app appBase
%type <definition_ptr> definition defn data
%type <branch_ptr> branch
%type <pattern_ptr> pattern
%type <constructor_ptr> constructor
%start program
%%
program
: definitions { program = std::move($1); }
;
definitions
: definitions definition { $$ = std::move($1); $$.push_back(std::move($2)); }
| definition { $$ = std::vector<definition_ptr>(); $$.push_back(std::move($1)); }
;
definition
: defn { $$ = std::move($1); }
| data { $$ = std::move($1); }
;
defn
: DEFN LID lowercaseParams EQUAL OCURLY aAdd CCURLY
{ $$ = definition_ptr(
new definition_defn(std::move($2), std::move($3), std::move($6))); }
;
lowercaseParams
: %empty { $$ = std::vector<std::string>(); }
| lowercaseParams LID { $$ = std::move($1); $$.push_back(std::move($2)); }
;
uppercaseParams
: %empty { $$ = std::vector<std::string>(); }
| uppercaseParams UID { $$ = std::move($1); $$.push_back(std::move($2)); }
;
aAdd
: aAdd PLUS aMul { $$ = ast_ptr(new ast_binop(PLUS, std::move($1), std::move($3))); }
| aAdd MINUS aMul { $$ = ast_ptr(new ast_binop(MINUS, std::move($1), std::move($3))); }
| aMul { $$ = std::move($1); }
;
aMul
: aMul TIMES app { $$ = ast_ptr(new ast_binop(TIMES, std::move($1), std::move($3))); }
| aMul DIVIDE app { $$ = ast_ptr(new ast_binop(DIVIDE, std::move($1), std::move($3))); }
| app { $$ = std::move($1); }
;
app
: app appBase { $$ = ast_ptr(new ast_app(std::move($1), std::move($2))); }
| appBase { $$ = std::move($1); }
;
appBase
: INT { $$ = ast_ptr(new ast_int($1)); }
| LID { $$ = ast_ptr(new ast_lid(std::move($1))); }
| UID { $$ = ast_ptr(new ast_uid(std::move($1))); }
| OPAREN aAdd CPAREN { $$ = std::move($2); }
| case { $$ = std::move($1); }
;
case
: CASE aAdd OF OCURLY branches CCURLY
{ $$ = ast_ptr(new ast_case(std::move($2), std::move($5))); }
;
branches
: branches branch { $$ = std::move($1); $$.push_back(std::move($2)); }
| branch { $$ = std::vector<branch_ptr>(); $$.push_back(std::move($1));}
;
branch
: pattern ARROW OCURLY aAdd CCURLY
{ $$ = branch_ptr(new branch(std::move($1), std::move($4))); }
;
pattern
: LID { $$ = pattern_ptr(new pattern_var(std::move($1))); }
| UID lowercaseParams
{ $$ = pattern_ptr(new pattern_constr(std::move($1), std::move($2))); }
;
data
: DATA UID EQUAL OCURLY constructors CCURLY
{ $$ = definition_ptr(new definition_data(std::move($2), std::move($5))); }
;
constructors
: constructors COMMA constructor { $$ = std::move($1); $$.push_back(std::move($3)); }
| constructor
{ $$ = std::vector<constructor_ptr>(); $$.push_back(std::move($1)); }
;
constructor
: UID uppercaseParams
{ $$ = constructor_ptr(new constructor(std::move($1), std::move($2))); }
;

code/compiler/09/runtime.c Normal file

@@ -0,0 +1,269 @@
#include <stdint.h>
#include <assert.h>
#include <memory.h>
#include <stdio.h>
#include "runtime.h"
struct node_base* alloc_node() {
/* sized for node_app, the largest node variant, so one allocator serves every tag */
struct node_base* new_node = malloc(sizeof(struct node_app));
assert(new_node != NULL);
new_node->gc_next = NULL;
new_node->gc_reachable = 0;
return new_node;
}
struct node_app* alloc_app(struct node_base* l, struct node_base* r) {
struct node_app* node = (struct node_app*) alloc_node();
node->base.tag = NODE_APP;
node->left = l;
node->right = r;
return node;
}
struct node_num* alloc_num(int32_t n) {
struct node_num* node = (struct node_num*) alloc_node();
node->base.tag = NODE_NUM;
node->value = n;
return node;
}
struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a) {
struct node_global* node = (struct node_global*) alloc_node();
node->base.tag = NODE_GLOBAL;
node->arity = a;
node->function = f;
return node;
}
struct node_ind* alloc_ind(struct node_base* n) {
struct node_ind* node = (struct node_ind*) alloc_node();
node->base.tag = NODE_IND;
node->next = n;
return node;
}
void free_node_direct(struct node_base* n) {
if(n->tag == NODE_DATA) {
free(((struct node_data*) n)->array);
}
}
void gc_visit_node(struct node_base* n) {
if(n->gc_reachable) return;
n->gc_reachable = 1;
if(n->tag == NODE_APP) {
struct node_app* app = (struct node_app*) n;
gc_visit_node(app->left);
gc_visit_node(app->right);
} if(n->tag == NODE_IND) {
struct node_ind* ind = (struct node_ind*) n;
gc_visit_node(ind->next);
} if(n->tag == NODE_DATA) {
struct node_data* data = (struct node_data*) n;
struct node_base** to_visit = data->array;
while(*to_visit) {
gc_visit_node(*to_visit);
to_visit++;
}
}
}
void stack_init(struct stack* s) {
s->size = 4;
s->count = 0;
s->data = malloc(sizeof(*s->data) * s->size);
assert(s->data != NULL);
}
void stack_free(struct stack* s) {
free(s->data);
}
void stack_push(struct stack* s, struct node_base* n) {
while(s->count >= s->size) {
s->data = realloc(s->data, sizeof(*s->data) * (s->size *= 2));
assert(s->data != NULL);
}
s->data[s->count++] = n;
}
struct node_base* stack_pop(struct stack* s) {
assert(s->count > 0);
return s->data[--s->count];
}
struct node_base* stack_peek(struct stack* s, size_t o) {
assert(s->count > o);
return s->data[s->count - o - 1];
}
void stack_popn(struct stack* s, size_t n) {
assert(s->count >= n);
s->count -= n;
}
void gmachine_init(struct gmachine* g) {
stack_init(&g->stack);
g->gc_nodes = NULL;
g->gc_node_count = 0;
g->gc_node_threshold = 128;
}
void gmachine_free(struct gmachine* g) {
stack_free(&g->stack);
struct node_base* to_free = g->gc_nodes;
struct node_base* next;
while(to_free) {
next = to_free->gc_next;
free_node_direct(to_free);
free(to_free);
to_free = next;
}
}
void gmachine_slide(struct gmachine* g, size_t n) {
assert(g->stack.count > n);
g->stack.data[g->stack.count - n - 1] = g->stack.data[g->stack.count - 1];
g->stack.count -= n;
}
void gmachine_update(struct gmachine* g, size_t o) {
assert(g->stack.count > o + 1);
struct node_ind* ind =
(struct node_ind*) g->stack.data[g->stack.count - o - 2];
ind->base.tag = NODE_IND;
ind->next = g->stack.data[g->stack.count -= 1];
}
void gmachine_alloc(struct gmachine* g, size_t o) {
while(o--) {
stack_push(&g->stack,
gmachine_track(g, (struct node_base*) alloc_ind(NULL)));
}
}
void gmachine_pack(struct gmachine* g, size_t n, int8_t t) {
assert(g->stack.count >= n);
struct node_base** data = malloc(sizeof(*data) * (n + 1));
assert(data != NULL);
memcpy(data, &g->stack.data[g->stack.count - n], n * sizeof(*data));
data[n] = NULL;
struct node_data* new_node = (struct node_data*) alloc_node();
new_node->array = data;
new_node->base.tag = NODE_DATA;
new_node->tag = t;
stack_popn(&g->stack, n);
stack_push(&g->stack, gmachine_track(g, (struct node_base*) new_node));
}
void gmachine_split(struct gmachine* g, size_t n) {
struct node_data* node = (struct node_data*) stack_pop(&g->stack);
for(size_t i = 0; i < n; i++) {
stack_push(&g->stack, node->array[i]);
}
}
struct node_base* gmachine_track(struct gmachine* g, struct node_base* b) {
g->gc_node_count++;
b->gc_next = g->gc_nodes;
g->gc_nodes = b;
if(g->gc_node_count >= g->gc_node_threshold) {
uint64_t nodes_before = g->gc_node_count;
gc_visit_node(b);
gmachine_gc(g);
g->gc_node_threshold = g->gc_node_count * 2;
}
return b;
}
void gmachine_gc(struct gmachine* g) {
for(size_t i = 0; i < g->stack.count; i++) {
gc_visit_node(g->stack.data[i]);
}
struct node_base** head_ptr = &g->gc_nodes;
while(*head_ptr) {
if((*head_ptr)->gc_reachable) {
(*head_ptr)->gc_reachable = 0;
head_ptr = &(*head_ptr)->gc_next;
} else {
struct node_base* to_free = *head_ptr;
*head_ptr = to_free->gc_next;
free_node_direct(to_free);
free(to_free);
g->gc_node_count--;
}
}
}
void unwind(struct gmachine* g) {
struct stack* s = &g->stack;
while(1) {
struct node_base* peek = stack_peek(s, 0);
if(peek->tag == NODE_APP) {
struct node_app* n = (struct node_app*) peek;
stack_push(s, n->left);
} else if(peek->tag == NODE_GLOBAL) {
struct node_global* n = (struct node_global*) peek;
assert(s->count > n->arity);
for(size_t i = 1; i <= n->arity; i++) {
s->data[s->count - i]
= ((struct node_app*) s->data[s->count - i - 1])->right;
}
n->function(g);
} else if(peek->tag == NODE_IND) {
struct node_ind* n = (struct node_ind*) peek;
stack_pop(s);
stack_push(s, n->next);
} else {
break;
}
}
}
extern void f_main(struct gmachine* s);
void print_node(struct node_base* n) {
if(n->tag == NODE_APP) {
struct node_app* app = (struct node_app*) n;
print_node(app->left);
putchar(' ');
print_node(app->right);
} else if(n->tag == NODE_DATA) {
printf("(Packed)");
} else if(n->tag == NODE_GLOBAL) {
struct node_global* global = (struct node_global*) n;
printf("(Global: %p)", global->function);
} else if(n->tag == NODE_IND) {
print_node(((struct node_ind*) n)->next);
} else if(n->tag == NODE_NUM) {
struct node_num* num = (struct node_num*) n;
printf("%d", num->value);
}
}
int main(int argc, char** argv) {
struct gmachine gmachine;
struct node_global* first_node = alloc_global(f_main, 0);
struct node_base* result;
gmachine_init(&gmachine);
gmachine_track(&gmachine, (struct node_base*) first_node);
stack_push(&gmachine.stack, (struct node_base*) first_node);
unwind(&gmachine);
result = stack_pop(&gmachine.stack);
printf("Result: ");
print_node(result);
putchar('\n');
gmachine_free(&gmachine);
}


@@ -0,0 +1,84 @@
#pragma once
#include <stdlib.h>
#include <stdint.h>
struct gmachine;
enum node_tag {
NODE_APP,
NODE_NUM,
NODE_GLOBAL,
NODE_IND,
NODE_DATA
};
struct node_base {
enum node_tag tag;
int8_t gc_reachable;
struct node_base* gc_next;
};
struct node_app {
struct node_base base;
struct node_base* left;
struct node_base* right;
};
struct node_num {
struct node_base base;
int32_t value;
};
struct node_global {
struct node_base base;
int32_t arity;
void (*function)(struct gmachine*);
};
struct node_ind {
struct node_base base;
struct node_base* next;
};
struct node_data {
struct node_base base;
int8_t tag;
struct node_base** array;
};
struct node_base* alloc_node();
struct node_app* alloc_app(struct node_base* l, struct node_base* r);
struct node_num* alloc_num(int32_t n);
struct node_global* alloc_global(void (*f)(struct gmachine*), int32_t a);
struct node_ind* alloc_ind(struct node_base* n);
void free_node_direct(struct node_base*);
void gc_visit_node(struct node_base*);
struct stack {
size_t size;
size_t count;
struct node_base** data;
};
void stack_init(struct stack* s);
void stack_free(struct stack* s);
void stack_push(struct stack* s, struct node_base* n);
struct node_base* stack_pop(struct stack* s);
struct node_base* stack_peek(struct stack* s, size_t o);
void stack_popn(struct stack* s, size_t n);
struct gmachine {
struct stack stack;
struct node_base* gc_nodes;
int64_t gc_node_count;
int64_t gc_node_threshold;
};
void gmachine_init(struct gmachine* g);
void gmachine_free(struct gmachine* g);
void gmachine_slide(struct gmachine* g, size_t n);
void gmachine_update(struct gmachine* g, size_t o);
void gmachine_alloc(struct gmachine* g, size_t o);
void gmachine_pack(struct gmachine* g, size_t n, int8_t t);
void gmachine_split(struct gmachine* g, size_t n);
struct node_base* gmachine_track(struct gmachine* g, struct node_base* b);
void gmachine_gc(struct gmachine* g);


@@ -0,0 +1,35 @@
%option noyywrap
%{
#include <iostream>
#include "ast.hpp"
#include "definition.hpp"
#include "parser.hpp"
#define YY_DECL yy::parser::symbol_type yylex()
%}
%%
[ \n]+ {}
\+ { return yy::parser::make_PLUS(); }
\* { return yy::parser::make_TIMES(); }
- { return yy::parser::make_MINUS(); }
\/ { return yy::parser::make_DIVIDE(); }
[0-9]+ { return yy::parser::make_INT(atoi(yytext)); }
defn { return yy::parser::make_DEFN(); }
data { return yy::parser::make_DATA(); }
case { return yy::parser::make_CASE(); }
of { return yy::parser::make_OF(); }
\{ { return yy::parser::make_OCURLY(); }
\} { return yy::parser::make_CCURLY(); }
\( { return yy::parser::make_OPAREN(); }
\) { return yy::parser::make_CPAREN(); }
, { return yy::parser::make_COMMA(); }
-> { return yy::parser::make_ARROW(); }
= { return yy::parser::make_EQUAL(); }
[a-z][a-zA-Z]* { return yy::parser::make_LID(std::string(yytext)); }
[A-Z][a-zA-Z]* { return yy::parser::make_UID(std::string(yytext)); }
%%

code/compiler/09/type.cpp Normal file

@@ -0,0 +1,99 @@
#include "type.hpp"
#include <sstream>
#include <algorithm>
#include "error.hpp"
void type_var::print(const type_mgr& mgr, std::ostream& to) const {
auto it = mgr.types.find(name);
if(it != mgr.types.end()) {
it->second->print(mgr, to);
} else {
to << name;
}
}
void type_base::print(const type_mgr& mgr, std::ostream& to) const {
to << name;
}
void type_arr::print(const type_mgr& mgr, std::ostream& to) const {
left->print(mgr, to);
to << " -> (";
right->print(mgr, to);
to << ")";
}
std::string type_mgr::new_type_name() {
int temp = last_id++;
std::string str = "";
while(temp != -1) {
str += (char) ('a' + (temp % 26));
temp = temp / 26 - 1;
}
std::reverse(str.begin(), str.end());
return str;
}
type_ptr type_mgr::new_type() {
return type_ptr(new type_var(new_type_name()));
}
type_ptr type_mgr::new_arrow_type() {
return type_ptr(new type_arr(new_type(), new_type()));
}
type_ptr type_mgr::resolve(type_ptr t, type_var*& var) const {
type_var* cast;
var = nullptr;
while((cast = dynamic_cast<type_var*>(t.get()))) {
auto it = types.find(cast->name);
if(it == types.end()) {
var = cast;
break;
}
t = it->second;
}
return t;
}
void type_mgr::unify(type_ptr l, type_ptr r) {
type_var* lvar;
type_var* rvar;
type_arr* larr;
type_arr* rarr;
type_base* lid;
type_base* rid;
l = resolve(l, lvar);
r = resolve(r, rvar);
if(lvar) {
bind(lvar->name, r);
return;
} else if(rvar) {
bind(rvar->name, l);
return;
} else if((larr = dynamic_cast<type_arr*>(l.get())) &&
(rarr = dynamic_cast<type_arr*>(r.get()))) {
unify(larr->left, rarr->left);
unify(larr->right, rarr->right);
return;
} else if((lid = dynamic_cast<type_base*>(l.get())) &&
(rid = dynamic_cast<type_base*>(r.get()))) {
if(lid->name == rid->name) return;
}
throw unification_error(l, r);
}
void type_mgr::bind(const std::string& s, type_ptr t) {
type_var* other = dynamic_cast<type_var*>(t.get());
if(other && other->name == s) return;
types[s] = t;
}

code/compiler/09/type.hpp Normal file

@@ -0,0 +1,65 @@
#pragma once
#include <memory>
#include <map>
struct type_mgr;
struct type {
virtual ~type() = default;
virtual void print(const type_mgr& mgr, std::ostream& to) const = 0;
};
using type_ptr = std::shared_ptr<type>;
struct type_var : public type {
std::string name;
type_var(std::string n)
: name(std::move(n)) {}
void print(const type_mgr& mgr, std::ostream& to) const;
};
struct type_base : public type {
std::string name;
type_base(std::string n)
: name(std::move(n)) {}
void print(const type_mgr& mgr, std::ostream& to) const;
};
struct type_data : public type_base {
struct constructor {
int tag;
};
std::map<std::string, constructor> constructors;
type_data(std::string n)
: type_base(std::move(n)) {}
};
struct type_arr : public type {
type_ptr left;
type_ptr right;
type_arr(type_ptr l, type_ptr r)
: left(std::move(l)), right(std::move(r)) {}
void print(const type_mgr& mgr, std::ostream& to) const;
};
struct type_mgr {
int last_id = 0;
std::map<std::string, type_ptr> types;
std::string new_type_name();
type_ptr new_type();
type_ptr new_arrow_type();
void unify(type_ptr l, type_ptr r);
type_ptr resolve(type_ptr t, type_var*& var) const;
void bind(const std::string& s, type_ptr t);
};


@@ -0,0 +1,16 @@
#include "type_env.hpp"
type_ptr type_env::lookup(const std::string& name) const {
auto it = names.find(name);
if(it != names.end()) return it->second;
if(parent) return parent->lookup(name);
return nullptr;
}
void type_env::bind(const std::string& name, type_ptr t) {
names[name] = t;
}
type_env type_env::scope() const {
return type_env(this);
}


@@ -0,0 +1,16 @@
#pragma once
#include <map>
#include "type.hpp"
struct type_env {
std::map<std::string, type_ptr> names;
type_env const* parent = nullptr;
type_env(type_env const* p)
: parent(p) {}
type_env() : type_env(nullptr) {}
type_ptr lookup(const std::string& name) const;
void bind(const std::string& name, type_ptr t);
type_env scope() const;
};

code/compiler/test.sh Executable file

@@ -0,0 +1,4 @@
cd 09
mkdir -p build && cd build
cmake ..
make -j8


@@ -0,0 +1,119 @@
CS 325-001, Analysis of Algorithms, Fall 2019
HW1 - Python 3, qsort, BST, and qselect
Due electronically on flip on Monday 9/30 at 11:59pm.
No late submission will be accepted.
Need to submit on flip: report.txt, qsort.py, and qselect.py.
qselect.py will be automatically graded for correctness (1%).
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw1 qselect.py qsort.py report.txt
Note:
1. You can ssh to flip machines from your own machine by:
$ ssh access.engr.oregonstate.edu
2. You can add /nfs/farm/classes/eecs/fall2019/cs325-001/ to your $PATH:
$ export PATH=$PATH:/nfs/farm/classes/eecs/fall2019/cs325-001/
and add the above command to your ~/.bash_profile,
so that you don't need to type it every time.
(alternatively, you can use symbolic links or aliases to avoid typing the long path)
3. You can choose to submit each file separately, or submit them together.
Textbooks for References:
[1] CLRS Ch. 9.2 and Ch. 12
0. Q: What are the best-case, worst-case, and average-case time complexities of quicksort?
Briefly explain each case.
1. [WILL BE GRADED]
Quickselect with Randomized Pivot (CLRS Ch. 9.2).
>>> from qselect import *
>>> qselect(2, [3, 10, 4, 7, 19])
4
>>> qselect(4, [11, 2, 8, 3])
11
Q: What are the best-case, worst-case, and average-case time complexities? Briefly explain.
Filename: qselect.py
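A minimal Python sketch of the randomized-pivot quickselect described above (illustrative only, not the graded submission; the partition names and recursion shape are our own choices):
import random

def qselect(k, a):
    # k-th smallest element (1-indexed); expected O(n) time
    a = list(a)                                  # work on a copy
    pivot = a.pop(random.randrange(len(a)))
    left = [x for x in a if x <= pivot]
    right = [x for x in a if x > pivot]
    if k <= len(left):
        return qselect(k, left)
    elif k == len(left) + 1:
        return pivot
    else:
        return qselect(k - len(left) - 1, right)
With the examples above, qselect(2, [3, 10, 4, 7, 19]) gives 4 and qselect(4, [11, 2, 8, 3]) gives 11.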
2. Buggy Qsort Revisited
In the slides we showed a buggy version of qsort which is weird in an interesting way:
it actually returns a binary search tree for the given array, rooted at the pivot:
>>> from qsort import *
>>> tree = sort([4,2,6,3,5,7,1,9])
>>> tree
[[[[], 1, []], 2, [[], 3, []]], 4, [[[], 5, []], 6, [[], 7, [[], 9, []]]]]
which encodes a binary search tree:
4
/ \
2 6
/ \ / \
1 3 5 7
\
9
Now on top of that piece of code, add three functions:
* sorted(t): returns the sorted order (infix traversal)
* search(t, x): returns whether x is in t
* insert(t, x): inserts x into t (in-place) if it is missing, otherwise does nothing.
>>> sorted(tree)
[1, 2, 3, 4, 5, 6, 7, 9]
>>> search(tree, 6)
True
>>> search(tree, 6.5)
False
>>> insert(tree, 6.5)
>>> tree
[[[[], 1, []], 2, [[], 3, []]], 4, [[[], 5, []], 6, [[[], 6.5, []], 7, [[], 9, []]]]]
>>> insert(tree, 3)
>>> tree
[[[[], 1, []], 2, [[], 3, []]], 4, [[[], 5, []], 6, [[[], 6.5, []], 7, [[], 9, []]]]]
Hint: both search and insert should depend on a helper function _search(tree, x) which
returns the subtree (a list) rooted at x when x is found, or the [] where x should
be inserted.
e.g.,
>>> tree = sort([4,2,6,3,5,7,1,9]) # starting from the initial tree
>>> _search(tree, 3)
[[], 3, []]
>>> _search(tree, 0)
[]
>>> _search(tree, 6.5)
[]
>>> _search(tree, 0) is _search(tree, 6.5)
False
>>> _search(tree, 0) == _search(tree, 6.5)
True
Note the last two []'s are different nodes (with different memory addresses):
the first one is the left child of 1, while the second one is the left child of 7
(so that insert is very easy).
Filename: qsort.py
Q: What are the time complexities for the operations implemented?
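One possible shape for these helpers on the nested-list tree (a hedged sketch: the in-place insert works only because _search hands back the very [] object stored inside the tree, exactly as the hint suggests):
def _search(tree, x):
    # subtree rooted at x, or the shared [] where x would be inserted
    if tree == []:
        return tree
    left, root, right = tree
    if x == root:
        return tree
    return _search(left if x < root else right, x)

def search(tree, x):
    return _search(tree, x) != []

def insert(tree, x):
    spot = _search(tree, x)
    if spot == []:
        spot.extend([[], x, []])          # mutate the shared empty node in place

def sorted(tree):                         # shadows the builtin, as the assignment asks
    if tree == []:
        return []
    left, root, right = tree
    return sorted(left) + [root] + sorted(right)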
Debriefing (required!): --------------------------
1. Approximately how many hours did you spend on this assignment?
2. Would you rate it as easy, moderate, or difficult?
3. Did you work on it mostly alone, or mostly with other people?
4. How deeply do you feel you understand the material it covers (0%-100%)?
5. Any other comments?
This section is intended to help us calibrate the homework assignments.
Your answers to this section will *not* affect your grade; however, skipping it
will certainly do.


@@ -0,0 +1,170 @@
CS 325, Algorithms (MS/MEng-level), Fall 2019
HW10 - Challenge Problem - RNA Structure Prediction (6%)
This problem combines dynamic programming and priority queues.
Due Wednesday 12/4, 11:59pm.
No late submission will be accepted.
Include in your submission: report.txt, rna.py.
Grading:
* report.txt -- 1%
* 1-best structure -- 2%
* number of structures -- 1%
* k-best structures -- 2%
Textbooks for References:
[1] KT Ch. 6.5 (DP over intervals -- RNA structure)
[2] KT slides: DP I (RNA section)
http://www.cs.princeton.edu/~wayne/kleinberg-tardos/
***Please analyze time/space complexities for each problem in report.txt.
1. Given an RNA sequence, such as ACAGU, we can predict its secondary structure
by tagging each nucleotide as (, ., or ). Each matching pair of () must be
AU, GC, or GU (or their mirror symmetries: UA, CG, UG).
We also assume pairs can _not_ cross each other.
The following are valid structures for ACAGU:
ACAGU
.....
...()
..(.)
.(.).
(...)
((.))
We want to find the structure with the maximum number of matching pairs.
In the above example, the last structure is optimal (2 pairs).
>>> best("ACAGU")
(2, '((.))')
Tie-breaking: arbitrary. Don't worry as long as your structure
is one of the correct best structures.
some other cases (more cases at the bottom):
GCACG
(2, '().()')
UUCAGGA
(3, '(((.)))')
GUUAGAGUCU
(4, '(.()((.)))')
AUAACCUUAUAGGGCUCUG
(8, '.(((..)()()((()))))')
AACCGCUGUGUCAAGCCCAUCCUGCCUUGUU
(11, '(((.(..(.((.)((...().))()))))))')
GAUGCCGUGUAGUCCAAAGACUUCACCGUUGG
(14, '.()()(()(()())(((.((.)(.))()))))')
CAUCGGGGUCUGAGAUGGCCAUGAAGGGCACGUACUGUUU
(18, '(()())(((((.)))()(((())(.(.().()()))))))')
ACGGCCAGUAAAGGUCAUAUACGCGGAAUGACAGGUCUAUCUAC
(19, '.()(((.)(..))(((.()()(())))(((.)((())))))())')
AGGCAUCAAACCCUGCAUGGGAGCACCGCCACUGGCGAUUUUGGUA
(20, '.(()())...((((()()))((()(.()(((.)))()())))))()')
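For reference, a compact interval-DP (Nussinov-style) sketch of best() as specified above: O(n^3) memoized recursion over substrings, with ties broken however max() happens to break them, which the prompt allows. The helper names are ours.
from functools import lru_cache

def best(s):
    allowed = {('A','U'), ('U','A'), ('C','G'), ('G','C'), ('G','U'), ('U','G')}
    @lru_cache(maxsize=None)
    def solve(i, j):                       # best (pairs, structure) for s[i..j]
        if i > j:
            return (0, '')
        p, st = solve(i, j - 1)
        cands = [(p, st + '.')]            # s[j] stays unpaired
        for t in range(i, j):              # s[j] pairs with s[t]; pairs never cross
            if (s[t], s[j]) in allowed:
                lp, ls = solve(i, t - 1)
                mp, ms = solve(t + 1, j - 1)
                cands.append((lp + mp + 1, ls + '(' + ms + ')'))
        return max(cands)
    return solve(0, len(s) - 1)
For instance, best("ACAGU") evaluates to (2, '((.))').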
2. Total number of all possible structures
>>> total("ACAGU")
6
3. k-best structures: output the 1-best, 2nd-best, ... kth-best structures.
>>> kbest("ACAGU", 3)
[(2, '((.))'), (1, '(...)'), (1, '.(.).')]
The list must be sorted.
Tie-breaking: arbitrary.
In case the input k is bigger than the number of possible structures, output all.
Sanity check: kbest(s, 1)[0][0] == best(s)[0] for each RNA sequence s.
All three functions should be in one file: rna.py.
See more testcases at the end.
Debriefing (required!): --------------------------
0. What's your name?
1. Approximately how many hours did you spend on this assignment?
2. Would you rate it as easy, moderate, or difficult?
3. Did you work on it mostly alone, or mostly with other people?
4. How deeply do you feel you understand the material it covers (0%-100%)?
5. Any other comments?
This section is intended to help us calibrate the homework assignments.
Your answers to this section will *not* affect your grade; however, skipping it
will certainly do.
TESTCASES:
for each sequence s, we list three lines:
best(s)
total(s)
kbest(s, 10)
ACAGU
(2, '((.))')
6
[(2, '((.))'), (1, '.(.).'), (1, '..(.)'), (1, '...()'), (1, '(...)'), (0, '.....')]
------
AC
(0, '..')
1
[(0, '..')]
------
GUAC
(2, '(())')
5
[(2, '(())'), (1, '()..'), (1, '.().'), (1, '(..)'), (0, '....')]
------
GCACG
(2, '().()')
6
[(2, '().()'), (1, '(..).'), (1, '()...'), (1, '.(..)'), (1, '...()'), (0, '.....')]
------
CCGG
(2, '(())')
6
[(2, '(())'), (1, '(.).'), (1, '.().'), (1, '.(.)'), (1, '(..)'), (0, '....')]
------
CCCGGG
(3, '((()))')
20
[(3, '((()))'), (2, '((.)).'), (2, '(.()).'), (2, '.(()).'), (2, '.(().)'), (2, '.((.))'), (2, '((.).)'), (2, '(.(.))'), (2, '(.().)'), (2, '((..))')]
------
UUCAGGA
(3, '(((.)))')
24
[(3, '(((.)))'), (2, '((.).).'), (2, '((..)).'), (2, '(.(.)).'), (2, '((.))..'), (2, '.((.)).'), (2, '.((.).)'), (2, '.((..))'), (2, '((..).)'), (2, '((.)..)')]
------
AUAACCUA
(2, '.((...))')
19
[(2, '((.)..).'), (2, '(()...).'), (2, '()(...).'), (2, '().(..).'), (2, '()....()'), (2, '.()(..).'), (2, '.()...()'), (2, '.(.)..()'), (2, '.((...))'), (2, '.(.(..))')]
------
UUGGACUUG
(4, '(()((.)))')
129
[(4, '(())(.)()'), (4, '(()((.)))'), (3, '(().)..()'), (3, '(().).(.)'), (3, '(().)(..)'), (3, '((.))..()'), (3, '((.)).(.)'), (3, '((.))(..)'), (3, '(())(..).'), (3, '(())(.)..')]
------
UUUGGCACUA
(4, '(.()()(.))')
179
[(4, '((()).).()'), (4, '((.)()).()'), (4, '(.()()).()'), (4, '.(()()).()'), (4, '.(()()(.))'), (4, '((()).(.))'), (4, '((.)()(.))'), (4, '((()())..)'), (4, '(.()()(.))'), (3, '((()).)...')]
------
GAUGCCGUGUAGUCCAAAGACUUC
(11, '(((()()((()(.))))((.))))')
2977987
[(11, '(()())(((()().))(((.))))'), (11, '(()())(((()()).)(((.))))'), (11, '(()())(((()(.)))(((.))))'), (11, '(()()()((()(.)))(((.))))'), (11, '(((()()((()().)))((.))))'), (11, '(((()()((()(.))))((.))))'), (11, '(()()()((()()).)(((.))))'), (11, '(()()()((()().))(((.))))'), (11, '(((()()((()()).))((.))))'), (10, '(()()()((()().).)((.))).')]
------
AGGCAUCAAACCCUGCAUGGGAGCG
(10, '.(()())...((((()()))).())')
560580
[(10, '.(()())...((((())())).)()'), (10, '.(()())...((((()()))).)()'), (10, '.(()())...(((()(()))).)()'), (10, '.(()())...(((()(()))).())'), (10, '.(()())...((((())())).())'), (10, '.(()())...((((()()))).())'), (9, '((.).)(...(.((()()))).)()'), (9, '((.).)(...(((.)(()))).)()'), (9, '((.).)(...(.(()(()))).)()'), (9, '((.).)(...((.(()()))).)()')]
------


@@ -0,0 +1,42 @@
HW11 -- OPTIONAL (for your practice only -- solutions will be released on Tuesday)
Edit Distance (see updated final review solutions)
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw11 edit.py
Implement two functions:
* distance1(s, t): Viterbi-style (either top-down or bottom-up)
* distance2(s, t): Dijkstra-style (best-first)
For Dijkstra, you can use either heapdict or heapq (see review problem 7).
Given that this graph is extremely sparse (why?), heapq (ElogE) might be faster than heapdict (ElogV)
because the latter has hashing overhead.
They should return the same result (just return the edit distance).
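A bottom-up sketch of the Viterbi-style distance1 (the classic O(|s|*|t|) table); the Dijkstra-style distance2 is not shown here:
def distance1(s, t):
    m, n = len(s), len(t)
    dist = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(m + 1):
        dist[i][0] = i                              # delete all of s[:i]
    for j in range(n + 1):
        dist[0][j] = j                              # insert all of t[:j]
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            sub = 0 if s[i - 1] == t[j - 1] else 1
            dist[i][j] = min(dist[i - 1][j] + 1,    # deletion
                             dist[i][j - 1] + 1,    # insertion
                             dist[i - 1][j - 1] + sub)
    return dist[m][n]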
We have 10 testcases (listed below); the first 5 test distance1(),
and the second 5 test distance2() on the same 5 string pairs.
My solutions (on flip2):
Testing Case 1 (open)... 0.001 s, Correct
Testing Case 2 (open)... 0.000 s, Correct
Testing Case 3 (open)... 0.012 s, Correct
Testing Case 4 (open)... 0.155 s, Correct
Testing Case 5 (open)... 0.112 s, Correct
Testing Case 6 (hidden)... 0.000 s, Correct
Testing Case 7 (hidden)... 0.000 s, Correct
Testing Case 8 (hidden)... 0.004 s, Correct
Testing Case 9 (hidden)... 0.009 s, Correct
Testing Case 10 (hidden)... 0.021 s, Correct
Total Time: 0.316 s
distance1("abcdefh", "abbcdfg") == 3
distance1("pretty", "prettier") == 3
distance1("aaaaaaadaaaaaaaaaaaaaaaaacaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxaaaaaaaaaaaaaaaaaaaaaa") == 5
distance1('cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbxtwiqzvokqpkecyywrbvhlqgxzutdjfmvlhsezfbhfjbllmfhzlqlcwibubyyjupbwhztskyksfthkptxqlmhivfjbgclwsombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasomrhotoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwy', 'cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbtwiqzvokqpkecyywrbvhlqgxzutdjfmvlhsezfbhfjbllmfhzlqlcwibubyyjupbwhztskyksfthkptxqlmhivfbgclwsombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasonrhotoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwy') == 3
distance1('cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbtwiqzvokqpasdfkecyywrbvhlqgxzutdjfmvlhsezfbhbllmfhzlqlcwibubyyjupbwhztsxyksfthkptxqlmhivfjbgclhombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasomrttoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwydmbihjkvziitusmkjljrsbafytsinql', 'cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbtwiqzvokqpkecyywrbvhlqgxzutdjfmvlhsezfbhfjbllmfhzlqlcwibubyyjupbwhztskyksfthkptxqlmhivfjbgclwsombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasomrhotoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwydmbihjkvziitusmkjljrsbafytsinql') == 11
distance2("abcdefh", "abbcdfg") == 3
distance2("pretty", "prettier") == 3
distance2("aaaaaaadaaaaaaaaaaaaaaaaacaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaxaaaaaaaaaaaaaaaaaaaaaa") == 5
distance2('cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbxtwiqzvokqpkecyywrbvhlqgxzutdjfmvlhsezfbhfjbllmfhzlqlcwibubyyjupbwhztskyksfthkptxqlmhivfjbgclwsombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasomrhotoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwy', 'cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbtwiqzvokqpkecyywrbvhlqgxzutdjfmvlhsezfbhfjbllmfhzlqlcwibubyyjupbwhztskyksfthkptxqlmhivfbgclwsombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasonrhotoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwy') == 3
distance2('cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbtwiqzvokqpasdfkecyywrbvhlqgxzutdjfmvlhsezfbhbllmfhzlqlcwibubyyjupbwhztsxyksfthkptxqlmhivfjbgclhombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasomrttoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwydmbihjkvziitusmkjljrsbafytsinql', 'cpuyedzrwcbritzclzhwwabmlyresvewkdxwkamyzbtwiqzvokqpkecyywrbvhlqgxzutdjfmvlhsezfbhfjbllmfhzlqlcwibubyyjupbwhztskyksfthkptxqlmhivfjbgclwsombvytdztapwpzmdqfwwrhqsgztobeuiatcwmrzfbwhfnpzzasomrhotoqiwvexlgxsnafiagfewmopdzwanxswfsmbxsmsczbwsgnwydmbihjkvziitusmkjljrsbafytsinql') == 11


@@ -0,0 +1,80 @@
CS 325-001, Analysis of Algorithms, Fall 2019
HW2 - Divide-n-conquer: mergesort, number of inversions, longest path
Due Monday Oct 7, 11:59pm (same submission instructions as HW1).
No late submission will be accepted.
Need to submit: report.txt, msort.py, inversions.py, and longest.py.
longest.py will be graded for correctness (1%).
To submit:
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw2 report.txt {msort,inversions,longest}.py
(You can submit each file separately, or submit them together.)
To see your best results so far:
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/query hw2
Textbooks for References:
[1] CLRS Ch. 2
0. Which of the following sorting algorithms are (or can be made) stable?
(a) mergesort
(b) quicksort with the first element as pivot
(c) quicksort with randomized pivot
(d) selection sort
(e) insertion sort
(f) heap sort --- not covered yet (see CLRS Ch. 6)
1. Implement mergesort.
>>> mergesort([4, 2, 5, 1, 6, 3])
[1, 2, 3, 4, 5, 6]
Filename: msort.py
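A straightforward top-down sketch of the requested mergesort (the _merge helper is our own name):
def mergesort(a):
    if len(a) <= 1:
        return list(a)
    mid = len(a) // 2
    return _merge(mergesort(a[:mid]), mergesort(a[mid:]))

def _merge(xs, ys):
    out, i, j = [], 0, 0
    while i < len(xs) and j < len(ys):
        if xs[i] <= ys[j]:
            out.append(xs[i])
            i += 1
        else:
            out.append(ys[j])
            j += 1
    return out + xs[i:] + ys[j:]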
2. Calculate the number of inversions in a list.
>>> num_inversions([4, 1, 3, 2])
4
>>> num_inversions([2, 4, 1, 3])
3
Filename: inversions.py
Must run in O(nlogn) time.
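One way to hit the O(nlogn) bound is to count cross-pair inversions while merging; a self-contained sketch:
def num_inversions(a):
    def sort_count(xs):
        if len(xs) <= 1:
            return xs, 0
        mid = len(xs) // 2
        left, cl = sort_count(xs[:mid])
        right, cr = sort_count(xs[mid:])
        merged, i, j, cross = [], 0, 0, 0
        while i < len(left) and j < len(right):
            if left[i] <= right[j]:
                merged.append(left[i])
                i += 1
            else:
                merged.append(right[j])
                j += 1
                cross += len(left) - i      # everything remaining in left inverts with right[j]
        merged += left[i:] + right[j:]
        return merged, cl + cr + cross
    return sort_count(list(a))[1]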
3. [WILL BE GRADED]
Length of the longest path in a binary tree (number of edges).
We will use the "buggy qsort" representation of binary trees from HW1:
[left_subtree, root, right_subtree]
>>> longest([[], 1, []])
0
>>> longest([[[], 1, []], 2, [[], 3, []]])
2
>>> longest([[[[], 1, []], 2, [[], 3, []]], 4, [[[], 5, []], 6, [[], 7, [[], 9, []]]]])
5
Note the answer is 5 because the longest path is 1-2-4-6-7-9.
Filename: longest.py
Must run in O(n) time.
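A linear-time sketch: one recursion returning, per subtree, its depth and the best path seen so far; the longest path through a node joins its two deepest arms:
def longest(tree):
    def walk(t):                              # (depth in edges, best path in edges)
        if t == []:
            return -1, 0
        left, _, right = t
        dl, pl = walk(left)
        dr, pr = walk(right)
        return 1 + max(dl, dr), max(pl, pr, dl + dr + 2)
    return walk(tree)[1]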
Debriefing (required!): --------------------------
1. Approximately how many hours did you spend on this assignment?
2. Would you rate it as easy, moderate, or difficult?
3. Did you work on it mostly alone, or mostly with other people?
Note you are encouraged to discuss with your classmates,
but each student should submit his/her own code.
4. How deeply do you feel you understand the material it covers (0%-100%)?
5. Any other comments?
This section is intended to help us calibrate the homework assignments.
Your answers to this section will *not* affect your grade; however, skipping it
will certainly do.


@@ -0,0 +1,83 @@
CS 325, Algorithms, Fall 2019
HW3 - K closest numbers; Two Pointers
Due Monday Oct 14, 11:59pm. (same submission instructions as HW1-2).
No late submission will be accepted.
Need to submit: report.txt, closest_unsorted.py, closest_sorted.py, xyz.py.
closest_sorted.py will be graded for correctness (1%).
To submit:
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw3 report.txt {closest*,xyz}.py
(You can submit each file separately, or submit them together.)
To see your best results so far:
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/query hw3
1. Given an array A of n numbers, a query x, and a number k,
find the k numbers in A that are closest (in value) to x.
For example:
find([4,1,3,2,7,4], 5.2, 2) returns [4,4]
find([4,1,3,2,7,4], 6.5, 3) returns [4,7,4]
find([5,3,4,1,6,3], 3.5, 2) returns [3,4]
Filename: closest_unsorted.py
Must run in O(n) time.
The elements in the returned list must be in the original order.
In case two numbers are equally close to x, choose the earlier one.
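A hedged sketch of this problem, written around the doctest signature find(a, x, k). The k-th smallest distance is found by sorting here for brevity (O(nlogn)); swapping in the qselect from HW1 restores the required O(n). The two-pass cutoff logic preserves original order and prefers earlier ties:
def find(a, x, k):
    cutoff = sorted(abs(v - x) for v in a)[k - 1]          # k-th smallest distance
    quota = k - sum(1 for v in a if abs(v - x) < cutoff)   # ties at the cutoff we may still take
    out = []
    for v in a:                                            # original order; earlier ties win
        d = abs(v - x)
        if d < cutoff:
            out.append(v)
        elif d == cutoff and quota > 0:
            out.append(v)
            quota -= 1
    return out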
2. [WILL BE GRADED]
Now what if the input array is sorted? Can you do it faster?
find([1,2,3,4,4,7], 5.2, 2) returns [4,4]
find([1,2,3,4,4,7], 6.5, 3) returns [4,4,7]
Filename: closest_sorted.py
Must run in O(logn + k) time.
The elements in the returned list must be in the original order.
Note: in case two numbers are equally close to x, choose the smaller one:
find([1,2,3,4,4,6,6], 5, 3) returns [4,4,6]
find([1,2,3,4,4,5,6], 4, 5) returns [2,3,4,4,5]
Hint: you can use Python's bisect.bisect for binary search.
3. For a given array A of n *distinct* numbers, find all triples (x,y,z)
s.t. x + y = z. (x, y, z are distinct numbers)
e.g.,
find([1, 4, 2, 3, 5]) returns [(1,3,4), (1,2,3), (1,4,5), (2,3,5)]
Note that:
1) no duplicates in the input array
2) you can choose any arbitrary order for triples in the returned list.
Filename: xyz.py
Must run in O(n^2) time.
Hint: you can use any built-in sort in Python.
Debriefing (required!): --------------------------
0. What's your name?
1. Approximately how many hours did you spend on this assignment?
2. Would you rate it as easy, moderate, or difficult?
3. Did you work on it mostly alone, or mostly with other people?
Note you are encouraged to discuss with your classmates,
but each student should submit his/her own code.
4. How deeply do you feel you understand the material it covers (0%-100%)?
5. Which part(s) of the course you like the most so far?
6. Which part(s) of the course you dislike the most so far?
This section is intended to help us calibrate the homework assignments.
Your answers to this section will *not* affect your grade; however, skipping it
will certainly do.


@@ -0,0 +1,114 @@
CS 325-001, Algorithms, Fall 2019
HW4 - Priority Queue and Heaps
Due via the submit program on Monday Oct 21, 11:59pm.
No late submission will be accepted.
Need to submit: report.txt, nbest.py, kmergesort.py, datastream.py.
datastream.py will be graded for correctness (1%).
To submit:
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw4 report.txt {nbest,kmergesort,datastream}.py
(You can submit each file separately, or submit them together.)
To see your best results so far:
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/query hw4
Textbooks for References:
[1] CLRS Ch. 6
[2] KT slides for binary heaps (only read the first 20 pages!):
https://www.cs.princeton.edu/~wayne/kleinberg-tardos/pdf/BinomialHeaps.pdf
[3] Python heapq module
0. There are two methods for building a heap from an unsorted array:
(1) insert each element into the heap --- O(nlogn) -- heapq.heappush()
(2) heapify (top-down) --- O(n) -- heapq.heapify()
(a) Derive these time complexities.
(b) Use a long list of random numbers to show the difference in time. (Hint: random.shuffle or random.sample)
(c) What about sorted or reversely-sorted numbers?
1. Given two lists A and B, each with n integers, return
a sorted list C that contains the smallest n elements from AxB:
AxB = { (x, y) | x in A, y in B }
i.e., AxB is the Cartesian Product of A and B.
ordering: (x,y) < (x',y') iff. x+y < x'+y' or (x+y==x'+y' and y<y')
You need to implement three algorithms and compare:
(a) enumerate all n^2 pairs, sort, and take top n.
(b) enumerate all n^2 pairs, but use qselect from hw1.
(c) Dijkstra-style best-first, only enumerate O(n) (at most 2n) pairs.
Hint: you can use Python's heapq module for priority queue.
Q: What are the time complexities of these algorithms?
>>> a, b = [4, 1, 5, 3], [2, 6, 3, 4]
>>> nbesta(a, b) # algorithm (a), slowest
[(1, 2), (1, 3), (3, 2), (1, 4)]
>>> nbestb(a, b) # algorithm (b), slow
[(1, 2), (1, 3), (3, 2), (1, 4)]
>>> nbestc(a, b) # algorithm (c), fast
[(1, 2), (1, 3), (3, 2), (1, 4)]
Filename: nbest.py
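A sketch of option (c) above, the Dijkstra-style best-first enumeration: sort both lists, start from the index pair (0, 0), and on every pop push at most two successor pairs, deduplicating with a set. The heap key (x+y, y) mirrors the ordering defined above.
from heapq import heappush, heappop

def nbestc(a, b):
    a, b = sorted(a), sorted(b)
    n = len(a)
    h = [(a[0] + b[0], b[0], 0, 0)]
    seen = {(0, 0)}
    out = []
    while len(out) < n:
        _, _, i, j = heappop(h)
        out.append((a[i], b[j]))
        for ni, nj in ((i + 1, j), (i, j + 1)):
            if ni < n and nj < n and (ni, nj) not in seen:
                seen.add((ni, nj))
                heappush(h, (a[ni] + b[nj], b[nj], ni, nj))
    return out
The slower variants (a) and (b) are just "materialize all n^2 pairs, then sort or qselect", so they are omitted here.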
2. k-way mergesort (the classical mergesort is a special case where k=2).
>>> kmergesort([4,1,5,2,6,3,7,0], 3) # k=3
[0,1,2,3,4,5,6,7]
Q: What is the complexity? Write down the detailed analysis in report.txt.
Filename: kmergesort.py
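A compact sketch: split into k slices (ceiling division picks the slice size), recurse, then let heapq.merge do the heap-based k-way merge:
from heapq import merge

def kmergesort(a, k):
    if len(a) <= 1:
        return list(a)
    step = -(-len(a) // k)                    # ceiling division
    runs = [kmergesort(a[i:i + step], k) for i in range(0, len(a), step)]
    return list(merge(*runs))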
3. [WILL BE GRADED]
Find the k smallest numbers in a data stream of length n (k<<n),
using only O(k) space (the stream itself might be too big to fit in memory).
>>> ksmallest(4, [10, 2, 9, 3, 7, 8, 11, 5, 7])
[2, 3, 5, 7]
>>> ksmallest(3, range(1000000, 0, -1))
[1, 2, 3]
Note:
a) it should work with both lists and lazy lists
b) the output list should be sorted
Q: What is your complexity? Write down the detailed analysis in report.txt.
Filename: datastream.py
[UPDATE] The built-in function heapq.nsmallest() is _not_ allowed for this problem.
The whole point is to implement it yourself. :)
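A sketch of the O(k)-space idea: keep a bounded max-heap of the k smallest values seen so far (heapq is a min-heap, so store negated values); heapq.nsmallest is not used, per the note above.
import heapq

def ksmallest(k, stream):
    kept = []                                  # max-heap (negated) of the k smallest so far
    for x in stream:
        if len(kept) < k:
            heapq.heappush(kept, -x)
        elif -x > kept[0]:                     # x beats the current k-th smallest
            heapq.heapreplace(kept, -x)
    return sorted(-v for v in kept)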
4. (optional) Summarize the time complexities of the basic operations (push, pop-min, peek, heapify) for these implementations of priority queue:
(a) unsorted array
(b) sorted array (highest priority first)
(c) reversely sorted array (lowest priority first)
(d) linked list
(e) binary heap
Debriefing (required!): --------------------------
0. What's your name?
1. Approximately how many hours did you spend on this assignment?
2. Would you rate it as easy, moderate, or difficult?
3. Did you work on it mostly alone, or mostly with other people?
Note you are encouraged to discuss with your classmates,
but each student should submit his/her own code.
4. How deeply do you feel you understand the material it covers (0%-100%)?
5. Which part(s) of the course you like the most so far?
6. Which part(s) of the course you dislike the most so far?
This section is intended to help us calibrate the homework assignments.
Your answers to this section will *not* affect your grade; however, skipping it
will certainly do.


@@ -0,0 +1,130 @@
CS 532-001, Algorithms, Fall 2019
HW5 - DP (part 1: simple)
HWs 5-7 are all on DPs.
Due Monday Oct 28, 11:59pm.
No late submission will be accepted.
Need to submit report.txt, mis.py, bsts.py, bitstrings.py.
mis.py will be graded for correctness (1%).
To submit:
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw5 report.txt {mis,bsts,bitstrings}.py
(You can submit each file separately, or submit them together.)
To see your best results so far:
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/query hw5
Textbooks for References:
[1] CLRS Ch. 15
[2] KT Ch. 6
or Ch. 5 in a previous version:
http://cs.furman.edu/~chealy/cs361/kleinbergbook.pdf
Hint: Among the three coding questions, p3 is the easiest, and p1 is similar to p3.
You'll realize that both are very similar to p0 (Fibonacci).
p2 is slightly different from these, but still very easy.
0. (Optional) Is Fibonacci REALLY O(n)?
Hint: the value of f(n) itself grows exponentially.
1. [WILL BE GRADED]
Maximum Weighted Independent Set
[HINT] independent set is a set where no two numbers are neighbors in the original list.
see also https://en.wikipedia.org/wiki/Independent_set_(graph_theory)
input: a list of numbers (could be negative)
output: a pair of the max sum and the list of numbers chosen
>>> max_wis([7,8,5])
(12, [7,5])
>>> max_wis([-1,8,10])
(10, [10])
>>> max_wis([])
(0, [])
[HINT] if all numbers are negative, the optimal solution is 0,
since [] is an independent set according to the definition above.
>>> max_wis([-5, -1, -4])
(0, [])
Q: What's the complexity?
Include both top-down (max_wis()) and bottom-up (max_wis2()) solutions,
and make sure they produce exact same results.
We'll only grade the top-down version.
Tie-breaking: any best solution is considered correct.
Filename: mis.py
[HINT] you can also use the naive O(2^n) exhaustive search method to verify your answer.
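A top-down sketch of max_wis with memoization, returning both the sum and one optimal choice; because the empty set is always a candidate, all-negative inputs fall out as (0, []) automatically:
from functools import lru_cache

def max_wis(a):
    @lru_cache(maxsize=None)
    def go(i):                                  # best (sum, chosen) within a[i:]
        if i >= len(a):
            return (0, ())
        skip_sum, skip_set = go(i + 1)          # leave a[i] out
        take_sum, take_set = go(i + 2)          # take a[i], skip its neighbor
        take_sum += a[i]
        if take_sum > skip_sum:
            return (take_sum, (a[i],) + take_set)
        return (skip_sum, skip_set)
    total, chosen = go(0)
    return (total, list(chosen))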
2. Number of n-node BSTs
input: n
output: number of n-node BSTs
>>> bsts(2)
2
>>> bsts(3)
5
>>> bsts(5)
42
[HINT] There are two 2-node BSTs:
2 1
/ \
1 2
Note that all other 2-node BSTs are *isomorphic* to either one.
Qa: What's the complexity of this DP?
Qb: What's the name of this famous number series?
Feel free to use any implementation style.
Filename: bsts.py
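A short bottom-up sketch: choosing the root of an m-node BST splits the remaining keys into a left and a right side, giving the Catalan recurrence:
def bsts(n):
    c = [0] * (n + 1)
    c[0] = 1
    for m in range(1, n + 1):
        c[m] = sum(c[left] * c[m - 1 - left] for left in range(m))
    return c[n]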
3. Number of bit strings of length n that has
1) no two consecutive 0s.
2) two consecutive 0s.
>>> num_no(3)
5
>>> num_yes(3)
3
[HINT] There are three 3-bit 0/1-strings that have two consecutive 0s.
001 100 000
The other five 3-bit 0/1-strings have no two consecutive 0s:
010 011 101 110 111
Feel free to choose any implementation style.
Filename: bitstrings.py
[HINT] Like problem 1, you can also use the O(2^n) exhaustive search method to verify your answer.
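A Fibonacci-style sketch: a string with no two consecutive 0s either ends in '1' appended to any valid string of length n-1, or ends in '10' appended to any valid string of length n-2; num_yes is the complement out of 2^n:
def num_no(n):
    a, b = 1, 2                # counts for lengths 0 and 1
    if n == 0:
        return 1
    for _ in range(n - 1):
        a, b = b, a + b
    return b

def num_yes(n):
    return 2 ** n - num_no(n)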
Debriefing (required!): --------------------------
0. What's your name?
1. Approximately how many hours did you spend on this assignment?
2. Would you rate it as easy, moderate, or difficult?
3. Did you work on it mostly alone, or mostly with other people?
4. How deeply do you feel you understand the material it covers (0%-100%)?
5. Which part(s) of the course you like the most so far?
6. Which part(s) of the course you dislike the most so far?
This section is intended to help us calibrate the homework assignments.
Your answers to this section will *not* affect your grade; however, skipping it
will certainly do.


@@ -0,0 +1,114 @@
CS 325-001, Algorithms, Fall 2019
HW6 - DP (part 2)
Due on Monday Nov 4, 11:59pm.
No late submission will be accepted.
Need to submit: report.txt, knapsack_unbounded.py, knapsack_bounded.py.
knapsack_bounded.py will be graded for correctness (1%).
To submit:
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw6 report.txt knapsack*.py
(You can submit each file separately, or submit them together.)
To see your best results so far:
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/query hw6
Textbooks for References:
[1] KT Ch. 6.4
or Ch. 5.3 in a previous version:
http://cs.furman.edu/~chealy/cs361/kleinbergbook.pdf
[2] KT slides for DP (pages 1-37):
https://www.cs.princeton.edu/~wayne/kleinberg-tardos/pdf/06DynamicProgrammingI.pdf
[3] Wikipedia: Knapsack (unbounded and 0/1)
[4] CLRS Ch. 15
Please answer time/space complexities for each problem in report.txt.
0. For each of the coding problems below:
(a) Describe a greedy solution.
(b) Show a counterexample to the greedy solution.
(c) Define the DP subproblem
(d) Write the recurrence relations
(e) Do not forget base cases
(f) Analyze the space and time complexities
1. Unbounded Knapsack
You have n items, each with weight w_i and value v_i, and each has infinite copies.
**All numbers are positive integers.**
What's the best value for a bag of W?
>>> best(3, [(2, 4), (3, 5)])
(5, [0, 1])
the input to the best() function is W and a list of pairs (w_i, v_i).
this output means to take 0 copies of item 1 and 1 copy of item 2.
tie-breaking: *reverse* lexicographical: i.e., [1, 0] is better than [0, 1]:
(i.e., take as many copies from the first item as possible, etc.)
>>> best(3, [(1, 5), (1, 5)])
(15, [3, 0])
>>> best(3, [(1, 2), (1, 5)])
(15, [0, 3])
>>> best(3, [(1, 2), (2, 5)])
(7, [1, 1])
>>> best(58, [(5, 9), (9, 18), (6, 12)])
(114, [2, 4, 2])
>>> best(92, [(8, 9), (9, 10), (10, 12), (5, 6)])
(109, [1, 1, 7, 1])
Q: What are the time and space complexities?
filename: knapsack_unbounded.py
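A bottom-up sketch for the unbounded case. Each dp entry stores (value, counts); comparing the counts tuples directly implements the reverse-lexicographic tie-break described above (more copies of earlier items wins):
def best(W, items):
    n = len(items)
    empty = (0, (0,) * n)
    dp = [empty] * (W + 1)
    for c in range(1, W + 1):
        cands = [empty]
        for i, (w, v) in enumerate(items):
            if w <= c:
                pv, pc = dp[c - w]               # best way to use the remaining capacity
                cands.append((pv + v, pc[:i] + (pc[i] + 1,) + pc[i + 1:]))
        dp[c] = max(cands)
    value, counts = dp[W]
    return (value, list(counts))
The bounded version (problem 2) adds a copy limit c_i to the same recurrence.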
2. [WILL BE GRADED]
Bounded Knapsack
You have n items, each with weight w_i and value v_i, and has c_i copies.
**All numbers are positive integers.**
What's the best value for a bag of W?
>>> best(3, [(2, 4, 2), (3, 5, 3)])
(5, [0, 1])
the input to the best() function is W and a list of triples (w_i, v_i, c_i).
tie-breaking: same as in p1:
>>> best(3, [(1, 5, 2), (1, 5, 3)])
(15, [2, 1])
>>> best(3, [(1, 5, 1), (1, 5, 3)])
(15, [1, 2])
>>> best(20, [(1, 10, 6), (3, 15, 4), (2, 10, 3)])
(130, [6, 4, 1])
>>> best(92, [(1, 6, 6), (6, 15, 7), (8, 9, 8), (2, 4, 7), (2, 20, 2)])
(236, [6, 7, 3, 7, 2])
Q: What are the time and space complexities?
filename: knapsack_bounded.py
You are encouraged to come up with a few other testcases yourself to test your code!
Debriefing (required!): --------------------------
0. What's your name?
1. Approximately how many hours did you spend on this assignment?
2. Would you rate it as easy, moderate, or difficult?
3. Did you work on it mostly alone, or mostly with other people?
4. How deeply do you feel you understand the material it covers (0%-100%)?
5. Which part(s) of the course you like the most so far?
6. Which part(s) of the course you dislike the most so far?
This section is intended to help us calibrate the homework assignments.
Your answers to this section will *not* affect your grade; however, skipping it
will certainly do.


@@ -0,0 +1,147 @@
CS 325-001, Algorithms, Fall 2019
HW8 - Graphs (part I); DP (part III)
Due on Monday November 18, 11:59pm.
No late submission will be accepted.
Include in your submission: report.txt, topol.py, viterbi.py.
viterbi.py will be graded for correctness (1%).
To submit:
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/submit hw8 report.txt {topol,viterbi}.py
(You can submit each file separately, or submit them together.)
To see your best results so far:
flip $ /nfs/farm/classes/eecs/fall2019/cs325-001/query hw8
Textbooks for References:
[1] CLRS Ch. 23 (Elementary Graph Algorithms)
[2] KT Ch. 3 (graphs), or Ch. 2 in this earlier version:
http://cs.furman.edu/~chealy/cs361/kleinbergbook.pdf
[3] KT slides (highly recommend!):
https://www.cs.princeton.edu/~wayne/kleinberg-tardos/pdf/03Graphs.pdf
[4] Jeff Erickson: Ch. 5 (Basic Graph Algorithms):
http://jeffe.cs.illinois.edu/teaching/algorithms/book/05-graphs.pdf
[5] DPV Ch. 3, 4.2, 4.4, 4.7 (Dasgupta, Papadimitriou, Vazirani)
https://www.cs.berkeley.edu/~vazirani/algorithms/chap3.pdf (decomposition of graphs)
https://www.cs.berkeley.edu/~vazirani/algorithms/chap4.pdf (paths, shortest paths)
[6] my advanced DP tutorial (up to page 16):
http://web.engr.oregonstate.edu/~huanlian/slides/COLING-tutorial-anim.pdf
Please answer non-coding questions in report.txt.
0. For the following graphs, decide whether they are
(1) directed or undirected, (2) dense or sparse, and (3) cyclic or acyclic:
(a) Facebook
(b) Twitter
(c) a family
(d) V=airports, E=direct_flights
(e) a mesh
(f) V=courses, E=prerequisites
(g) a tree
(h) V=linux_software_packages, E=dependencies
(i) DP subproblems for 0-1 knapsack
Can you name a very big dense graph?
1. Topological Sort
For a given directed graph, output a topological order if it exists.
Tie-breaking: ARBITRARY tie-breaking. This will make the code
and time complexity analysis a lot easier.
e.g., for the following example:
0 --> 2 --> 3 --> 5 --> 6
/ \ | / \
/ \ v / \
1 > 4 > 7
>>> order(8, [(0,2), (1,2), (2,3), (2,4), (3,4), (3,5), (4,5), (5,6), (5,7)])
[0, 1, 2, 3, 4, 5, 6, 7]
Note that order() takes two arguments, n and list_of_edges,
where n specifies that the nodes are named 0..(n-1).
If we flip the (3,4) edge:
>>> order(8, [(0,2), (1,2), (2,3), (2,4), (4,3), (3,5), (4,5), (5,6), (5,7)])
[0, 1, 2, 4, 3, 5, 6, 7]
If there is a cycle, return None
>>> order(4, [(0,1), (1,2), (2,1), (2,3)])
None
Other cases:
>>> order(5, [(0,1), (1,2), (2,3), (3,4)])
[0, 1, 2, 3, 4]
>>> order(5, [])
[0, 1, 2, 3, 4] # could be any order
>>> order(3, [(1,2), (2,1)])
None
>>> order(1, [(0,0)]) # self-loop
None
Tie-breaking: arbitrary (any valid topological order is fine).
filename: topol.py
questions:
(a) did you realize that bottom-up implementations of DP use (implicit) topological orderings?
e.g., what is the topological ordering in your (or my) bottom-up bounded knapsack code?
(b) what about top-down implementations? what order do they use to traverse the graph?
(c) does that suggest there is a top-down solution for topological sort as well?
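A short Kahn-style sketch of order() with arbitrary tie-breaking; any leftover nodes after the ready list drains indicate a cycle:
def order(n, edges):
    adj = [[] for _ in range(n)]
    indeg = [0] * n
    for u, v in edges:
        adj[u].append(v)
        indeg[v] += 1
    ready = [u for u in range(n) if indeg[u] == 0]
    out = []
    while ready:
        u = ready.pop()                        # arbitrary tie-breaking
        out.append(u)
        for v in adj[u]:
            indeg[v] -= 1
            if indeg[v] == 0:
                ready.append(v)
    return out if len(out) == n else None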
2. [WILL BE GRADED]
Viterbi Algorithm For Longest Path in DAG (see DPV 4.7, [2], CLRS problem 15-1)
Recall that the Viterbi algorithm has just two steps:
a) get a topological order (use problem 1 above)
b) follow that order, and do either forward or backward updates
This algorithm captures all DP problems on DAGs, for example,
longest path, shortest path, number of paths, etc.
In this problem, given a DAG (guaranteed acyclic!), output a pair (l, p)
where l is the length of the longest path (number of edges), and p is the path. (you can think of each edge being unit cost)
e.g., for the above example:
>>> longest(8, [(0,2), (1,2), (2,3), (2,4), (3,4), (3,5), (4,5), (5,6), (5,7)])
(5, [0, 2, 3, 4, 5, 6])
>>> longest(8, [(0,2), (1,2), (2,3), (2,4), (4,3), (3,5), (4,5), (5,6), (5,7)])
(5, [0, 2, 4, 3, 5, 6])
>>> longest(8, [(0,1), (0,2), (1,2), (2,3), (2,4), (4,3), (3,5), (4,5), (5,6), (5,7), (6,7)])
(7, [0, 1, 2, 4, 3, 5, 6, 7]) # unique answer
Note that longest() takes two arguments, n and list_of_edges,
where n specifies that the nodes are named 0..(n-1).
Tie-breaking: arbitrary. any longest path is fine.
Filename: viterbi.py
Note: you can use this program to solve MIS, knapsacks, coins, etc.
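A sketch of the two Viterbi steps for longest(), assuming the order() sketch above (or any topological sort) is available; forward updates relax each edge once, then the path is read back through predecessor links:
def longest(n, edges):
    topo = order(n, edges)                     # step (a): topological order
    adj = [[] for _ in range(n)]
    for u, v in edges:
        adj[u].append(v)
    dist = [0] * n                             # longest path (in edges) ending at each node
    back = [None] * n
    for u in topo:                             # step (b): forward updates
        for v in adj[u]:
            if dist[u] + 1 > dist[v]:
                dist[v] = dist[u] + 1
                back[v] = u
    end = max(range(n), key=lambda v: dist[v])
    path = [end]
    while back[path[-1]] is not None:
        path.append(back[path[-1]])
    return dist[end], path[::-1]
Since tie-breaking is arbitrary, this may return a different (equally long) path than the samples above.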
Debriefing (required!): --------------------------
0. What's your name?
1. Approximately how many hours did you spend on this assignment?
2. Would you rate it as easy, moderate, or difficult?
3. Did you work on it mostly alone, or mostly with other people?
4. How deeply do you feel you understand the material it covers (0%-100%)?
5. Any other comments?
This section is intended to help us calibrate the homework assignments.
Your answers to this section will *not* affect your grade; however, skipping it
will certainly do.


@@ -0,0 +1,166 @@
CS 325, Algorithms, Fall 2019
HW9 - Graphs (part 2), DP (part 4)
Due Monday Nov 25, 11:59pm.
No late submission will be accepted.
Include in your submission: report.txt, dijkstra.py, nbest.py.
dijkstra.py will be graded for correctness (1%).
Textbooks for References:
[1] CLRS Ch. 22 (graph)
[2] my DP tutorial (up to page 16):
http://web.engr.oregonstate.edu/~huanlian/slides/COLING-tutorial-anim.pdf
[3] DPV Ch. 3, 4.2, 4.4, 4.7, 6 (Dasgupta, Papadimitriou, Vazirani)
https://www.cs.berkeley.edu/~vazirani/algorithms/chap3.pdf
https://www.cs.berkeley.edu/~vazirani/algorithms/chap4.pdf
https://www.cs.berkeley.edu/~vazirani/algorithms/chap6.pdf
[4] KT Ch. 6 (DP)
http://www.aw-bc.com/info/kleinberg/assets/downloads/ch6.pdf
[5] KT slides: Greedy II (Dijkstra)
http://www.cs.princeton.edu/~wayne/kleinberg-tardos/
***Please answer time/space complexities for each problem in report.txt.
1. [WILL BE GRADED]
Dijkstra (see CLRS 24.3 and DPV 4.4)
Given an undirected graph, find the shortest path from source (node 0)
to target (node n-1).
Edge weights are guaranteed to be non-negative, since Dijkstra doesn't work
with negative weights, e.g.
3
0 ------ 1
\ /
2 \ / -2
\/
2
in this example, Dijkstra would return length 2 (path 0-2),
but path 0-1-2 is better (length 1).
For example (return a pair of shortest-distance and shortest-path):
1
0 ------ 1
\ / \
5 \ /1 \6
\/ 2 \
2 ------ 3
>>> shortest(4, [(0,1,1), (0,2,5), (1,2,1), (2,3,2), (1,3,6)])
(4, [0,1,2,3])
If the target node (n-1) is unreachable from the source (0),
return None:
>>> shortest(5, [(0,1,1), (0,2,5), (1,2,1), (2,3,2), (1,3,6)])
None
Another example:
1 1
0-----1 2-----3
>>> shortest(4, [(0,1,1), (2,3,1)])
None
Tiebreaking: arbitrary. Any shortest path would do.
Filename: dijkstra.py
Hint: please use heapdict from here:
https://raw.githubusercontent.com/DanielStutzbach/heapdict/master/heapdict.py
>>> from heapdict import heapdict
>>> h = heapdict()
>>> h['a'] = 3
>>> h['b'] = 1
>>> h.peekitem()
('b', 1)
>>> h['a'] = 0
>>> h.peekitem()
('a', 0)
>>> h.popitem()
('a', 0)
>>> len(h)
1
>>> 'a' in h
False
>>> 'b' in h
True
You don't need to submit heapdict.py; we have it in our grader.
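A sketch of shortest() built on the heapdict shown above: assignment doubles as decrease-key, popitem always yields the closest unfinished node, and we stop as soon as the target n-1 is popped:
from heapdict import heapdict

def shortest(n, edges):
    adj = [[] for _ in range(n)]
    for u, v, w in edges:
        adj[u].append((v, w))                  # undirected graph
        adj[v].append((u, w))
    dist = {0: 0}
    back = {}
    done = set()
    h = heapdict()
    h[0] = 0
    while len(h) > 0:
        u, d = h.popitem()                     # closest tentative node
        done.add(u)
        if u == n - 1:
            path = [u]
            while path[-1] != 0:
                path.append(back[path[-1]])
            return d, path[::-1]
        for v, w in adj[u]:
            if v not in done and (v not in dist or d + w < dist[v]):
                dist[v] = d + w
                back[v] = u
                h[v] = d + w                   # insert or decrease-key
    return None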
2. [Redo the nbest question from Midterm, preparing for HW10 part 3]
Given k pairs of lists A_i and B_i (0 <= i < k), each with n sorted numbers,
find the n smallest pairs in all the (k n^2) pairs.
We say (x,y) < (x', y') if and only if x+y < x'+y'.
Tie-breaking: lexicographical (i.e., prefer smaller x).
You can base your code on the skeleton from the Midterm:
from heapq import heappush, heappop
def nbest(ABs): # no need to pass in k or n
k = len(ABs)
n = len(ABs[0][0])
def trypush(i, p, q): # push pair (A_i,p, B_i,q) if possible
A, B = ABs[i] # A_i, B_i
if p < n and q < n and ______________________________:
heappush(h, (________________, i, p, q, (A[p],B[q])))
used.add((i, p, q))
h, used = ___________________ # initialize
for i in range(k): # NEED TO OPTIMIZE
trypush(______________)
for _ in range(n):
_, i, p, q, pair = ________________
yield pair # return the next pair (in a lazy list)
_______________________
_______________________
But recall we had two optimizations to speed up the first for-loop (queue initialization):
(1) using heapify instead of k initial pushes. You need to implement this (very easy).
(2) using qselect to choose top n out of the k bests. This one is OPTIONAL.
Analyze the time complexity for the version you implemented.
>>> list(nbest([([1,2,4], [2,3,5]), ([0,2,4], [3,4,5])]))
[(0, 3), (1, 2), (0, 4)]
>>> list(nbest([([-1,2],[1,4]), ([0,2],[3,4]), ([0,1],[4,6]), ([-1,2],[1,5])]))
[(-1, 1), (-1, 1)]
>>> list(nbest([([5,6,10,14],[3,5,10,14]),([2,7,9,11],[3,8,12,16]),([1,3,8,10],[5,9,10,11]),([1,2,3,5],[3,4,9,10]),([4,5,9,10],[2,4,6,11]),([4,6,10,13],[2,3,5,9]),([3,7,10,12],[1,2,5,10]),([5,9,14,15],[4,8,13,14])]))
[(1, 3), (3, 1), (1, 4), (2, 3)]
>>> list(nbest([([1,6,8,13],[5,8,11,12]),([1,2,3,5],[5,9,11,13]),([3,5,7,10],[4,6,7,11]),([1,4,7,8],[4,9,11,15]),([4,8,10,13],[4,6,10,11]),([4,8,12,15],[5,10,11,13]),([2,3,4,8],[4,7,11,15]),([4,5,10,15],[5,6,7,8])]))
[(1, 4), (1, 5), (1, 5), (2, 4)]
This problem prepares you for the hardest question in HW10 (part 3).
Filename: nbest.py
Debriefing (required!): --------------------------
0. What's your name?
1. Approximately how many hours did you spend on this assignment?
2. Would you rate it as easy, moderate, or difficult?
3. Did you work on it mostly alone, or mostly with other people?
4. How deeply do you feel you understand the material it covers (0%-100%)?
5. Any other comments?
This section is intended to help us calibrate the homework assignments.
Your answers to this section will *not* affect your grade; however, skipping it
will certainly do.


@@ -0,0 +1,19 @@
qselect(xs,k) =
~xs -> {
pivot <- xs[0]!
left <- xs[#0 <= pivot]
right <- xs[#0 > pivot]
} ->
if k > |left| + 1 then qselect(right, k - |left| - 1)
else if k == |left| + 1 then [pivot]
else qselect(left, k);
_search(xs, k) =
if xs[1] == k then xs
else if xs[1] > k then _search(xs[0], k)
else _search(xs[2], k);
sorted(xs) = sorted(xs[0]) ++ [xs[1]] ++ sorted(xs[2]);
search(xs, k) = |_search(xs, k)| != 0;
insert(xs, k) = _insert(k, _search(xs, k));
_insert(k, xs) = if |xs| == 0 then xs << [] << k << [] else xs


@@ -0,0 +1,11 @@
state 0;
effect {
if(SOURCE == R) {
STATE = STATE + |LEFT|;
}
}
combine {
STATE = STATE + LSTATE + RSTATE;
}


@@ -0,0 +1,95 @@
function qselect(xs, k, c) {
if xs == [] {
return 0;
}
traverser bisector(list: xs, span: (0,len(xs)));
traverser pivot(list: xs, random: true);
let pivotE = pop!(pivot);
let (leftList, rightList) = bisect!(bisector, (x) -> c(x) < c(pivotE));
if k > len(leftList) + 1 {
return qselect(rightList, k - len(leftList) - 1, c);
} elsif k == len(leftList) + 1 {
return pivotE;
} else {
return qselect(leftList, k, c);
}
}
function closestUnsorted(xs, k, n) {
let min = qselect(list(xs), k, (x) -> abs(x - n));
let out = [];
let countEqual = k;
traverser iter(list: xs, span: (0, len(xs)));
while valid!(iter) {
if abs(at!(iter)-n) < abs(min-n) {
let countEqual = countEqual - 1;
}
step!(iter);
}
traverser iter(list: xs, span: (0, len(xs)));
while valid!(iter) {
if abs(at!(iter)-n) == abs(min-n) and countEqual > 0 {
let countEqual = countEqual - 1;
let out = out + [at!(iter)];
} elsif abs(at!(iter)-n) < abs(min-n) {
let out = out + [at!(iter)];
}
step!(iter);
}
return out;
}
function closestSorted(xs, k, n) {
let start = bisect(xs, n);
let counter = 0;
traverser left(list: xs, span: (0, start), reverse: true);
traverser right(list: xs, span: (start, len(xs)));
while counter != k and canstep!(left) and valid!(right) {
if abs(at!(left, 1) - n) < abs(at!(right) - n) {
step!(left);
} else {
step!(right);
}
let counter = counter + 1;
}
while counter != k and (canstep!(left) or valid!(right)) {
if canstep!(left) { step!(left); }
else { step!(right); }
let counter = counter + 1;
}
return subset!(left, right);
}
sorted function xyz(xs, k) {
traverser x(list: xs, span: (0,len(xs)));
let dest = [];
while valid!(x) {
traverser z(list: xs, span: (pos!(x)+2,len(xs)));
traverser y(list: xs, span: (pos!(x)+1,pos!(z)));
while valid!(y) and valid!(z) {
if at!(x) + at!(y) == at!(z) {
let dest = dest + [(at!(x), at!(y), at!(z))];
step!(z);
} elsif at!(x) + at!(y) > at!(z) {
step!(z);
} else {
step!(y);
}
}
step!(x);
}
return dest;
}


@@ -0,0 +1,15 @@
module Common where
import PythonAst
import PythonGen
import Text.Parsec
compile :: (String -> String -> Either ParseError p) -> (p -> [PyStmt]) -> String -> IO ()
compile p t f = do
let inputName = f ++ ".lang"
let outputName = f ++ ".py"
file <- readFile inputName
let either = p inputName file
case either of
Right prog -> writeFile outputName (translate $ t prog)
Left e -> print e


@@ -0,0 +1,90 @@
module CommonParsing where
import Data.Char
import Data.Functor
import Text.Parsec
import Text.Parsec.Char
import Text.Parsec.Combinator
type Parser a b = Parsec String a b
kw :: String -> Parser a ()
kw s = try $ string s <* spaces $> ()
kwIf :: Parser a ()
kwIf = kw "if"
kwThen :: Parser a ()
kwThen = kw "then"
kwElse :: Parser a ()
kwElse = kw "else"
kwElsif :: Parser a ()
kwElsif = kw "elsif"
kwWhile :: Parser a ()
kwWhile = kw "while"
kwState :: Parser a ()
kwState = kw "state"
kwEffect :: Parser a ()
kwEffect = kw "effect"
kwCombine :: Parser a ()
kwCombine = kw "combine"
kwRand :: Parser a ()
kwRand = kw "rand"
kwFunction :: Parser a ()
kwFunction = kw "function"
kwSorted :: Parser a ()
kwSorted = kw "sorted"
kwLet :: Parser a ()
kwLet = kw "let"
kwTraverser :: Parser a ()
kwTraverser = kw "traverser"
kwReturn :: Parser a ()
kwReturn = kw "return"
op :: String -> op -> Parser a op
op s o = string s $> o
int :: Parser a Int
int = read <$> (many1 digit <* spaces)
var :: [String] -> Parser a String
var reserved =
do
c <- satisfy $ \c -> isLetter c || c == '_'
cs <- many (satisfy isLetter <|> digit) <* spaces
let name = c:cs
if name `elem` reserved
then fail "Can't use reserved keyword as identifier"
else return name
list :: Char -> Char -> Char -> Parser a b -> Parser a [b]
list co cc cd pe = surround co cc $ sepBy pe (char cd >> spaces)
surround :: Char -> Char -> Parser a b -> Parser a b
surround c1 c2 pe =
do
char c1 >> spaces
e <- pe
spaces >> char c2 >> spaces
return e
level :: (o -> e -> e -> e) -> Parser a o -> Parser a e -> Parser a e
level c po pe =
do
e <- pe <* spaces
ops <- many $ try $ (flip . c <$> (po <* spaces) <*> pe) <* spaces
return $ foldl (flip ($)) e ops
precedence :: (o -> e -> e -> e) -> Parser a e -> [ Parser a o ] -> Parser a e
precedence = foldl . flip . level

View File

@@ -0,0 +1,393 @@
module LanguageOne where
import qualified PythonAst as Py
import qualified CommonParsing as P
import Data.Bifunctor
import Data.Char
import Data.Functor
import qualified Data.Map as Map
import Data.Maybe
import qualified Data.Set as Set
import Text.Parsec
import Text.Parsec.Char
import Text.Parsec.Combinator
import Control.Monad.State
{- Data Types -}
data PossibleType = List | Any deriving Eq
data SelectorMarker = None | Remove
data Op
= Add
| Subtract
| Multiply
| Divide
| Insert
| Concat
| LessThan
| LessThanEq
| GreaterThan
| GreaterThanEq
| Equal
| NotEqual
| And
| Or
data Selector = Selector String Expr
data Expr
= Var String
| IntLiteral Int
| ListLiteral [Expr]
| Split Expr [Selector] Expr
| IfElse Expr Expr Expr
| BinOp Op Expr Expr
| FunctionCall Expr [Expr]
| LengthOf Expr
| Random
| Access Expr Expr SelectorMarker
| Parameter Int
data Function = Function String [String] Expr
data Prog = Prog [Function]
{- Parser -}
type Parser = Parsec String (Maybe Int)
parseVar :: Parser String
parseVar = P.var ["if", "then", "else", "var"]
parseThis :: Parser Expr
parseThis =
do
char '&'
contextNum <- getState
spaces
return (Var $ "context_" ++ show contextNum)
parseList :: Parser Expr
parseList = ListLiteral <$>
do
char '[' >> spaces
es <- sepBy parseExpr (char ',' >> spaces)
spaces >> char ']' >> spaces
return es
parseSplit :: Parser Expr
parseSplit =
do
char '~' >> spaces
e <- parseExpr
spaces >> string "->"
spaces >> char '{'
contextNum <- getState
putState $ return $ 1 + fromMaybe (-1) contextNum
es <- many1 (spaces >> parseSelector)
putState contextNum
spaces >> char '}' >> spaces >> string "->" >> spaces
e' <- parseExpr
spaces
return $ Split e es e'
parseSelectorMarker :: Parser SelectorMarker
parseSelectorMarker = (char '!' >> return Remove) <|> return None
parseSelector :: Parser Selector
parseSelector =
do
name <- parseVar
spaces >> string "<-" >> spaces
expr <- parseExpr
spaces
return $ Selector name expr
parseIfElse :: Parser Expr
parseIfElse =
do
P.kwIf >> spaces
ec <- parseExpr
spaces >> P.kwThen >> spaces
et <- parseExpr
spaces >> P.kwElse >> spaces
ee <- parseExpr
spaces
return $ IfElse ec et ee
parseLength :: Parser Expr
parseLength =
do
char '|' >> spaces
e <- parseExpr
spaces >> char '|' >> spaces
return $ LengthOf e
parseParameter :: Parser Expr
parseParameter =
do
char '#'
d <- digit
spaces
return $ Parameter $ read [d]
parseParenthesized :: Parser Expr
parseParenthesized =
do
char '(' >> spaces
e <- parseExpr
spaces >> char ')' >> spaces
return e
parseBasicExpr :: Parser Expr
parseBasicExpr = choice
[ IntLiteral <$> P.int
, parseThis
, parseList
, parseSplit
, parseLength
, parseParameter
, parseParenthesized
, Var <$> try parseVar
, P.kwRand $> Random
, parseIfElse
]
parsePostfix :: Parser (Expr -> Expr)
parsePostfix = parsePostfixAccess <|> parsePostfixCall
parsePostfixAccess :: Parser (Expr -> Expr)
parsePostfixAccess =
do
char '[' >> spaces
e <- parseExpr
spaces >> char ']' >> spaces
marker <- parseSelectorMarker
spaces
return $ \e' -> Access e' e marker
parsePostfixCall :: Parser (Expr -> Expr)
parsePostfixCall =
do
char '(' >> spaces
es <- sepBy parseExpr (char ',' >> spaces)
char ')' >> spaces
return $ flip FunctionCall es
parsePostfixedExpr :: Parser Expr
parsePostfixedExpr =
do
eb <- parseBasicExpr
spaces
ps <- many parsePostfix
return $ foldl (flip ($)) eb ps
parseExpr :: Parser Expr
parseExpr = P.precedence BinOp parsePostfixedExpr
[ P.op "*" Multiply, P.op "/" Divide
, P.op "+" Add, P.op "-" Subtract
, P.op "<<" Insert
, P.op "++" Concat
, try (P.op "<=" LessThanEq) <|> try (P.op ">=" GreaterThanEq) <|>
P.op "<" LessThan <|> P.op ">" GreaterThan <|>
P.op "==" Equal <|> P.op "!=" NotEqual
, P.op "&&" And <|> P.op "||" Or
]
parseFunction :: Parser Function
parseFunction =
do
name <- parseVar
spaces >> char '(' >> spaces
vs <- sepBy parseVar (char ',' >> spaces)
spaces >> char ')' >> spaces >> char '=' >> spaces
body <- parseExpr
spaces
return $ Function name vs body
parseProg :: Parser Prog
parseProg = Prog <$> sepBy1 parseFunction (char ';' >> spaces)
parse :: SourceName -> String -> Either ParseError Prog
parse = runParser parseProg Nothing
{- "Type" checker -}
mergePossibleType :: PossibleType -> PossibleType -> PossibleType
mergePossibleType List _ = List
mergePossibleType _ List = List
mergePossibleType _ _ = Any
getPossibleType :: String -> Expr -> PossibleType
getPossibleType s (Var s') = if s == s' then List else Any
getPossibleType _ (ListLiteral _) = List
getPossibleType s (Split _ _ e) = getPossibleType s e
getPossibleType s (IfElse i t e) =
foldl1 mergePossibleType $ map (getPossibleType s) [i, t, e]
getPossibleType _ (BinOp Insert _ _) = List
getPossibleType _ (BinOp Concat _ _) = List
getPossibleType _ _ = Any
{- Translator -}
type Translator = Control.Monad.State.State (Map.Map String [String], Int)
currentTemp :: Translator String
currentTemp = do
t <- gets snd
return $ "temp" ++ show t
incrementTemp :: Translator String
incrementTemp = do
modify (second (+1))
currentTemp
hasLambda :: Expr -> Bool
hasLambda (ListLiteral es) = any hasLambda es
hasLambda (Split e ss r) =
hasLambda e || any (\(Selector _ e') -> hasLambda e') ss || hasLambda r
hasLambda (IfElse i t e) = hasLambda i || hasLambda t || hasLambda e
hasLambda (BinOp o l r) = hasLambda l || hasLambda r
hasLambda (FunctionCall e es) = any hasLambda $ e : es
hasLambda (LengthOf e) = hasLambda e
hasLambda (Access e _ _) = hasLambda e
hasLambda Parameter{} = True
hasLambda _ = False
translate :: Prog -> [Py.PyStmt]
translate p = fst $ runState (translateProg p) (Map.empty, 0)
translateProg :: Prog -> Translator [Py.PyStmt]
translateProg (Prog fs) = concat <$> traverse translateFunction fs
translateFunction :: Function -> Translator [Py.PyStmt]
translateFunction (Function n ps ex) = do
let createIf p = Py.BinOp Py.Equal (Py.Var p) (Py.ListLiteral [])
let createReturn p = Py.IfElse (createIf p) [Py.Return (Py.Var p)] [] Nothing
let fastReturn = [createReturn p | p <- take 1 ps, getPossibleType p ex == List]
(ss, e) <- translateExpr ex
return $ return $ Py.FunctionDef n ps $ fastReturn ++ ss ++ [Py.Return e]
translateSelector :: Selector -> Translator Py.PyStmt
translateSelector (Selector n e) =
let
cacheCheck = Py.NotIn (Py.StrLiteral n) (Py.Var "cache")
cacheAccess = Py.Access (Py.Var "cache") [Py.StrLiteral n]
cacheSet = Py.Assign (Py.AccessPat (Py.Var "cache") [Py.StrLiteral n])
body e' = [ Py.IfElse cacheCheck [cacheSet e'] [] Nothing, Py.Return cacheAccess]
in
do
(ss, e') <- translateExpr e
vs <- gets fst
let callPrereq p = Py.Standalone $ Py.FunctionCall (Py.Var p) []
let prereqs = maybe [] (map callPrereq) $ Map.lookup n vs
return $ Py.FunctionDef n [] $ ss ++ prereqs ++ body e'
translateExpr :: Expr -> Translator ([Py.PyStmt], Py.PyExpr)
translateExpr (Var s) = do
vs <- gets fst
let sVar = Py.Var s
let expr = if Map.member s vs then Py.FunctionCall sVar [] else sVar
return ([], expr)
translateExpr (IntLiteral i) = return ([], Py.IntLiteral i)
translateExpr (ListLiteral l) = do
tl <- mapM translateExpr l
return (concatMap fst tl, Py.ListLiteral $ map snd tl)
translateExpr (Split e ss e') = do
vs <- gets fst
let cacheAssign = Py.Assign (Py.VarPat "cache") (Py.DictLiteral [])
let cacheStmt = [ cacheAssign | Map.size vs == 0 ]
let vnames = map (\(Selector n es) -> n) ss
let prereqs = snd $ foldl (\(ds, m) (Selector n es) -> (n:ds, Map.insert n ds m)) ([], Map.empty) ss
modify $ first $ Map.union prereqs
fs <- mapM translateSelector ss
(sts, te) <- translateExpr e'
modify $ first $ const vs
return (cacheStmt ++ fs ++ sts, te)
translateExpr (IfElse i t e) = do
temp <- incrementTemp
let tempPat = Py.VarPat temp
(ists, ie) <- translateExpr i
(tsts, te) <- translateExpr t
(ests, ee) <- translateExpr e
let thenSts = tsts ++ [Py.Assign tempPat te]
let elseSts = ests ++ [Py.Assign tempPat ee]
let newIf = Py.IfElse ie thenSts [] $ Just elseSts
return (ists ++ [newIf], Py.Var temp)
translateExpr (BinOp o l r) = do
(lsts, le) <- translateExpr l
(rsts, re) <- translateExpr r
(opsts, oe) <- translateOp o le re
return (lsts ++ rsts ++ opsts, oe)
translateExpr (FunctionCall f ps) = do
(fsts, fe) <- translateExpr f
tps <- mapM translateExpr ps
return (fsts ++ concatMap fst tps, Py.FunctionCall fe $ map snd tps)
translateExpr (LengthOf e) =
second (Py.FunctionCall (Py.Var "len") . return) <$> translateExpr e
translateExpr (Access e Random m) = do
temp <- incrementTemp
(sts, ce) <- translateExpr e
let lenExpr = Py.FunctionCall (Py.Var "len") [Py.Var temp]
let randExpr = Py.FunctionCall (Py.Var "randint") [ Py.IntLiteral 0, lenExpr ]
return (sts, singleAccess ce randExpr m)
translateExpr (Access c i m) = do
(csts, ce) <- translateExpr c
(ists, ie) <- translateExpr i
temp <- incrementTemp
if hasLambda i
then return (csts ++ ists ++ [createFilterLambda temp ie m], Py.FunctionCall (Py.Var temp) [ce])
else return (csts ++ ists, singleAccess ce ie m)
translateExpr (Parameter i) = return $ ([], Py.Var $ "arg" ++ show i)
translateExpr _ = fail "Invalid expression"
singleAccess :: Py.PyExpr -> Py.PyExpr -> SelectorMarker -> Py.PyExpr
singleAccess c i None = Py.Access c [i]
singleAccess c i Remove = Py.FunctionCall (Py.Member c "pop") [i]
createFilterLambda :: String -> Py.PyExpr -> SelectorMarker -> Py.PyStmt
createFilterLambda s e None = Py.FunctionDef s ["arg"]
[ Py.Assign (Py.VarPat "out") (Py.ListLiteral [])
, Py.For (Py.VarPat "arg0") (Py.Var "arg")
[ Py.IfElse e
[ Py.Standalone $ Py.FunctionCall (Py.Member (Py.Var "out") "append")
[ Py.Var "arg0" ]
]
[]
Nothing
]
, Py.Return $ Py.Var "out"
]
createFilterLambda s e Remove = Py.FunctionDef s ["arg"]
[ Py.Assign (Py.VarPat "i") $ Py.IntLiteral 0
, Py.Assign (Py.VarPat "out") (Py.ListLiteral [])
, Py.While (Py.BinOp Py.LessThan (Py.Var "i") $ Py.FunctionCall (Py.Var "len") [Py.Var "arg"])
[ Py.IfElse e
[ Py.Standalone $ Py.FunctionCall (Py.Member (Py.Var "out") "append")
[ singleAccess (Py.Var "arg") (Py.Var "i") Remove
]
]
[]
Nothing
, Py.Assign (Py.VarPat "i") (Py.BinOp Py.Add (Py.Var "i") (Py.IntLiteral 1))
]
, Py.Return $ Py.Var "out"
]
translateOp :: Op -> Py.PyExpr -> Py.PyExpr -> Translator ([Py.PyStmt], Py.PyExpr)
translateOp Add l r = return ([], Py.BinOp Py.Add l r)
translateOp Subtract l r = return ([], Py.BinOp Py.Subtract l r)
translateOp Multiply l r = return ([], Py.BinOp Py.Multiply l r)
translateOp Divide l r = return ([], Py.BinOp Py.Divide l r)
translateOp LessThan l r = return ([], Py.BinOp Py.LessThan l r)
translateOp LessThanEq l r = return ([], Py.BinOp Py.LessThanEq l r)
translateOp GreaterThan l r = return ([], Py.BinOp Py.GreaterThan l r)
translateOp GreaterThanEq l r = return ([], Py.BinOp Py.GreaterThanEq l r)
translateOp Equal l r = return ([], Py.BinOp Py.Equal l r)
translateOp NotEqual l r = return ([], Py.BinOp Py.NotEqual l r)
translateOp And l r = return ([], Py.BinOp Py.And l r)
translateOp Or l r = return ([], Py.BinOp Py.Or l r)
translateOp Concat l r = return ([], Py.BinOp Py.Add l r)
translateOp Insert l r = do
temp <- incrementTemp
let assignStmt = Py.Assign (Py.VarPat temp) l
let appendFunc = Py.Member (Py.Var temp) "append"
let insertStmt = Py.Standalone $ Py.FunctionCall appendFunc [r]
return ([assignStmt, insertStmt], Py.Var temp)

View File

@@ -0,0 +1,461 @@
module LanguageThree where
import qualified CommonParsing as P
import qualified PythonAst as Py
import Control.Monad.State
import Data.Bifunctor
import Data.Foldable
import Data.Functor
import qualified Data.Map as Map
import Data.Maybe
import Text.Parsec hiding (State)
import Text.Parsec.Char
import Text.Parsec.Combinator
{- Data Types -}
data Op
= Add
| Subtract
| Multiply
| Divide
| LessThan
| LessThanEqual
| GreaterThan
| GreaterThanEqual
| Equal
| NotEqual
| And
| Or
data Expr
= TraverserCall String [Expr]
| FunctionCall String [Expr]
| BinOp Op Expr Expr
| Lambda [String] Expr
| Var String
| IntLiteral Int
| BoolLiteral Bool
| ListLiteral [Expr]
| TupleLiteral [Expr]
type Branch = (Expr, [Stmt])
data Stmt
= IfElse Branch [Branch] [Stmt]
| While Branch
| Traverser String [(String, Expr)]
| Let Pat Expr
| Return Expr
| Standalone Expr
data Pat
= VarPat String
| TuplePat [Pat]
data SortedMarker = Sorted | Unsorted deriving Eq
data Function = Function SortedMarker String [String] [Stmt]
data Prog = Prog [Function]
{- Parser -}
type Parser = Parsec String ()
parseVar :: Parser String
parseVar = P.var
[ "if", "elif", "else"
, "while", "let", "traverser"
, "function", "sort"
, "true", "false"
]
parseBool :: Parser Bool
parseBool = (string "true" $> True) <|> (string "false" $> False)
parseList :: Parser Expr
parseList = ListLiteral <$> P.list '[' ']' ',' parseExpr
parseTupleElems :: Parser [Expr]
parseTupleElems = P.list '(' ')' ',' parseExpr
parseTuple :: Parser Expr
parseTuple = do
es <- parseTupleElems
return $ case es of
e:[] -> e
_ -> TupleLiteral es
parseLambda :: Parser Expr
parseLambda = try $ do
vs <- P.list '(' ')' ',' parseVar
string "->" >> spaces
Lambda vs <$> parseExpr
parseCall :: Parser Expr
parseCall = try $ do
v <- parseVar
choice
[ TraverserCall v <$> (char '!' *> parseTupleElems)
, FunctionCall v <$> parseTupleElems
]
parseBasic :: Parser Expr
parseBasic = choice
[ IntLiteral <$> P.int
, BoolLiteral <$> parseBool
, try parseCall
, Var <$> parseVar
, parseList
, parseLambda
, parseTuple
]
parseExpr :: Parser Expr
parseExpr = P.precedence BinOp parseBasic
[ P.op "*" Multiply <|> P.op "/" Divide
, P.op "+" Add <|> P.op "-" Subtract
, P.op "==" Equal <|> P.op "!=" NotEqual <|>
try (P.op "<=" LessThanEqual) <|> P.op "<" LessThan <|>
try (P.op ">=" GreaterThanEqual) <|> P.op ">" GreaterThan
, P.op "and" And
, P.op "or" Or
]
parseBlock :: Parser [Stmt]
parseBlock = char '{' >> spaces >> many parseStmt <* char '}' <* spaces
parseBranch :: Parser Branch
parseBranch = (,) <$> (parseExpr <* spaces) <*> parseBlock
parseIf :: Parser Stmt
parseIf = do
i <- P.kwIf >> parseBranch
els <- many (P.kwElsif >> parseBranch)
e <- try (P.kwElse >> parseBlock) <|> return []
return $ IfElse i els e
parseWhile :: Parser Stmt
parseWhile = While <$> (P.kwWhile >> parseBranch)
parseTraverser :: Parser Stmt
parseTraverser = Traverser
<$> (P.kwTraverser *> parseVar)
<*> (P.list '(' ')' ',' parseKey) <* char ';' <* spaces
parseKey :: Parser (String, Expr)
parseKey = (,)
<$> (parseVar <* spaces <* char ':' <* spaces)
<*> parseExpr
parseLet :: Parser Stmt
parseLet = Let
<$> (P.kwLet >> parsePat <* char '=' <* spaces)
<*> parseExpr <* char ';' <* spaces
parseReturn :: Parser Stmt
parseReturn = Return <$> (P.kwReturn >> parseExpr <* char ';' <* spaces)
parsePat :: Parser Pat
parsePat = (VarPat <$> parseVar) <|> (TuplePat <$> P.list '(' ')' ',' parsePat)
parseStmt :: Parser Stmt
parseStmt = choice
[ parseTraverser
, parseLet
, parseIf
, parseWhile
, parseReturn
, Standalone <$> (parseExpr <* char ';' <* spaces)
]
parseFunction :: Parser Function
parseFunction = Function
<$> (P.kwSorted $> Sorted <|> return Unsorted)
<*> (P.kwFunction >> parseVar)
<*> (P.list '(' ')' ',' parseVar)
<*> parseBlock
parseProg :: Parser Prog
parseProg = Prog <$> many parseFunction
parse :: String -> String -> Either ParseError Prog
parse = runParser parseProg ()
{- Translation -}
data TraverserBounds = Range Py.PyExpr Py.PyExpr | Random
data TraverserData = TraverserData
{ list :: Maybe String
, bounds :: Maybe TraverserBounds
, rev :: Bool
}
data ValidTraverserData = ValidTraverserData
{ validList :: String
, validBounds :: TraverserBounds
, validRev :: Bool
}
type Translator = State (Map.Map String ValidTraverserData, [Py.PyStmt], Int)
getScoped :: Translator (Map.Map String ValidTraverserData)
getScoped = gets (\(m, _, _) -> m)
setScoped :: Map.Map String ValidTraverserData -> Translator ()
setScoped m = modify (\(_, ss, i) -> (m, ss, i))
scope :: Translator a -> Translator a
scope m = do
s <- getScoped
a <- m
setScoped s
return a
clearTraverser :: String -> Translator ()
clearTraverser s = modify (\(m, ss, i) -> (Map.delete s m, ss, i))
putTraverser :: String -> ValidTraverserData -> Translator ()
putTraverser s vtd = modify (\(m, ss, i) -> (Map.insert s vtd m, ss, i))
getTemp :: Translator String
getTemp = gets $ \(_, _, i) -> "temp" ++ show i
freshTemp :: Translator String
freshTemp = modify (second (+1)) >> getTemp
emitStatement :: Py.PyStmt -> Translator ()
emitStatement = modify . first . (:)
collectStatements :: Translator a -> Translator ([Py.PyStmt], a)
collectStatements t = do
modify (first $ const [])
a <- t
ss <- gets $ \(_, ss, _) -> ss
modify (first $ const [])
return (ss, a)
withdrawStatements :: Translator (Py.PyStmt) -> Translator [Py.PyStmt]
withdrawStatements ts =
(\(ss, s) -> ss ++ [s]) <$> (collectStatements ts)
requireTraverser :: String -> Translator ValidTraverserData
requireTraverser s = gets (\(m, _, _) -> Map.lookup s m) >>= handleMaybe
where
handleMaybe Nothing = fail "Invalid traverser"
handleMaybe (Just vtd) = return vtd
traverserIncrement :: Bool -> Py.PyExpr -> Py.PyExpr -> Py.PyExpr
traverserIncrement rev by e =
Py.BinOp op e (Py.BinOp Py.Multiply by (Py.IntLiteral 1))
where op = if rev then Py.Subtract else Py.Add
traverserValid :: Py.PyExpr -> ValidTraverserData -> Py.PyExpr
traverserValid e vtd =
case validBounds vtd of
Range f t ->
if validRev vtd
then Py.BinOp Py.GreaterThanEq e f
else Py.BinOp Py.LessThan e t
Random -> Py.BoolLiteral True
traverserStep :: String -> ValidTraverserData -> Py.PyStmt
traverserStep s vtd =
case validBounds vtd of
Range _ _ -> Py.Assign (Py.VarPat s) $ Py.BinOp op (Py.Var s) (Py.IntLiteral 1)
where op = if validRev vtd then Py.Subtract else Py.Add
Random -> traverserRandom s $ validList vtd
traverserRandom :: String -> String -> Py.PyStmt
traverserRandom s l =
Py.Assign (Py.VarPat s) $ Py.FunctionCall (Py.Var "random.randrange")
[Py.FunctionCall (Py.Var "len") [Py.Var l]]
hasVar :: String -> Py.PyPat -> Bool
hasVar s (Py.VarPat s') = s == s'
hasVar s (Py.TuplePat ps) = any (hasVar s) ps
hasVar s _ = False
substituteVariable :: String -> Py.PyExpr -> Py.PyExpr -> Py.PyExpr
substituteVariable s e (Py.BinOp o l r) =
Py.BinOp o (substituteVariable s e l) (substituteVariable s e r)
substituteVariable s e (Py.ListLiteral es) =
Py.ListLiteral $ map (substituteVariable s e) es
substituteVariable s e (Py.DictLiteral es) =
Py.DictLiteral $
map (first (substituteVariable s e) . second (substituteVariable s e)) es
substituteVariable s e (Py.Lambda ps e') =
Py.Lambda ps $ if any (hasVar s) ps then substituteVariable s e e' else e'
substituteVariable s e (Py.Var s')
| s == s' = e
| otherwise = Py.Var s'
substituteVariable s e (Py.TupleLiteral es) =
Py.TupleLiteral $ map (substituteVariable s e) es
substituteVariable s e (Py.FunctionCall e' es) =
Py.FunctionCall (substituteVariable s e e') $
map (substituteVariable s e) es
substituteVariable s e (Py.Access e' es) =
Py.Access (substituteVariable s e e') $
map (substituteVariable s e) es
substituteVariable s e (Py.Ternary i t e') =
Py.Ternary (substituteVariable s e i) (substituteVariable s e t)
(substituteVariable s e e')
substituteVariable s e (Py.Member e' m) =
Py.Member (substituteVariable s e e') m
substituteVariable s e (Py.In e1 e2) =
Py.In (substituteVariable s e e1) (substituteVariable s e e2)
substituteVariable s e (Py.NotIn e1 e2) =
Py.NotIn (substituteVariable s e e1) (substituteVariable s e e2)
substituteVariable s e (Py.Slice f t) =
Py.Slice (substituteVariable s e <$> f) (substituteVariable s e <$> t)
translateExpr :: Expr -> Translator Py.PyExpr
translateExpr (TraverserCall "pop" [Var s]) = do
l <- validList <$> requireTraverser s
return $ Py.FunctionCall (Py.Member (Py.Var l) "pop") [Py.Var s]
translateExpr (TraverserCall "pos" [Var s]) = do
requireTraverser s
return $ Py.Var s
translateExpr (TraverserCall "at" [Var s]) = do
l <- validList <$> requireTraverser s
return $ Py.Access (Py.Var l) [Py.Var s]
translateExpr (TraverserCall "at" [Var s, IntLiteral i]) = do
vtd <- requireTraverser s
return $ Py.Access (Py.Var $ validList vtd)
[traverserIncrement (validRev vtd) (Py.IntLiteral i) (Py.Var s)]
translateExpr (TraverserCall "step" [Var s]) = do
vtd <- requireTraverser s
emitStatement $ traverserStep s vtd
return $ Py.IntLiteral 0
translateExpr (TraverserCall "canstep" [Var s]) = do
vtd <- requireTraverser s
return $
traverserValid
(traverserIncrement (validRev vtd) (Py.IntLiteral 1) (Py.Var s)) vtd
translateExpr (TraverserCall "valid" [Var s]) = do
vtd <- requireTraverser s
return $ traverserValid (Py.Var s) vtd
translateExpr (TraverserCall "subset" [Var s1, Var s2]) = do
l1 <- validList <$> requireTraverser s1
l2 <- validList <$> requireTraverser s2
if l1 == l2
then return $ Py.Access (Py.Var l1) [Py.Slice (Just $ Py.Var s1) (Just $ Py.Var s2)]
else fail "Incompatible traversers!"
translateExpr (TraverserCall "bisect" [Var s, Lambda [x] e]) = do
vtd <- requireTraverser s
newTemp <- freshTemp
lambdaExpr <- translateExpr e
let access = Py.Access (Py.Var $ validList vtd) [Py.Var s]
let translated = substituteVariable x access lambdaExpr
let append s = Py.FunctionCall (Py.Member (Py.Var s) "append") [ access ]
let bisectStmt = Py.FunctionDef newTemp []
[ Py.Nonlocal [s]
, Py.Assign (Py.VarPat "l") (Py.ListLiteral [])
, Py.Assign (Py.VarPat "r") (Py.ListLiteral [])
, Py.While (traverserValid (Py.Var s) vtd)
[ Py.IfElse translated
[ Py.Standalone $ append "l" ]
[]
(Just [ Py.Standalone $ append "r" ])
, traverserStep s vtd
]
, Py.Return $ Py.TupleLiteral [Py.Var "l", Py.Var "r"]
]
emitStatement bisectStmt
return $ Py.FunctionCall (Py.Var newTemp) []
translateExpr (TraverserCall _ _) = fail "Invalid traverser operation"
translateExpr (FunctionCall f ps) = do
pes <- mapM translateExpr ps
return $ Py.FunctionCall (Py.Var f) pes
translateExpr (BinOp o l r) =
Py.BinOp (translateOp o) <$> translateExpr l <*> translateExpr r
translateExpr (Lambda ps e) =
Py.Lambda (map Py.VarPat ps) <$> translateExpr e
translateExpr (Var s) = return $ Py.Var s
translateExpr (IntLiteral i) = return $ Py.IntLiteral i
translateExpr (BoolLiteral b) = return $ Py.BoolLiteral b
translateExpr (ListLiteral es) = Py.ListLiteral <$> mapM translateExpr es
translateExpr (TupleLiteral es) = Py.TupleLiteral <$> mapM translateExpr es
applyOption :: TraverserData -> (String, Py.PyExpr) -> Maybe TraverserData
applyOption td ("list", Py.Var s) =
return $ td { list = Just s }
applyOption td ("span", Py.TupleLiteral [f, t]) =
return $ td { bounds = Just $ Range f t }
applyOption td ("random", Py.BoolLiteral True) =
return $ td { bounds = Just Random }
applyOption td ("reverse", Py.BoolLiteral b) =
return $ td { rev = b }
applyOption td _ = Nothing
translateOption :: (String, Expr) -> Translator (String, Py.PyExpr)
translateOption (s, e) = (,) s <$> translateExpr e
defaultTraverser :: TraverserData
defaultTraverser =
TraverserData { list = Nothing, bounds = Nothing, rev = False }
translateBranch :: Branch -> Translator (Py.PyExpr, [Py.PyStmt])
translateBranch (e, s) = (,) <$> translateExpr e <*>
(concat <$> mapM (withdrawStatements . translateStmt) s)
translateStmt :: Stmt -> Translator Py.PyStmt
translateStmt (IfElse i els e) = uncurry Py.IfElse
<$> (translateBranch i) <*> (mapM translateBranch els) <*> convertElse e
where
convertElse [] = return Nothing
convertElse es = Just . concat <$>
mapM (withdrawStatements . translateStmt) es
translateStmt (While b) = uncurry Py.While <$> translateBranch b
translateStmt (Traverser s os) =
foldlM applyOption defaultTraverser <$> mapM translateOption os >>= saveTraverser
where
saveTraverser :: Maybe TraverserData -> Translator Py.PyStmt
saveTraverser (Just (td@TraverserData { list = Just l, bounds = Just bs})) =
putTraverser s vtd $> translateInitialBounds s vtd
where
vtd = ValidTraverserData
{ validList = l
, validBounds = bs
, validRev = rev td
}
saveTraverser Nothing = fail "Invalid traverser (!)"
translateStmt (Let p e) = Py.Assign <$> translatePat p <*> translateExpr e
translateStmt (Return e) = Py.Return <$> translateExpr e
translateStmt (Standalone e) = Py.Standalone <$> translateExpr e
translateInitialBounds :: String -> ValidTraverserData -> Py.PyStmt
translateInitialBounds s vtd =
case (validBounds vtd, validRev vtd) of
(Random, _) -> traverserRandom s $ validList vtd
(Range l _, False) -> Py.Assign (Py.VarPat s) l
(Range _ r, True) -> Py.Assign (Py.VarPat s) r
translatePat :: Pat -> Translator Py.PyPat
translatePat (VarPat s) = clearTraverser s $> Py.VarPat s
translatePat (TuplePat ts) = Py.TuplePat <$> mapM translatePat ts
translateOp :: Op -> Py.PyBinOp
translateOp Add = Py.Add
translateOp Subtract = Py.Subtract
translateOp Multiply = Py.Multiply
translateOp Divide = Py.Divide
translateOp LessThan = Py.LessThan
translateOp LessThanEqual = Py.LessThanEq
translateOp GreaterThan = Py.GreaterThan
translateOp GreaterThanEqual = Py.GreaterThanEq
translateOp Equal = Py.Equal
translateOp NotEqual = Py.NotEqual
translateOp And = Py.And
translateOp Or = Py.Or
translateFunction :: Function -> [Py.PyStmt]
translateFunction (Function m s ps ss) = return $ Py.FunctionDef s ps $
[ Py.Standalone $ Py.FunctionCall (Py.Member (Py.Var p) "sort") []
| p <- take 1 ps, m == Sorted ] ++ stmts
where
stmts = concat $ evalState
(mapM (withdrawStatements . translateStmt) ss) (Map.empty, [], 0)
translate :: Prog -> [Py.PyStmt]
translate (Prog fs) =
(Py.FromImport "bisect" ["bisect"]) :
(Py.Import "random") : concatMap translateFunction fs

View File

@@ -0,0 +1,198 @@
module LanguageTwo where
import qualified PythonAst as Py
import qualified CommonParsing as P
import Data.Char
import Data.Functor
import Text.Parsec
import Text.Parsec.Char
import Text.Parsec.Combinator
{- Data Types -}
data Op
= Add
| Subtract
| Multiply
| Divide
| Equal
| NotEqual
| And
| Or
data Expr
= IntLiteral Int
| BinOp Op Expr Expr
| Var String
| Length Expr
data Stmt
= IfElse Expr Stmt (Maybe Stmt)
| Assign String Expr
| Block [Stmt]
data Prog = Prog Expr [Stmt] [Stmt]
{- Parser -}
type Parser = Parsec String ()
parseVar :: Parser String
parseVar = P.var [ "if", "else", "state", "effect", "combine" ]
parseLength :: Parser Expr
parseLength = Length <$> P.surround '|' '|' parseExpr
parseParenthesized :: Parser Expr
parseParenthesized = P.surround '(' ')' parseExpr
parseBasic :: Parser Expr
parseBasic = choice
[ IntLiteral <$> P.int
, Var <$> parseVar
, parseLength
, parseParenthesized
]
parseExpr :: Parser Expr
parseExpr = P.precedence BinOp parseBasic
[ P.op "*" Multiply <|> P.op "/" Divide
, P.op "+" Add <|> P.op "-" Subtract
, P.op "==" Equal <|> P.op "!=" NotEqual
, P.op "&&" And
, try $ P.op "||" Or
]
parseIf :: Parser Stmt
parseIf = do
P.kwIf >> spaces
c <- parseParenthesized
t <- parseStmt <* spaces
e <- (Just <$> (P.kwElse >> spaces *> parseStmt)) <|> return Nothing
return $ IfElse c t e
parseBlockStmts :: Parser [Stmt]
parseBlockStmts = P.surround '{' '}' (many parseStmt)
parseBlock :: Parser Stmt
parseBlock = Block <$> parseBlockStmts
parseAssign :: Parser Stmt
parseAssign = Assign <$>
(parseVar <* char '=' <* spaces) <*>
parseExpr <* (char ';' >> spaces)
parseStmt :: Parser Stmt
parseStmt = choice
[ parseIf
, parseAssign
, parseBlock
]
parseProgram :: Parser Prog
parseProgram = do
state <- P.kwState >> spaces *> parseExpr <* char ';' <* spaces
effect <- P.kwEffect >> spaces *> parseBlockStmts <* spaces
combined <- P.kwCombine >> spaces *> parseBlockStmts <* spaces
return $ Prog state effect combined
parse :: String -> String -> Either ParseError Prog
parse = runParser parseProgram ()
{- Translation -}
baseFunction :: Py.PyExpr -> [Py.PyStmt] -> [Py.PyStmt] -> Py.PyStmt
baseFunction s e c = Py.FunctionDef "prog" ["xs"] $
[Py.IfElse
(Py.BinOp Py.LessThan
(Py.FunctionCall (Py.Var "len") [Py.Var "xs"])
(Py.IntLiteral 2))
[Py.Return $ Py.Tuple [s, Py.Var "xs"]]
[]
Nothing
, Py.Assign (Py.VarPat "leng")
(Py.BinOp Py.FloorDiv
(Py.FunctionCall (Py.Var "len") [Py.Var "xs"])
(Py.IntLiteral 2))
, Py.Assign (Py.VarPat "left")
(Py.Access
(Py.Var "xs")
[Py.Slice Nothing $ Just (Py.Var "leng")])
, Py.Assign (Py.VarPat "right")
(Py.Access
(Py.Var "xs")
[Py.Slice (Just (Py.Var "leng")) Nothing])
, Py.Assign (Py.TuplePat [Py.VarPat "ls", Py.VarPat "left"])
(Py.FunctionCall (Py.Var "prog") [Py.Var "left"])
, Py.Assign (Py.TuplePat [Py.VarPat "rs", Py.VarPat "right"])
(Py.FunctionCall (Py.Var "prog") [Py.Var "right"])
, Py.Standalone $
Py.FunctionCall (Py.Member (Py.Var "left") "reverse") []
, Py.Standalone $
Py.FunctionCall (Py.Member (Py.Var "right") "reverse") []
, Py.Assign (Py.VarPat "state") s
, Py.Assign (Py.VarPat "source") (Py.IntLiteral 0)
, Py.Assign (Py.VarPat "total") (Py.ListLiteral [])
, Py.While
(Py.BinOp Py.And
(Py.BinOp Py.NotEqual (Py.Var "left") (Py.ListLiteral []))
(Py.BinOp Py.NotEqual (Py.Var "right") (Py.ListLiteral []))) $
[ Py.IfElse
(Py.BinOp Py.LessThanEq
(Py.Access (Py.Var "left") [Py.IntLiteral $ -1])
(Py.Access (Py.Var "right") [Py.IntLiteral $ -1]))
[ Py.Standalone $
Py.FunctionCall (Py.Member (Py.Var "total") "append")
[Py.FunctionCall (Py.Member (Py.Var "left") "pop") []]
, Py.Assign (Py.VarPat "source") (Py.IntLiteral 1)
]
[] $
Just
[ Py.Standalone $
Py.FunctionCall (Py.Member (Py.Var "total") "append")
[Py.FunctionCall (Py.Member (Py.Var "right") "pop") []]
, Py.Assign (Py.VarPat "source") (Py.IntLiteral 2)
]
] ++ e
] ++ c ++
[ Py.Standalone $ Py.FunctionCall (Py.Member (Py.Var "left") "reverse") []
, Py.Standalone $ Py.FunctionCall (Py.Member (Py.Var "right") "reverse") []
, Py.Return $ Py.Tuple
[ Py.Var "state"
, foldl (Py.BinOp Py.Add) (Py.Var "total") [Py.Var "left", Py.Var "right"]
]
]
translateExpr :: Expr -> Py.PyExpr
translateExpr (IntLiteral i) = Py.IntLiteral i
translateExpr (BinOp op l r) =
Py.BinOp (translateOp op) (translateExpr l) (translateExpr r)
translateExpr (Var s)
| s == "SOURCE" = Py.Var "source"
| s == "LEFT" = Py.Var "left"
| s == "RIGHT" = Py.Var "right"
| s == "STATE" = Py.Var "state"
| s == "LSTATE" = Py.Var "ls"
| s == "RSTATE" = Py.Var "rs"
| s == "L" = Py.IntLiteral 1
| s == "R" = Py.IntLiteral 2
| otherwise = Py.Var s
translateExpr (Length e) = Py.FunctionCall (Py.Var "len") [translateExpr e]
translateOp :: Op -> Py.PyBinOp
translateOp Add = Py.Add
translateOp Subtract = Py.Subtract
translateOp Multiply = Py.Multiply
translateOp Divide = Py.Divide
translateOp Equal = Py.Equal
translateOp NotEqual = Py.NotEqual
translateOp And = Py.And
translateOp Or = Py.Or
translateStmt :: Stmt -> [Py.PyStmt]
translateStmt (IfElse c t e) =
[Py.IfElse (translateExpr c) (translateStmt t) [] (translateStmt <$> e)]
translateStmt (Assign "STATE" e) = [Py.Assign (Py.VarPat "state") (translateExpr e)]
translateStmt (Assign v e) = [Py.Assign (Py.VarPat v) (translateExpr e)]
translateStmt (Block s) = concatMap translateStmt s
translate :: Prog -> [Py.PyStmt]
translate (Prog s e c) =
[baseFunction (translateExpr s) (concatMap translateStmt e) (concatMap translateStmt c)]

View File

@@ -0,0 +1,52 @@
module PythonAst where
data PyBinOp
= Add
| Subtract
| Multiply
| Divide
| FloorDiv
| LessThan
| LessThanEq
| GreaterThan
| GreaterThanEq
| Equal
| NotEqual
| And
| Or
data PyExpr
= BinOp PyBinOp PyExpr PyExpr
| IntLiteral Int
| StrLiteral String
| BoolLiteral Bool
| ListLiteral [PyExpr]
| DictLiteral [(PyExpr, PyExpr)]
| Lambda [PyPat] PyExpr
| Var String
| TupleLiteral [PyExpr]
| FunctionCall PyExpr [PyExpr]
| Access PyExpr [PyExpr]
| Ternary PyExpr PyExpr PyExpr
| Member PyExpr String
| In PyExpr PyExpr
| NotIn PyExpr PyExpr
| Slice (Maybe PyExpr) (Maybe PyExpr)
data PyPat
= VarPat String
| IgnorePat
| TuplePat [PyPat]
| AccessPat PyExpr [PyExpr]
data PyStmt
= Assign PyPat PyExpr
| IfElse PyExpr [PyStmt] [(PyExpr, [PyStmt])] (Maybe [PyStmt])
| While PyExpr [PyStmt]
| For PyPat PyExpr [PyStmt]
| FunctionDef String [String] [PyStmt]
| Return PyExpr
| Standalone PyExpr
| Import String
| FromImport String [String]
| Nonlocal [String]

View File

@@ -0,0 +1,142 @@
module PythonGen where
import PythonAst
import Data.List
import Data.Bifunctor
import Data.Maybe
indent :: String -> String
indent = (" " ++)
stmtBlock :: [PyStmt] -> [String]
stmtBlock = concatMap translateStmt
block :: String -> [String] -> [String]
block s ss = (s ++ ":") : map indent ss
prefix :: String -> PyExpr -> [PyStmt] -> [String]
prefix s e sts = block (s ++ " " ++ translateExpr e) $ stmtBlock sts
if_ :: PyExpr -> [PyStmt] -> [String]
if_ = prefix "if"
elif :: PyExpr -> [PyStmt] -> [String]
elif = prefix "elif"
else_ :: [PyStmt] -> [String]
else_ = block "else" . stmtBlock
while :: PyExpr -> [PyStmt] -> [String]
while = prefix "while"
parenth :: String -> String
parenth s = "(" ++ s ++ ")"
translateStmt :: PyStmt -> [String]
translateStmt (Assign p e) = [translatePat p ++ " = " ++ translateExpr e]
translateStmt (IfElse i t es e) =
if_ i t ++ concatMap (uncurry elif) es ++ maybe [] else_ e
translateStmt (While c t) = while c t
translateStmt (For x in_ b) = block head body
where
head = "for " ++ translatePat x ++ " in " ++ translateExpr in_
body = stmtBlock b
translateStmt (FunctionDef s ps b) = block head body
where
head = "def " ++ s ++ "(" ++ intercalate "," ps ++ ")"
body = stmtBlock b
translateStmt (Return e) = ["return " ++ translateExpr e]
translateStmt (Standalone e) = [translateExpr e]
translateStmt (Import s) = ["import " ++ s]
translateStmt (FromImport s ss) =
["from " ++ s ++ " import " ++ intercalate "," ss]
translateStmt (Nonlocal vs) =
["nonlocal " ++ intercalate "," vs]
precedence :: PyBinOp -> Int
precedence Add = 3
precedence Subtract = 3
precedence Multiply = 4
precedence Divide = 4
precedence FloorDiv = 4
precedence LessThan = 2
precedence LessThanEq = 2
precedence GreaterThan = 2
precedence GreaterThanEq = 2
precedence Equal = 2
precedence NotEqual = 2
precedence And = 1
precedence Or = 0
opString :: PyBinOp -> String
opString Add = "+"
opString Subtract = "-"
opString Multiply = "*"
opString Divide = "/"
opString FloorDiv = "//"
opString LessThan = "<"
opString LessThanEq = "<="
opString GreaterThan = ">"
opString GreaterThanEq = ">="
opString Equal = "=="
opString NotEqual = "!="
opString And = " and "
opString Or = " or "
translateOp :: PyBinOp -> PyBinOp -> PyExpr -> String
translateOp o o' =
if precedence o > precedence o'
then parenth . translateExpr
else translateExpr
dictMapping :: PyExpr -> PyExpr -> String
dictMapping f t = translateExpr f ++ ": " ++ translateExpr t
list :: String -> String -> [PyExpr] -> String
list o c es = o ++ intercalate ", " (map translateExpr es) ++ c
translateExpr :: PyExpr -> String
translateExpr (BinOp o l@(BinOp o1 _ _) r@(BinOp o2 _ _)) =
translateOp o o1 l ++ opString o ++ translateOp o o2 r
translateExpr (BinOp o l@(BinOp o1 _ _) r) =
translateOp o o1 l ++ opString o ++ translateExpr r
translateExpr (BinOp o l r@(BinOp o2 _ _)) =
translateExpr l ++ opString o ++ translateOp o o2 r
translateExpr (BinOp o l r) =
translateExpr l ++ opString o ++ translateExpr r
translateExpr (IntLiteral i) = show i
translateExpr (StrLiteral s) = "\"" ++ s ++ "\""
translateExpr (BoolLiteral b) = if b then "True" else "False"
translateExpr (ListLiteral l) = list "[" "]" l
translateExpr (DictLiteral l) =
"{" ++ intercalate ", " (map (uncurry dictMapping) l) ++ "}"
translateExpr (Lambda ps e) = parenth (head ++ ": " ++ body)
where
head = "lambda " ++ intercalate ", " (map translatePat ps)
body = translateExpr e
translateExpr (Var s) = s
translateExpr (TupleLiteral es) = list "(" ")" es
translateExpr (FunctionCall f ps) = translateExpr f ++ list "(" ")" ps
translateExpr (Access (Var s) e) = s ++ list "[" "]" e
translateExpr (Access e@Access{} i) = translateExpr e ++ list "[" "]" i
translateExpr (Access e i) = "(" ++ translateExpr e ++ ")" ++ list "[" "]" i
translateExpr (Ternary c t e) =
translateExpr t ++ " if " ++ translateExpr c ++ " else " ++ translateExpr e
translateExpr (Member (Var s) m) = s ++ "." ++ m
translateExpr (Member e@Member{} m) = translateExpr e ++ "." ++ m
translateExpr (Member e m) = "(" ++ translateExpr e ++ ")." ++ m
translateExpr (In m c) =
"(" ++ translateExpr m ++ ") in (" ++ translateExpr c ++ ")"
translateExpr (NotIn m c) =
"(" ++ translateExpr m ++ ") not in (" ++ translateExpr c ++ ")"
translateExpr (Slice l r) =
maybe [] (parenth . translateExpr) l ++ ":" ++ maybe [] (parenth . translateExpr) r
translatePat :: PyPat -> String
translatePat (VarPat s) = s
translatePat IgnorePat = "_"
translatePat (TuplePat ps) =
"(" ++ intercalate "," (map translatePat ps) ++ ")"
translatePat (AccessPat e es) = translateExpr (Access e es)
translate :: [PyStmt] -> String
translate = intercalate "\n" . concatMap translateStmt

View File

@@ -2,5 +2,5 @@
title: Daniel's Blog
---
## Hello!
Welcome to my blog. Here, I write about various subjects, including (but not limited to)
functional programming, compiler development, programming language theory, and occasionally video games. I hope you find something useful here!

View File

@@ -140,3 +140,4 @@ Here are the posts that I've written so far for this series:
* [Compilation]({{< relref "06_compiler_compilation.md" >}})
* [Runtime]({{< relref "07_compiler_runtime.md" >}})
* [LLVM]({{< relref "08_compiler_llvm.md" >}})
* [Garbage Collection]({{< relref "09_compiler_garbage_collection.md" >}})

View File

@@ -0,0 +1,511 @@
---
title: A Language for an Assignment - Homework 1
date: 2019-12-27T23:27:09-08:00
tags: ["Haskell", "Python", "Algorithms"]
---
On a rainy Oregon day, I was walking between classes with a group of friends.
We were discussing the various ways to obfuscate solutions to the weekly
homework assignments in our Algorithms course: replace every `if` with
a ternary expression, use single variable names, put everything on one line.
I said:
> The
{{< sidenote "right" "chad-note" "chad" >}}
This is in reference to a meme, <a href="https://knowyourmeme.com/memes/virgin-vs-chad">Virgin vs Chad</a>.
A "chad" characteristic is masculine or "alpha" to the point of absurdity.
{{< /sidenote >}} move would be to make your own, different language for every homework assignment.
It was required of us to use
{{< sidenote "left" "python-note" "Python" >}}
A friend suggested making a Haskell program
that generates Python-based interpreters for languages. While that would be truly
absurd, I'll leave <em>this</em> challenge for another day.
{{< /sidenote >}} for our solutions, so that was the first limitation on this challenge.
Someone suggested writing the languages in Haskell, since that's what we used
in our Programming Languages class. So the final goal ended up:
* For each of the 10 homework assignments in CS325 - Analysis of Algorithms,
* Create a Haskell program that translates a language into,
* A valid Python program that works (nearly) out of the box and passes all the test cases.
It may not be worth it to create a whole
{{< sidenote "right" "general-purpose-note" "general-purpose" >}}
A general purpose language is one that's designed to be used in various
domains. For instance, C++ is a general-purpose language because it can
be used for embedded systems, GUI programs, and pretty much anything else.
This is in contrast to a domain-specific language, such as Game Maker Language,
which is aimed at a much narrower set of uses.
{{< /sidenote >}} language for each problem,
but nowhere in the challenge did we say that it had to be general-purpose. In
fact, some interesting design thinking can go into designing a domain-specific
language for a particular assignment. So let's jump right into it, and make
a language for the first homework assignment.
### Homework 1
There are two problems in Homework 1. Here they are, verbatim:
{{< codelines "text" "cs325-langs/hws/hw1.txt" 32 38 >}}
And the second:
{{< codelines "text" "cs325-langs/hws/hw1.txt" 47 68 >}}
We want to make a language __specifically__ for these two tasks (one of which
is split into many tasks). What common things can we isolate? I see two:
First, __all the problems deal with lists__. This may seem like a trivial observation,
but these two problems are the __only__ thing we use our language for. We have
list access,
{{< sidenote "right" "filterting-note" "list filtering" >}}
Quickselect is a variation on quicksort, which itself
finds all the "lesser" and "greater" elements in the input array.
{{< /sidenote >}} and list creation. That should serve as a good base!
If you squint a little bit, __all the problems are recursive with the same base case__.
Consider the first few lines of `search`, implemented naively:
```Python
def search(xs, k):
    if xs == []:
        return False
```
How about `sorted`? Take a look:
```Python
def sorted(xs):
    if xs == []:
        return []
```
I'm sure you see the picture. But it will take some real mental gymnastics to twist the
rest of the problems into this shape. What about `qselect`, for instance? There's two
cases for what it may return:
* `None` or equivalent if the index is out of bounds (we give it `4` and a list `[1, 2]`).
* A number if `qselect` worked.
The test cases never provide a concrete example of what should be returned from
`qselect` in the first case, so we'll interpret it like
{{< sidenote "right" "undefined-note" "undefined behavior" >}}
For a quick sidenote about undefined behavior, check out how
C++ optimizes the <a href="https://godbolt.org/z/3skK9j">Collatz Conjecture function</a>.
Clang doesn't know whether or not the function will terminate (whether the Collatz Conjecture
function terminates is an <a href="https://en.wikipedia.org/wiki/Collatz_conjecture">unsolved problem</a>),
but functions that don't terminate are undefined behavior. There's only one other way the function
returns, and that's with "1". Thus, clang optimizes the entire function to a single "return 1" call.
{{< /sidenote >}} in C++:
we can do whatever we want. So, let's allow it to return `[]` in the `None` case.
This makes this base case valid:
```Python
def qselect(xs, k):
    if xs == []:
        return []
```
"Oh yeah, now it's all coming together." With one more observation (which will come
from a piece I haven't yet shown you!), we'll be able to generalize this base case.
The observation is this section in the assignment:
{{< codelines "text" "cs325-langs/hws/hw1.txt" 83 98 >}}
The real key is the part about "returning the `[]` where x should be inserted". It so
happens that when the list given to the function is empty, the number should be inserted
precisely into that list. Thus:
```Python
def _search(xs, k):
    if xs == []:
        return xs
```
The same works for `qselect`:
```Python
def qselect(xs, k):
    if xs == []:
        return xs
```
And for sorted, too:
```Python
def sorted(xs):
    if xs == []:
        return xs
```
There are some functions that are exceptions, though:
```Python
def insert(xs, k):
    # We can't return early here!
    # If we do, we'll never insert anything.
```
Also:
```Python
def search(xs, k):
    # We have to return true or false, never
    # an empty list.
```
So, whenever we __don't__ return a list, we don't want to add a special case.
We arrive at the following common base case: __whenever a function returns a list, if its first argument
is the empty list, the first argument is immediately returned__.
We've largely exhausted the conclusions we can draw from these problems. Let's get to designing a language.
### A Silly Language
Let's start by visualizing our goals. Without base cases, the solution to `_search`
would be something like this:
{{< codelines "text" "cs325-langs/sols/hw1.lang" 11 14 >}}
Here we have an __`if`-expression__. It has to have an `else`, and evaluates to the value
of the chosen branch. That is, `if true then 0 else 1` evaluates to `0`, while
`if false then 0 else 1` evaluates to `1`. Otherwise, we follow the binary tree search
algorithm faithfully.
Using this definition of `_search`, we can define `search` pretty easily:
{{< codelines "text" "cs325-langs/sols/hw1.lang" 17 17 >}}
Let's use Haskell's `(++)` operator for concatenation. This will help us understand
when the user is operating on lists, and when they're not. With this, `sorted` becomes:
{{< codelines "text" "cs325-langs/sols/hw1.lang" 16 16 >}}
Let's go for `qselect` now. We'll introduce a very silly language feature for this
problem:
{{< sidenote "right" "selector-note" "list selectors" >}}
You've probably never heard of list selectors, and for a good reason:
this is a <em>terrible</em> language feature. I'll go in more detail
later, but I wanted to make this clear right away.
{{< /sidenote >}}. We observe that `qselect` aims to partition the list into
other lists. We thus add the following pieces of syntax:
```
~xs -> {
    pivot <- xs[rand]!
    left <- xs[#0 <= pivot]
    ...
} -> ...
```
There are three new things here.
1. The actual "list selector": `~xs -> { .. } -> ...`. Between the curly braces
are branches which select parts of the list and assign them to new variables.
Thus, `pivot <- xs[rand]!` assigns the element at a random index to the variable `pivot`.
the `!` at the end means "after taking this out of `xs`, delete it from `xs`". The
syntax {{< sidenote "right" "curly-note" "starts with \"~\"" >}}
An observant reader will note that there's no need for the "xs" after the "~".
The idea was to add a special case syntax to reference the "selected list", but
I ended up not bothering. So in fact, this part of the syntax is useless.
{{< /sidenote >}} to make it easier to parse.
2. The `rand` list access syntax. `xs[rand]` is a special case that picks a random
element from `xs`.
3. The `xs[#0 <= pivot]` syntax. This is another special case that selects all elements
from `xs` that match the given predicate (where `#0` is replaced with each element in `xs`).
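To make the selector semantics concrete, here is roughly what the snippet above does, written as plain, eager Python. This is a hand-written illustration only: it ignores the lazy evaluation discussed below, and the use of `randint` is an assumption rather than the translator's literal output.

```Python
from random import randint

xs = [3, 1, 4, 1, 5]                      # example input list
pivot = xs.pop(randint(0, len(xs) - 1))   # xs[rand]! : pick a random element and remove it
left = [x for x in xs if x <= pivot]      # xs[#0 <= pivot] : keep elements satisfying the predicate
```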
The big part of qselect is to not evaluate `right` unless you have to. So, we shouldn't
eagerly evaluate the list selector. We also don't want something like `right[|right|-1]` to evaluate
`right` twice. So we settle on
{{< sidenote "right" "lazy-note" "lazy evaluation" >}}
Lazy evaluation means only evaluating an expression when we need to. Thus,
although we might encounter the expression for <code>right</code>, we
only evaluate it when the time comes. Lazy evaluation, at least
the way that Haskell has it, is more specific: an expression is evaluated only
once, or not at all.
{{</ sidenote >}}.
Ah, but the `!` marker introduces
{{< sidenote "left" "side-effect-note" "side effects" >}}
A side effect is a term frequently used when talking about functional programming.
Evaluating the expression <code>xs[rand]!</code> doesn't just get a random element,
it also changes <em>something else</em>. In this case, that something else is
the <code>xs</code> list.
{{< /sidenote >}}. So we can't just evaluate these things all willy-nilly.
So, let's make it so that each expression in the selector list requires the ones above it. Thus,
`left` will require `pivot`, and `right` will require `left` and `pivot`. So,
lazily evaluated, ordered expressions. The whole `qselect` becomes:
{{< codelines "text" "cs325-langs/sols/hw1.lang" 1 9 >}}
We've now figured out all the language constructs. Let's start working on
some implementation!
#### Implementation
It would be silly of me to explain every detail of creating a language in Haskell
in this post; this is neither the purpose of the post, nor is it plausible
to do this without covering monads, parser combinators, grammars, abstract syntax
trees, and more. So, instead, I'll discuss the _interesting_ parts of the
implementation.
##### Temporary Variables
Our language is expression-based, yes. A function is a single,
arbitrarily complex expression (involving `if/else`, list
selectors, and more). So it would make sense to translate
a function to a single, arbitrarily complex Python expression.
However, the way we've designed our language makes it
not-so-suitable for converting to a single expression! For
instance, consider `xs[rand]`. We need to compute the list,
get its length, generate a random number, and then access
the corresponding element in the list. We use the list
here twice, and simply repeating the expression would not
be very smart: we'd be evaluating twice. So instead,
we'll use a variable, assign the list to that variable,
and then access that variable multiple times.
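As a rough sketch of the idea (hand-written for illustration; the name `temp0` and the call to `randint` are assumptions, not the translator's literal output), the Python we are aiming for looks something like this:

```Python
from random import randint

temp0 = [3, 1, 4] + [1, 5]                   # the (possibly complex) list expression, evaluated once
element = temp0[randint(0, len(temp0) - 1)]  # the temporary is reused for both len() and indexing
```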
To be extra safe, let's use a fresh temporary variable
every time we need to store something. The simplest
way is to simply maintain a counter of how many temporary
variables we've already used, and generate a new variable
by prepending the word "temp" to that number. We start
with `temp0`, then `temp1`, and so on. To keep a counter,
we can use a state monad:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 230 230 >}}
Don't worry about the `Map.Map String [String]`, we'll get to that in a bit.
For now, all we have to worry about is the second element of the tuple,
the integer counting how many temporary variables we've used. We can
get the current temporary variable as follows:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 232 235 >}}
We can also get a fresh temporary variable like this:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 237 240 >}}
Now, the
{{< sidenote "left" "code-note" "code" >}}
Since we are translating an expression, we must have the result of
the translation yield an Python expression we can use in generating
larger Python expressions. However, as we've seen, we occasionally
have to use statements. Thus, the <code>translateExpr</code> function
returns a <code>Translator ([Py.PyStmt], Py.PyExpr)</code>.
{{< /sidenote >}} for generating a random list access looks like
{{< sidenote "right" "ast-note" "this:" >}}
The <code>Py.*</code> constructors are a part of a Python AST module I quickly
threw together. I won't showcase it here, but you can always look at the
source code for the blog (which includes this project)
<a href="https://dev.danilafe.com/Web-Projects/blog-static">here</a>.
{{< /sidenote >}}
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 325 330 >}}
##### Implementing "lazy evaluation"
Lazy evaluation in functional programs usually arises from
{{< sidenote "right" "graph-note" "graph reduction" >}}
Graph reduction, more specifically the <em>Spineless,
Tagless G-machine</em> is at the core of the Glasgow Haskell
Compiler (GHC). Simon Peyton Jones' earlier book,
<em>Implementing Functional Languages: a tutorial</em>
details an earlier version of the G-machine.
{{< /sidenote >}}. However, Python is neither
functional nor graph-based, and we only lazily
evaluate list selectors. Thus, we'll have to do
some work to get our lazy evaluation to work as we desire.
Here's what I came up with:
1. It's difficult to insert Python statements where they are
needed: we'd have to figure out in which scope each variable
has already been declared, and in which scope it's yet
to be assigned.
2. Instead, we can use a Python dictionary, called `cache`,
and store computed versions of each variable in the cache.
3. It's pretty difficult to check if a variable
is in the cache, compute it if not, and then return the
result of the computation, in one expression. This is
true, unless that single expression is a function call, and we have a dedicated
function that takes no arguments, computes the expression if needed,
and uses the cache otherwise. We choose this route.
4. We have already promised that we'd evaluate all the selected
variables above a given variable before evaluating the variable
itself. So, each function will first call (and therefore
{{< sidenote "right" "force-note" "force" >}}
Forcing, in this case, comes from the context of lazy evaluation. To
force a variable or an expression is to tell the program to compute its
value, even though it may have been putting it off.
{{< /sidenote >}}) the functions
generated for variables declared above the function's own variable.
5. To keep track of all of this, we use the already-existing state monad
as a reader monad (that is, we clear the changes we make to the monad
after we're done translating the list selector). This is where the `Map.Map String [String]`
comes from.
The `Map.Map String [String]` keeps track of variables that will be lazily computed,
and also of the dependencies of each variable (the variables that need
to be accessed before the variable itself). We compute such a map for
each selector as follows:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 298 298 >}}
We update the existing map using `Map.union`:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 299 299 >}}
And, after we're done generating expressions in the body of this selector,
we clear it to its previous value `vs`:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 302 302 >}}
We generate a single selector as follows:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 268 281 >}}
This generates a function definition statement, which we will examine in
generated Python code later on.
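To make the shape of these generated definitions concrete, here is a hand-written approximation of what selectors like `pivot <- xs[0]!` and `left <- xs[#0 <= pivot]` turn into (the real output, shown at the end of this post, has a little more machinery around the filtering):

```Python
xs = [3, 1, 4, 1, 5]
cache = {}

def pivot():
    if "pivot" not in cache:                  # compute the value at most once
        cache["pivot"] = xs.pop(0)            # the `!` marker removes the element from xs
    return cache["pivot"]

def left():
    pivot()                                   # force the selectors declared above this one first
    if "left" not in cache:
        cache["left"] = [x for x in xs if x <= pivot()]
    return cache["left"]
```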
Solving the problem this way also introduces another gotcha: sometimes,
a variable is produced by a function call, and other times the variable
is just a Python variable. We write this as follows:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 283 288 >}}
##### Special Case Insertion
This is a silly language for a single homework assignment. I'm not
planning to implement Hindley-Milner type inference, or anything
of that sort. For the purpose of this language, things will be
either a list, or not a list. And as long as a function __can__ return
a list, it can also return the list from its base case. Thus,
that's all we will try to figure out. The checking code is so
short that we can include the whole snippet at once:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 219 227 >}}
`mergePossibleType`
{{< sidenote "right" "bool-identity-note" "figures out" >}}
An observant reader will note that this is just a logical
OR function. It's not, however, good practice to use
booleans for types that have two constructors with no arguments.
Check out this <a href="https://programming-elm.com/blog/2019-05-20-solving-the-boolean-identity-crisis-part-1/">
Elm-based article</a> about this, which the author calls the
Boolean Identity Crisis.
{{< /sidenote >}}, given two possible types for an
expression, the final type for the expression.
There's only one real trick to this. Sometimes, like in
`_search`, the only time we return something _known_ to be a list, that
something is `xs`. Since we're making a list manipulation language,
let's __assume the first argument to the function is a list__, and
__use this information to determine expression types__. We guess
types in a very basic manner otherwise: If you use the concatenation
operator, or a list literal, then obviously we're working on a list.
If you're returning the first argument of the function, that's also
a list. Otherwise, it could be anything.
My Haskell linter actually suggested a pretty clever way of writing
the whole "add a base case if this function returns a list" code.
Check it out:
{{< codelines "Haskell" "cs325-langs/src/LanguageOne.hs" 260 266 >}}
Specifically, look at the line with `let fastReturn = ...`. It
uses a list comprehension: we take a parameter `p` from the list of
parameters `ps`, but only produce the statements for the base case
if the possible type computed using `p` is `List`.
### The Output
What kind of beast have we created? Take a look for yourself:
```Python
def qselect(xs,k):
if xs==[]:
return xs
cache = {}
def pivot():
if ("pivot") not in (cache):
cache["pivot"] = xs.pop(0)
return cache["pivot"]
def left():
def temp2(arg):
out = []
for arg0 in arg:
if arg0<=pivot():
out.append(arg0)
return out
pivot()
if ("left") not in (cache):
cache["left"] = temp2(xs)
return cache["left"]
def right():
def temp3(arg):
out = []
for arg0 in arg:
if arg0>pivot():
out.append(arg0)
return out
left()
pivot()
if ("right") not in (cache):
cache["right"] = temp3(xs)
return cache["right"]
if k>(len(left())+1):
temp4 = qselect(right(), k-len(left())-1)
else:
if k==(len(left())+1):
temp5 = [pivot()]
else:
temp5 = qselect(left(), k)
temp4 = temp5
return temp4
def _search(xs,k):
if xs==[]:
return xs
if xs[1]==k:
temp6 = xs
else:
if xs[1]>k:
temp8 = _search(xs[0], k)
else:
temp8 = _search(xs[2], k)
temp6 = temp8
return temp6
def sorted(xs):
if xs==[]:
return xs
return sorted(xs[0])+[xs[1]]+sorted(xs[2])
def search(xs,k):
return len(_search(xs, k))!=0
def insert(xs,k):
return _insert(k, _search(xs, k))
def _insert(k,xs):
if k==[]:
return k
if len(xs)==0:
temp16 = xs
temp16.append([])
temp17 = temp16
temp17.append(k)
temp18 = temp17
temp18.append([])
temp15 = temp18
else:
temp15 = xs
return temp15
```
It's...horrible! All the `tempX` variables, __three layers of nested function declarations__, hardcoded cache access. This is not something you'd ever want to write.
Even to get this code, I had to come up with hacks __in a language I created__.
The first hack is to make the `qselect` function use the `xs == []` base
case. This doesn't happen by default, because `qselect` doesn't return a list!
To "fix" this, I made `qselect` return the number it found, wrapped in a
list literal. This is not up to spec, and would require another function
to unwrap this list.
While `qselect` was struggling with not having the base case, `insert` had
a base case it didn't need: `insert` shouldn't return the list itself
when it's empty, it should insert into it! However, because we use the `<<`
list insertion operator, the language infers `insert` to be a list-returning
function and generates the base case; inserting into an empty list would therefore always fail. So, we
make a function `_insert`, which __takes the arguments in reverse__.
The base case will still be generated, but the first argument (against
which the base case is checked) will be a number, so the `k == []` check
will always fail.
That concludes this post. I'll be working on more solutions to homework
assignments in self-made languages, so keep an eye out!


@@ -0,0 +1,218 @@
---
title: A Language for an Assignment - Homework 2
date: 2019-12-30T20:05:10-08:00
tags: ["Haskell", "Python", "Algorithms"]
---
After the madness of the
[language for homework 1]({{< relref "00_cs325_languages_hw1.md" >}}),
the solution to the second homework offers a moment of respite.
Let's get right into the problems, shall we?
### Homework 2
Besides some free-response questions, the homework contains
two problems. The first:
{{< codelines "text" "cs325-langs/hws/hw2.txt" 29 34 >}}
And the second:
{{< codelines "text" "cs325-langs/hws/hw2.txt" 36 44 >}}
At first glance, it's not obvious why these problems are good for
us. However, there's one key observation: __`num_inversions` can be implemented
using a slightly-modified `mergesort`__. The trick is to maintain a counter
of inversions in every recursive call to `mergesort`, updating
it every time we take an element from the
{{< sidenote "right" "right-note" "right list" >}}
If this nomenclature is not clear to you, recall that
mergesort divides a list into two smaller lists. The
"right list" refers to the second of the two, because
if you visualize the original list as a rectangle, and cut
it in half (vertically, down the middle), then the second list
(from the left) is on the right.
{{< /sidenote >}} while there are still elements in the
{{< sidenote "left" "left-note" "left list" >}}
Why this is the case is left as an exercise to the reader.
{{< /sidenote >}}.
When we return from the call,
we add up the number of inversions from running `num_inversions`
on the smaller lists, and the number of inversions that we counted
as I described. We then return both the total number
of inversions and the sorted list.
So, we either perform the standard mergesort, or we perform mergesort
with additional steps added on. The additional steps can be divided into
three general categories:
1. __Initialization__: We create / set some initial state. This state
doesn't depend on the lists or anything else.
2. __Effect__: Each time that an element is moved from one of the two smaller
lists into the output list, we may change the state in some way (create
an effect).
3. __Combination__: The final state, and the results of the two
sub-problem states, are combined into the output of the function.
This is all very abstract. In the concrete case of inversions,
these steps are as follows:
1. __Initialization__: The initial state, which is just the counter, is set to 0.
2. __Effect__: Each time an element is moved, if it comes from the right list,
the number of inversions is updated.
3. __Combination__: We update the state, simply adding the left and right
inversion counts.
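A hand-written Python sketch of mergesort augmented with exactly these three hooks
(purely an illustration; the code our language generates appears later) looks like this:
```Python
def num_inversions(xs):
    if len(xs) < 2:
        return (0, list(xs))
    mid = len(xs) // 2
    ls, left = num_inversions(xs[:mid])
    rs, right = num_inversions(xs[mid:])
    state = 0                        # initialization: the inversion counter
    total, i, j = [], 0, 0
    while i < len(left) and j < len(right):
        if left[i] <= right[j]:
            total.append(left[i])
            i += 1
        else:
            total.append(right[j])
            j += 1
            state += len(left) - i   # effect: an element came from the right list
    state += ls + rs                 # combination: add the subproblem counts
    return (state, total + left[i:] + right[j:])
```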
We can make a language out of this!
### A Language
Again, let's start by visualizing what the solution will look like. How about this:
{{< rawblock "cs325-langs/sols/hw2.lang" >}}
We divide the code into the same three steps that we described above. The first
section is the initial state. Since it doesn't depend on anything, we expect
it to be some kind of literal, like an integer. Next, we have the effect section,
which has access to the variables below:
* `STATE`, to manipulate or check the current state.
* `LEFT` and `RIGHT`, to access the two lists being merged.
* `L` and `R`, constants that are used to compare against the `SOURCE` variable.
* `SOURCE`, to denote which list a number came from.
* `LSTATE` and `RSTATE`, to denote the final states from the two subproblems.
We use an `if`-statement to check if the element that was popped came
from the right list (by checking `SOURCE == R`). If it did, we increment the counter
(state) by the proper amount. In the combine step, which has access to the
same variables, we simply increment the state by the counters from the left
and right solutions, stored in `LSTATE` and `RSTATE`. That's it!
#### Implementation
The implementation is not tricky at all. We don't need to use monads like we did last
time, nor do we have to emit any fancy nested Python function declarations.
To keep with the Python convention of lowercase variables, we'll translate the
uppercase "global" variables to lowercase. We'll do it like so:
{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 167 176 >}}
Note that we translated `L` and `R` to integer literals. We'll indicate the source of
each element with an integer, since there's no real point to representing it with
a string or a variable. We'll need to be aware of this when we implement the actual, generic
mergesort code. Let's do that now:
{{< codelines "Haskell" "cs325-langs/src/LanguageTwo.hs" 101 161 >}}
This is probably the ugliest part of this assignment: we handwrote a Python
AST in Haskell that implements mergesort with our augmentations. Note that
this is a function, which takes a `Py.PyExpr` (the initial state expression),
and two lists of `Py.PyStmt`, which are the "effect" and "combination" code,
respectively. We simply splice them into our regular mergesort function.
The translation is otherwise pretty trivial, so there's no real reason
to show it here.
### The Output
What's the output of our solution to `num_inversions`? Take a look for yourself:
```Python
def prog(xs):
if len(xs)<2:
return (0, xs)
leng = len(xs)//2
left = xs[:(leng)]
right = xs[(leng):]
(ls,left) = prog(left)
(rs,right) = prog(right)
left.reverse()
right.reverse()
state = 0
source = 0
total = []
while (left!=[])and(right!=[]):
if left[-1]<=right[-1]:
total.append(left.pop())
source = 1
else:
total.append(right.pop())
source = 2
if source==2:
state = state+len(left)
state = state+ls+rs
left.reverse()
right.reverse()
return (state, total+left+right)
```
Honestly, that's pretty clean. Or at least, as clean as code that calls `left.reverse()` to allow for \\(O(1)\\) pops can be.
What's really clean, however, is the implementation of mergesort in our language.
It goes as follows:
```
state 0;
effect {}
combine {}
```
To implement mergesort in our language, which describes mergesort variants, all
we have to do is not specify any additional behavior. Cool, huh?
That's the end of this post. If you liked this one (and the previous one!),
keep an eye out for more!
### Appendix (Missing Homework Question)
I should not view homework assignments on a small-screen device. There __was__ a third problem
on homework 2:
{{< codelines "text" "cs325-langs/hws/hw2.txt" 46 65 >}}
This is not a mergesort variant, and adding support for it into our second language
will prevent us from making it the neat specialized
{{< sidenote "right" "dsl-note" "DSL" >}}
DSL is a shortened form of "domain specific language", which was briefly
described in another sidenote while solving homework 1.
{{< /sidenote >}} that we just saw. We'll do something else, instead:
we'll use the language we defined in homework 1 to solve this
problem:
```
empty() = [0, 0];
longest(xs) =
if |xs| != 0
then _longest(longest(xs[0]), longest(xs[2]))
else empty();
_longest(l, r) = [max(l[0], r[0]) + 1, max(l[0]+r[0], max(l[1], r[1]))];
```
{{< sidenote "right" "terrible-note" "This is quite terrible." >}}
This is probably true with any program written in our first
language.
{{< /sidenote >}} In these 6 lines of code, there are two hacks
to work around the peculiarities of the language.
At each recursive call, we want to keep track of both the depth
of the tree and the existing longest path. This is because
the longest path could be found either somewhere down
a subtree, or from combining the largest depths of
two subtrees. To return two values from a function in Python,
we'd use a tuple. Here, we use a list.
Alarm bells should be going off here. There's no reason why we should
ever return an empty list from the recursive call: at the very least, we
want to return `[0,0]`. But placing such a list literal in a function
will trigger the special case insertion. So, we have to hide this literal
from the compiler. Fortunately, that's not too hard to do - the compiler
is pretty halfhearted in its inference of types. Simply putting
the literal behind a constant function (`empty`) does the trick.
The program uses the subproblem depths multiple times in the
final computation. We thus probably want to assign these values
to names so we don't have to perform any repeated work. Since
the only two mechanisms for
{{< sidenote "right" "binding-note" "binding variables" >}}
To bind a variable means to assign a value to it.
{{< /sidenote >}} in this language are function calls
and list selectors, we use a helper function `_longest`,
which takes two subproblem solutions and combines them
into a new solution. It's pretty obvious that `_longest`
returns a list, so the compiler will try to insert a base
case. Fortunately, subproblem solutions are always
lists of two numbers, so this doesn't affect us too much.
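For comparison, here's the same algorithm hand-translated into plain Python, with trees
represented as `[left, value, right]` lists and the empty tree as `[]` (my own translation,
not output from either language):
```Python
def empty():
    return [0, 0]

# A tree is either [] or [left_subtree, value, right_subtree].
# The result is a pair [depth, longest_path].
def longest(xs):
    if len(xs) != 0:
        return _longest(longest(xs[0]), longest(xs[2]))
    return empty()

def _longest(l, r):
    return [max(l[0], r[0]) + 1, max(l[0] + r[0], max(l[1], r[1]))]
```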


@@ -0,0 +1,429 @@
---
title: A Language for an Assignment - Homework 3
date: 2020-01-02T22:17:43-08:00
tags: ["Haskell", "Python", "Algorithms"]
---
It rained in Sunriver on New Year's Eve, and it continued to rain
for the next couple of days. So, instead of going skiing as planned,
to the dismay of my family and friends, I spent the majority of
those days working on the third language for homework 3. It
was quite the language, too - the homework has three problems, each of
which has a solution independent of the others. I invite you
to join me in my descent into madness as we construct another language.
### Homework 3
Let's take a look at the three homework problems. The first two are
related, but are solved using a different technique:
{{< codelines "text" "cs325-langs/hws/hw3.txt" 18 30 >}}
This problem requires us to find the `k` numbers closest to some
query (which I will call `n`) from a list `xs`. The list isn't sorted, and the
problem must run in linear time. Sorting the list would require
the standard
{{< sidenote "right" "n-note" "\(O(n\log n)\) time." >}}
The \(n\) in this expression is not the same as the query <code>n</code>,
but rather the length of the list. In fact, I have not yet assigned
the length of the input <code>xs</code> to any variable. If we say that
\(m\) is a number that denotes that length, the proper expression
for the complexity is \(O(m \log m)\).
{{< /sidenote >}} Thus, we have to take another route, which should
already be familiar: quickselect. Using quickselect, we can find the `k`th
closest number, and then collect all the numbers that are closer than the `k`th
closest number. So, we need a language that:
* Supports quickselect (and thus, list partitioning and recursion).
* Supports iteration, {{< sidenote "left" "iteration-note" "multiple times." >}}
Why would we need to iterate multiple times? Note that we could have a list
of numbers that are all the same, <code>[1,1,1,1,1]</code>. Then, we'll need
to know how many of the numbers <em>equally close</em> as the <code>k</code>th
element we need to include, which will require another pass through the list.
{{< /sidenote >}}
That's a good start. Let's take a look at the second problem:
{{< codelines "text" "cs325-langs/hws/hw3.txt" 33 47 >}}
This problem really is easier. We have to find the position of _the_ closest
element, and then try to expand towards either the left or right, depending on
which end is better. This expansion will take several steps, and will
likely require a way to "look" at a given part of the list. So let's add two more
rules. We need a language that also:
* Supports looping control flow, such as `while`.
* {{< sidenote "right" "view-note" "Allows for a \"view\" into the list" >}}
We could, of course, simply use list indexing. But then, we'd just be making
a simple imperative language, and that's boring. So let's play around
with our design a little, and experimentally add such a "list view" component.
{{< /sidenote >}}
(like an abstraction over indexing).
This is shaping up to be a fun language. Let's take a look at the last problem:
{{< codelines "text" "cs325-langs/hws/hw3.txt" 50 64 >}}
This problem requires more iterations of a list. We have several
{{< sidenote "right" "cursor-note" "\"cursors\"" >}}
I always make the language before I write the post, since a lot of
design decisions change mid-implementation. I realize now that
"cursors" would've been a better name for this language feature,
but alas, it is too late.
{{< /sidenote >}} looking into the list, and depending if the values
at each of the cursors add up, we do or do not add a new tuple to a list. So,
two more requirements:
* The "cursors" must be able to interact.
* The language can represent {{< sidenote "left" "tuple-note" "tuples." >}}
We could, of course, hack some other way to return a list of tuples, but
it turns out tuples are pretty simple to implement, and help make for nicer
programming in our language.
{{< /sidenote >}}
I think we've gathered what we want from the homework. Let's move on to the
language!
### A Language
As is now usual, let's envision a solution to the problems in our language. There
are actually quite a lot of functions to look at, so let's see them one by one.
First, let's look at `qselect`.
{{< codelines "text" "cs325-langs/sols/hw3.lang" 1 19 >}}
After the early return, the first interesting part of the language is the
use of what I have decided to call a __list traverser__. The list
traverser is a __generalization of a list index__. Whenever we use a list
index variable, we generally use the following operations:
* __Initialize__: we set the list index to some initial value, such as 0.
* __Step__: If we're walking the list from left to right, we increment the index.
If we're walking the list from right to left, we decrement the index.
* __Validity Check__: We check if the index is still valid (that is, we haven't
gone past the edge of the list).
* __Access__: Get the element the cursor is pointing to.
A {{< sidenote "right" "cpp-note" "traverser declaration" >}}
A fun fact is that we've just rediscovered C++
<a href="http://www.cplusplus.com/reference/iterator/">iterators</a>. C++
containers and their iterators provide us with the operations I described:
We can initialize an iterator like <code>auto it = list.begin()</code>. We
can step the iterator using <code>it++</code>. We can check its validity
using <code>it != list.end()</code>, and access what it's pointing to using
<code>*it</code>. While C++ uses templates and inheritance for this,
we define a language feature specifically for lists.
{{< /sidenote >}} describes these operations. The declaration for the `bisector`
traverser creates a "cursor" over the list `xs`, that goes between the 0th
and last elements of `xs`. The declaration for the `pivot` traverser creates
a "cursor" over the list `xs` that jumps around random locations in the list.
The next interesting part of the language is a __traverser macro__. This thing,
that looks like a function call (but isn't), performs an operation on the
cursor. For instance, `pop!` removes the element at the cursor from the list,
whereas `bisect!` categorizes the remaining elements in the cursor's list
into two lists, using a boolean-returning lambda (written in Java syntax).
Note that this implementation of `qselect` takes a function `c`, which it
uses to judge the actual value of the number. This is because our `qselect`
won't be finding _the_ smallest number, but the number with the smallest difference
with `n`. `n` will be factored in via the function.
Next up, let's take a look at the function that uses `qselect`, `closestUnsorted`:
{{< codelines "text" "cs325-langs/sols/hw3.lang" 21 46 >}}
Like we discussed, it finds the `k`th closest element (calling it `min`),
and counts how many elements that are __equal__ need to be included,
by setting the number to `k` at first, and subtracting 1 for every number
it encounters that's closer than `min`. Notice that we use the `valid!` and
`step!` macros, which implement the operations we described above. Note also
that the user doesn't deal with adding and subtracting numbers, or doing
comparisons. All they have to do is ask "am I still good to iterate?"
Next, let's take a look at `closestSorted`, which will require more
traverser macros.
{{< codelines "text" "cs325-langs/sols/hw3.lang" 48 70 >}}
The first new macro is `canstep!`. This macro just verifies that
the traverser can make another step. We need this for the "reverse" iterator,
which indicates the lower bound of the range of numbers we want to return,
because `subset!` (which itself is just Python's slice, like `xs[a:b]`) uses an inclusive bottom
index, and thus, we can't afford to step it before knowing that we can, and that
it's a better choice after the step.
Similarly, we have the `at!(t, i)` macro, which looks at the
traverser `t`, with offset `i`.
We have two loops. The first loop runs as long as we can expand the range in both
directions, and picks the better direction at each iteration. The second loop
runs as long as we still want more numbers, but have already hit the edge
of the list on the left or on the right.
Finally, let's look at the solution to `xyz`:
{{< codelines "text" "cs325-langs/sols/hw3.lang" 72 95 >}}
I won't go in depth, but notice that the expression in the `span` part
of the `traverser` declaration can access another traverser. We treat
as a feature the fact that this expression isn't immediately evaluated at the place
of the traverser declaration. Rather, every time that a comparison for a traverser
operation is performed, this expression is re-evaluated. This allows us to put
dynamic bounds on traversers `y` and `z`, one of which must not exceed the other.
Note also a new keyword that was just used: `sorted`. This is a harmless little
language feature that automatically calls `.sort()` on the first argument of
the function.
This is more than enough to work with. Let's move on to the implementation.
#### Implementation
Again, let's not go too far into the details of implementing the language from scratch.
Instead, let's take a look into specific parts of the language that deserve attention.
##### Revenge of the State Monad
Our previous language was, indeed, a respite from complexity. Translation was
straightforward, and the resulting expressions and statements were plugged straight
into a handwritten AST. We cannot get away with this here; the language is powerful
enough to implement three list-based problems, which comes at the cost of increased
complexity.
We need, once again, to generate temporary variables. We also need to keep track of
which variables are traversers, and the properties of these traversers, throughout
each function of the language. We thus fall back to using `Control.Monad.State`:
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 198 198 >}}
There's one part of the state tuple that we haven't yet explained: the list of
statements.
##### Generating Statements
Recall that our translation function for expressions in the first homework had the type:
```Haskell
translateExpr :: Expr -> Translator ([Py.PyStmt], Py.PyExpr)
```
We then had to use `do`-notation, and explicitly concatenate lists
of emitted statements. In this language, I took an alternative route: I made
the statements part of the state. They are thus implicitly generated and
stored in the monad, and expression generators don't have to worry about
concatenating them. When the program is ready to use the generated statements
(say, when an `if`-statement needs to use the statements emitted by the condition
expression), we retrieve them from the monad:
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 228 234 >}}
I should note, for transparency, that there's a bug in my use of this function.
When I compile `if`-statements, I accidentally place statements generated by
the condition into the body of the `if`. This bug doesn't manifest
in the solutions to the homework problems, and so I decided not to spend any more
time on fixing it.
##### Validating Traverser Declarations
We declare two separate types that hold traverser data. The first is a kind of "draft"
type, `TraverserData`:
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 184 190 >}}
This record holds all possible configurations of a traverser
that occur as the program is iterating through the various `key: value` pairs in
the declaration. For instance, at the very beginning of processing a traverser declaration,
our program will use a "default" `TraverserData`, with all fields set to `Nothing` or
their default value. This value will then be modified by the first key/value pair,
changing, for instance, the list that the traverser operates on. This new modified
`TraverserData` will then be modified by the next key/value pair, and so on. Doing
this with every key/value pair (called an option in the below snippet)
is effectively a foldl operation.
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 378 387 >}}
The data may not have all the required fields until the very end, and its type
reflects that: `Maybe String` here, `Maybe TraverserBounds` there. We don't
want to deal with unwrapping the `Maybe a` values every time we use the traverser,
especially if we've done so before. So, we define a `ValidTraverserData` record
that does not have `Maybe` arguments, and thus, has all the required data. At the
end of a traverser declaration, we attempt to translate a `TraverserData` into
a `ValidTraverserData`, invoking `fail` if we can't, and storing the `ValidTraverserData`
into the state otherwise:
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 408 420 >}}
Then, to retrieve a traverser from the state,
we define a monadic lookup operation like this:
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 240 244 >}}
##### Compiling Macros
I didn't call them macros for no reason. Clearly, we don't want to generate
code that
{{< sidenote "right" "increment-note" "calls functions only to increment an index." >}}
In fact, there's no easy way to do this at all. Python's integers (if we choose to
represent our traversers using integers), are immutable. Furthermore, unlike C++,
where passing by reference allows a function to change its parameters "outside"
the call, Python offers no way to reassign a different value to a variable given
to a function.
<br><br>
For an example use of C++'s pass-by-reference mechanic, consider <code>std::swap</code>:
it's a function, but it modifies the two variables given to it. There's no
way to generically implement such a function in Python.
{{< /sidenote >}} We also can't allow arbitrary expressions to serve as traversers:
our translator keeps some context about which variables are traversers, what their
bounds are, and how they behave. Thus, __calls to traverser macros are very much macros__:
they operate on AST nodes, and __require__ that their first argument is a variable,
named like the traverser. We use the `requireTraverser` monadic operation
to get the traverser associated with the given variable name, and then perform
the operation as intended. The `at!(t)` operation is straightforward:
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 317 319 >}}
The `at!(t,i)` is less so, since it deals with the intricacies of accessing
the list at either a positive or negative offset, depending on the direction
of the traverser. We implement a function to properly generate an expression for the offset:
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 246 249 >}}
We then implement `at!(t,i)` as follows:
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 320 323 >}}
The most complicated macro is `bisect!`. It must be able to step the traverser,
and also return a tuple of the two lists that the bisection yields. We'd also
prefer that it not pollute the environment with extra variables. To
achieve this, we want `bisect!` to expand to a function call, with that
function implementing the iteration and list construction.
`bisect!`, by definition, takes a lambda. This lambda, in our language, is declared
in the lexical scope in which `bisect!` is called. Thus, to guarantee correct translation,
we must do one of two things:
1. Translate 1-to-1, and create a lambda, passing it to a fixed `bisect` function declared
elsewhere.
2. Translate to a nested function declaration,
{{< sidenote "right" "inline-note" "inlining the lambda." >}}
Inlining, in this case, means replacing a call to a function with the function's body.
We do this to prevent the overhead of calling a function, which typically involves pushing
on a stack and other extraneous work. If our function is simple, like a simple
comparison, it doesn't make sense to spend the effort calling it.
{{< /sidenote >}}
Since I quite like the idea of inlining a lambda, let's settle for that. To do this,
we pull a fresh temporary variable and declare a function, into which we place
the traverser iteration code, as well as the body of the lambda, with the variable
substituted for the list access expression.
{{< sidenote "left" "nonlocal-note" "Here's the code:" >}}
Reading the lexical scope is one thing, but modifying it is another. To prevent
accidental changes to the variables outside a nested function, Python assumes
that variables assigned inside the function body are local to the function. Thus, to make
sure changing our variable (the traverser index) has an effect outside the function
(as it should) we must include the <code>nonlocal</code> keyword, telling
Python that we're not declaring a new, local variable, but mutating the old one.
{{< /sidenote >}}
{{< codelines "Haskell" "cs325-langs/src/LanguageThree.hs" 342 363 >}}
### The Output
Let's see what the compiler spits out:
```Python
from bisect import bisect
import random
def qselect(xs,k,c):
if xs==[]:
return 0
bisector = 0
pivot = random.randrange(len(xs))
pivotE = xs.pop(pivot)
def temp1():
nonlocal bisector
l = []
r = []
while bisector<len(xs):
if c(xs[bisector])<c(pivotE):
l.append(xs[bisector])
else:
r.append(xs[bisector])
bisector = bisector+1
return (l, r)
(leftList,rightList) = temp1()
if k>len(leftList)+1:
return qselect(rightList, k-len(leftList)-1, c)
elif k==len(leftList)+1:
return pivotE
else:
return qselect(leftList, k, c)
def closestUnsorted(xs,k,n):
min = qselect(list(xs), k, (lambda x: abs(x-n)))
out = []
countEqual = k
iter = 0
while iter<len(xs):
if abs(xs[iter]-n)<abs(min-n):
countEqual = countEqual-1
iter = iter+1
0
iter = 0
while iter<len(xs):
if abs(xs[iter]-n)==abs(min-n) and countEqual>0:
countEqual = countEqual-1
out = out+[xs[iter]]
elif abs(xs[iter]-n)<abs(min-n):
out = out+[xs[iter]]
iter = iter+1
0
return out
def closestSorted(xs,k,n):
start = bisect(xs, n)
counter = 0
left = start
right = start
while counter!=k and left-1*1>=0 and right<len(xs):
if abs(xs[left-1*1]-n)<abs(xs[right]-n):
left = left-1
0
else:
right = right+1
0
counter = counter+1
while counter!=k and (left-1*1>=0 or right<len(xs)):
if left-1*1>=0:
left = left-1
0
else:
right = right+1
0
counter = counter+1
return xs[(left):(right)]
def xyz(xs,k):
xs.sort()
x = 0
dest = []
while x<len(xs):
z = x+2
y = x+1
while y<z and z<len(xs):
if xs[x]+xs[y]==xs[z]:
dest = dest+[(xs[x], xs[y], xs[z])]
z = z+1
0
elif xs[x]+xs[y]>xs[z]:
z = z+1
0
else:
y = y+1
0
x = x+1
0
return dest
```
Observe that the generated code just uses indices, `+`, `-`, and various comparison operators.
Our traverser is an example of a __zero cost abstraction__: conceptually, it
operates at a higher level, freeing us from worrying about adding, subtracting, and
comparing indices, while costing nothing in performance or safety
in the final output. Also observe the various standalone `0` statements. This is an issue
with the translator: traverser macros may not always yield an expression, but
the type of `translateExpr` and `translateStmt` effectively requires one. Thus,
when a macro doesn't generate anything useful, we give it the placeholder expression `0`.
That concludes this third post in the series. I hope to see you in the next one!


@@ -334,10 +334,10 @@ code for the global function:
{{< gmachine "Unwind-Global" >}} {{< gmachine "Unwind-Global" >}}
{{< gmachine_inner "Before">}} {{< gmachine_inner "Before">}}
\( \text{Unwind} : i \quad a, a_0, a_1, ..., a_n : s \quad d \quad h[\substack{a : \text{NGlobal} \; n \; c \\ a_k : \text{NApp} \; a_{k-1} \; a_k'}] \quad m \) \( \text{Unwind} : i \quad a, a_0, a_1, ..., a_{n-1} : s \quad d \quad h[\substack{a : \text{NGlobal} \; n \; c \\ a_k : \text{NApp} \; a_{k-1} \; a_k'}] \quad m \)
{{< /gmachine_inner >}} {{< /gmachine_inner >}}
{{< gmachine_inner "After" >}} {{< gmachine_inner "After" >}}
\( c \quad a_0', a_1', ..., a_n', a_n : s \quad d \quad h[\substack{a : \text{NGlobal} \; n \; c \\ a_k : \text{NApp} \; a_{k-1} \; a_k'}] \quad m \) \( c \quad a_0', a_1', ..., a_{n-1}', a_{n-1} : s \quad d \quad h[\substack{a : \text{NGlobal} \; n \; c \\ a_k : \text{NApp} \; a_{k-1} \; a_k'}] \quad m \)
{{< /gmachine_inner >}} {{< /gmachine_inner >}}
{{< gmachine_inner "Description" >}} {{< gmachine_inner "Description" >}}
Call a global function. Call a global function.
@@ -345,12 +345,12 @@ code for the global function:
{{< /gmachine >}}
In this rule, we used a general rule for \\(a\_k\\), in which \\(k\\) is any number
between 1 and \\(n-1\\). We also expect the `NGlobal` node to contain two parameters,
\\(n\\) and \\(c\\). \\(n\\) is the arity of the function (the number of arguments
it expects), and \\(c\\) are the instructions to construct the function's tree.
The attentive reader will have noticed a catch: we kept \\(a\_{n-1}\\) on the stack!
This once again goes back to replacing a node in-place. \\(a\_{n-1}\\) is the address of the "root" of the
whole expression we're simplifying. Thus, to replace the value at this address, we need to keep
the address until we have something to replace it with.


@@ -408,10 +408,10 @@ looks as follows for `definition_defn`:
{{< codelines "C++" "compiler/06/definition.cpp" 44 52 >}} {{< codelines "C++" "compiler/06/definition.cpp" 44 52 >}}
Notice that we terminate the function with Update and Pop. This Notice that we terminate the function with Update and Pop. Update
will turn the `ast_app` node that served as the "root" will turn the `ast_app` node that served as the "root"
of the application into an indirection to the value that we have computed. of the application into an indirection to the value that we have computed.
Doing so will also remove all "scratch work" from the stack. After this, Pop will remove all "scratch work" from the stack.
In essense, this is how we can lazily evaluate expressions. In essense, this is how we can lazily evaluate expressions.
Finally, we make a function in our `main.cpp` file to compile Finally, we make a function in our `main.cpp` file to compile


@@ -574,5 +574,6 @@ In the next several posts, we will improve
our compiler to properly free unused memory
using a __garbage collector__, implement
lambda functions using __lambda lifting__,
and use our Alloc instruction to implement `let/in` expressions.
We get started on the first of these tasks in
[Part 9 - Garbage Collection]({{< relref "09_compiler_garbage_collection.md" >}}).


@@ -0,0 +1,558 @@
---
title: Compiling a Functional Language Using C++, Part 9 - Garbage Collection
date: 2020-02-10T19:22:41-08:00
tags: ["C and C++", "Functional Languages", "Compilers"]
---
> "When will you learn? When will you learn that __your actions have consequences?__"
So far, we've entirely ignored the problem of memory management. Every time
that we need a new node for our growing graph, we simply ask for more memory
from the runtime with `malloc`. But selfishly, even when we no longer require
the memory allocated for a particular node, when that node is no longer in use,
we do not `free` it. In fact, our runtime currently has no idea about
which nodes are needed and which ones are ready to be discarded.
To convince ourselves that this is a problem, let's first assess the extent of the damage.
Consider the program from `works3.txt`:
{{< rawblock "compiler/09/examples/works3.txt" >}}
Compiling and running this program through `valgrind`, we get the following output:
```
==XXXX== LEAK SUMMARY:
==XXXX== definitely lost: 288 bytes in 12 blocks
==XXXX== indirectly lost: 768 bytes in 34 blocks
==XXXX== possibly lost: 0 bytes in 0 blocks
==XXXX== still reachable: 0 bytes in 0 blocks
==XXXX== suppressed: 0 bytes in 0 blocks
```
We lost 1056 bytes of memory, just to return the length of a list
with 3 elements. The problem of leaking memory is very real.
How do we solve this issue? We can't embed memory management into our language;
we want to keep it pure, and managing memory is typically pretty far from
that goal. Instead, we will make our runtime do the work of freeing memory.
Even then, this is a nontrivial goal: our runtime manipulates graphs, each
of which can be combined with others in arbitrary ways. In general, there
will not always be a _single_ node that, when freed, will guarantee that
another node can be freed as well. Instead, it's very possible in our
graphs that two parent nodes both refer to a third, and only when both
parents are freed can we free that third node itself. Consider,
for instance, the function `square` as follows:
```
defn square x = {
x * x
}
```
This function will receive, on top of the stack, a single graph representing `x`.
It will then create two applications of a global `(*)` function, each time
to the graph of `x`. Thus, it will construct a tree with two `App` nodes, both
of which
{{< sidenote "right" "lazy-note" "must keep track of a reference to x.">}}
We later take advantage of this, by replacing the graph of <code>x</code> with the
result of evaluating it. Since both <code>App</code> nodes point to the same
graph, when we evaluate it once, each node observes this update, and is not
required to evaluate <code>x</code> again. With this, we achieve lazy evaluation.
{{< /sidenote >}} The runtime will have to wait until both `App` nodes
are freed before it can free the graph of `x`.
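Here's a rough Python analogue of that sharing, with `x` as a memoized thunk referenced from
two places (illustrative only; the real runtime shares graph nodes, and all the names here are made up):
```Python
def make_thunk(compute):
    cell = {}
    def force():
        if "value" not in cell:
            cell["value"] = compute()   # evaluated at most once, then shared
        return cell["value"]
    return force

x = make_thunk(lambda: 21)       # stands in for the graph of x
square = lambda: x() * x()       # both "applications" reference the same thunk
print(square())                  # 441; x was computed exactly once
# Neither reference to x can be dropped until square itself is gone.
```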
This seems simple enough! If there are multiple things that may reference a node
in the graph, why don't we just keep track of how many there are? Once we know
that no more things are still referencing a node, we can free it. This is
called [reference counting](https://en.wikipedia.org/wiki/Reference_counting).
Reference counting is a valid technique, but unfortunately, it will not suit us.
The reason for this is that our language may produce
[cyclic graphs](https://en.wikipedia.org/wiki/Cycle_(graph_theory)). Consider,
for example, this definition of an infinite list of the number 1:
```
defn ones = { Cons 1 ones }
```
Envisioning the graph of the tree, we can see `ones` as an application
of the constructor `Cons` to two arguments, one of which is `ones` again.
{{< sidenote "right" "recursive-note" "It refers to itself!" >}}
Things are actually more complicated than this. In our current language,
recursive definitions are only possible in function definitions (like
<code>ones</code>). In our runtime, each time there is a reference
to a function, this is done through a <em>new node</em>, which
means that functions with recursive definitions are <em>not</em> represented cyclically.
Therefore, reference counting would work. However, in the future,
our language will have more ways of creating circular definitions,
some of which will indeed create cycles in our graphs. So, to
prepare for this, we will avoid the use of reference counting.
{{< /sidenote >}} In this case, when we compute the number of nodes
that require `ones`, we will always find the number to be at least 1: `ones`
needs `ones`, which needs `ones`, and so on. It will not be possible for
us to free `ones`, then, by simply counting the number of references to it.
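A tiny sketch makes the counting argument concrete (this is only an illustration of manual
reference counts, not how our runtime represents `ones`):
```Python
class Node:
    def __init__(self, name):
        self.name = name
        self.refcount = 0
        self.children = []

    def add_child(self, child):
        self.children.append(child)
        child.refcount += 1

ones = Node("ones")
ones.add_child(ones)    # Cons 1 ones: the node refers to itself
# Even with no outside references, the count never drops to zero:
print(ones.refcount)    # 1
```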
There's a more powerful technique than reference counting for freeing
unused memory: __mark-and-sweep garbage collection__. This technique
is conceptually pretty simple to grasp, yet will allow us to handle
cycles in our graphs. Unsurprisingly, we implement this type
of garbage collection in two stages:
1. __Mark__: We go through every node that is still needed by
the runtime, and recursively mark it, its children, and so on as "to keep".
2. __Sweep__: We go through every node we haven't yet freed, and,
if it hasn't been marked as "to keep", we free it.
This also seems simple enough. There are two main things for us
to figure out:
1. For __Mark__, what are the "nodes still needed by the runtime"?
These are just the nodes on the various G-machine stacks. If
a node is not on the stack, nor is it a child of a node
that is on the stack, why should we keep it around?
2. For __Sweep__, how do we keep track of all the nodes we haven't
yet freed? In our case, the solution is a global list of allocated
nodes, which is updated every time that a node is allocated.
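Before we get into the C runtime, here's the whole mark-and-sweep idea condensed into a few
lines of Python, with the nodes on a single stack acting as the roots (a conceptual sketch only;
the real version appears later in this post):
```Python
all_nodes = []                       # every node allocated and not yet freed

class Node:
    def __init__(self, children=()):
        self.children = list(children)
        self.reachable = False
        all_nodes.append(self)       # the sweep phase must know about every node

def mark(node):
    if node.reachable:
        return
    node.reachable = True
    for child in node.children:
        mark(child)

def collect(stack):
    global all_nodes
    for root in stack:               # mark: everything reachable from the stack
        mark(root)
    live = [n for n in all_nodes if n.reachable]
    for n in live:
        n.reachable = False          # reset the flag for the next collection
    all_nodes = live                 # sweep: unreachable nodes are dropped
```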
Wait a minute, though. Inside of `unwind` in C, we only have
a reference to the most recent stack. Our execution model allows
for an arbitrary number of stacks: we can keep using `Eval`,
placing the current stack on the dump, and starting a new stack
from scratch to evaluate a node. How can we traverse these stacks
from inside unwind? One solution could be to have each stack
point to the "parent" stack. To find all the nodes on the
stack, then, we'd start with the current stack, mark all the
nodes on it as "required", then move on to the parent stack,
rinse and repeat. This is plausible and pretty simple, but
there's another way.
We clean up after ourselves.
### Towards a Cleaner Stack
The G-machine compilation rules Simon Peyton Jones presents are written in a particular way. Every time
that a function is called, all it leaves behind on the stack is the graph node
that represents the function's output. Our own internal functions, however, have been less
careful. Consider, for instance, the "binary operator" function I showed you.
Its body is given by the following G-machine instructions:
```C++
instructions.push_back(instruction_ptr(new instruction_push(1)));
instructions.push_back(instruction_ptr(new instruction_eval()));
instructions.push_back(instruction_ptr(new instruction_push(1)));
instructions.push_back(instruction_ptr(new instruction_eval()));
instructions.push_back(instruction_ptr(new instruction_binop(op)));
```
When the function is called, there are at least 3 things on the stack:
1. The "outermost" application node, to be replaced with an indirection (to enable laziness).
2. The second argument to the binary operator.
3. The first argument to the binary operator.
Then, __Push__ adds another node to the stack, an __Eval__ forces
its evaluation (and leaves it on the stack). This happens again with the second argument.
Finally, we call __BinOp__, popping two values off the stack and combining them
according to the binary operator. This leaves the stack with 4 things: the 3 I described
above, and the newly computed value. This is fine as far as `eval` is concerned: its
implementation only asks for the top value on the stack after `unwind` finishes. But
for anything more complicated, this is a very bad side effect. We want to leave the
stack as clean as we found it - with one node and no garbage.
Fortunately, the way we compile functions is a good guide for how we should
compile internal operators and constructors. The idea is captured
by the two instructions we insert at the end of a user-defined
function:
{{< codelines "C++" "compiler/09/definition.cpp" 56 57 >}}
Once a result is computed, we turn the node that represented the application
into an indirection, and point it to the computed result (as I said before,
this enables lazy evaluation). We also pop the arguments given to the function
off the stack. Let's add these two things to the `gen_llvm_internal_op` function:
{{< codelines "C++" "compiler/09/main.cpp" 70 85 >}}
Notice, in particular, the `instruction_update(2)` and `instruction_pop(2)`
instructions that were recently added. A similar thing has to be done for data
type constructors. The difference, though, is that __Pack__ removes the data
it packs from the stack, and thus, __Pop__ is not needed:
{{< codelines "C++" "compiler/09/definition.cpp" 102 117 >}}
With this done, let's run a quick test: let's print the number of things
on the stack at the end of an `eval` call (before the stack is freed,
of course). We can compare the output of runtime without the fix (`old`)
and with the fix (`current`):
```
current old
Current stack size is 0 | Current stack size: 1
Current stack size is 0 | Current stack size: 1
Current stack size is 0 | Current stack size: 1
Current stack size is 0 | Current stack size: 1
Current stack size is 0 | Current stack size: 0
Current stack size is 0 | Current stack size: 0
Current stack size is 0 | Current stack size: 3
Current stack size is 0 | Current stack size: 0
Current stack size is 0 | Current stack size: 3
Current stack size is 0 | Current stack size: 0
Current stack size is 0 | Current stack size: 3
Result: 3 | Result: 3
```
The stack is now much cleaner! Every time `eval` is called, it starts
with one node, and ends with one node (which is then popped).
### One Stack to Rule Them All
Wait a minute. If the stack is really always empty at the end, do we really need to construct
a new stack every time?
{{< sidenote "right" "arity-note" "I think not" >}}
There's some nuance to this. While it is true that for the most
part, we can get rid of the new stacks in favor of a single
one, our runtime will experience a change. The change lies
in the Unwind-Global rule, which <em>requires that the
stack has as many children as the function needs
arguments</em>. Until now, there was no way
for this condition to be accidentally satisfied: the function
we were unwinding was the only thing on the stack. Now,
though, things are different: the function being
unwound may share a stack with something else,
and just checking the stack size will not be sufficient.
<em>I believe</em> that this is not a problem for us,
since the compiler will only emit <strong>Eval</strong>
instructions for things it knows are data types or numbers,
meaning their type is not a partially applied function
that is missing arguments. However, this is a nontrivial
observation.
{{< /sidenote >}}, and Simon Peyton Jones seems to
agree. In _Implementing Functional Languages: a tutorial_, he mentions
that the dump does not need to be implemented as a real stack of stacks.
So let's try this out: instead of starting a new stack using `eval`,
let's use an existing one, by just calling `unwind` again. To do so,
all we have to do is change our `instruction_eval` instruction. When
the G-machine wants something evaluated now, it should just call
`unwind` directly!
To make this change, we have to make `unwind` available to the
compiler. We thus declare it in the `llvm_context.cpp` file:
{{< codelines "C++" "compiler/09/llvm_context.cpp" 158 163 >}}
And even create a function to construct a call to `unwind`
with the following signature:
{{< codelines "C++" "compiler/09/llvm_context.hpp" 58 58 >}}
We implement it like so:
{{< codelines "C++" "compiler/09/llvm_context.cpp" 217 220 >}}
Finally, the `instruction_eval::gen_llvm` method simply calls
`unwind`:
{{< codelines "C++" "compiler/09/instruction.cpp" 157 159 >}}
After this change, we only call `eval` from `main`. Furthermore,
since `eval` releases all the resources it allocates before
returning, we won't be able to
{{< sidenote "right" "retrieve-note" "easily retrieve" >}}
We were able to do this before, but that's because our
runtime didn't free the nodes, <em>ever</em>. Now that
it does, returning a node violates that node's lifetime.
{{< /sidenote >}} the result of the evaluation from it.
Thus, we simply merge `eval` with `main` - combining
the printing and the initialization / freeing
code.
With this, only one stack will be allocated for the entirety of
program execution. This doesn't just help us save on memory
allocations, but also __solves the problem of marking
valid nodes during garbage collection__! Instead of traversing
a dump of stacks, we can now simply traverse a single stack;
all that we need is in one place.
So this takes care, more or less, of the "mark" portion of mark-and-sweep.
Using the stack, we can recursively mark the nodes that we need. But
what about "sweeping"? How can we possibly know of every node that
we've allocated? There's some more bookkeeping for us to do.
### It's All Connected
There exists a simple technique I've previously seen (and used)
for keeping track of all the allocated memory. The technique is
to __turn all the allocated nodes into elements of a linked list__.
The general process of implementing this proceeds as follows:
1. To each node, add a "next" pointer.
2. Keep a handle to the whole node chain somewhere.
3. Add each newly allocated node to the front of the whole chain.
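In Python terms, the bookkeeping looks something like this (a sketch with a global handle,
just to show the idea; the real `gmachine_track` is shown later):
```Python
class Node:
    def __init__(self):
        self.gc_next = None      # 1. each node carries a "next" pointer

gc_nodes = None                  # 2. a handle to the whole chain

def track(node):
    # 3. push every newly allocated node onto the front of the chain
    global gc_nodes
    node.gc_next = gc_nodes
    gc_nodes = node
    return node
```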
This "somewhere" could be a global variable. However,
since we already pass a stack to almost all of our
functions, it makes more sense to make the list handle
a part of some data structure that will also contain the stack,
and pass that around, instead. This keeps all of the G-machine
data in one place, and in principle could allow for concurrent
execution of more than one G-machine in a single program. Let's
call our new data structure `gmachine`:
{{< codelines "C++" "compiler/09/runtime.h" 69 74 >}}
Here, the `stack` field holds the G-machine stack,
and the `gc_nodes` is the handle to the list of all the nodes
we've allocated and not yet freed. Don't worry about the `gc_node_count`
and `gc_threshold` fields - we'll get to them a little later.
This is going to be a significant change. First of all, since
the handle won't be global, it can't be accessed from inside the
`alloc_*` functions. Instead, we have to make sure to add
nodes allocated through `alloc_*` to a G-machine
wherever we call the allocators. To make it easier to add nodes to a G-machine's
GC handle, let's make a new function, `track`:
```C
struct node_base* gmachine_track(struct gmachine*, struct node_base*);
```
This function will add the given node to the G-machine's handle,
and return that same node. This way, we can wrap nodes in
a call to `gmachine_track`. We will talk about this
function's implementation later in the post.
This doesn't get us all the way to a working runtime, though:
right now, we still pass around `struct stack*` instead of
`struct gmachine*` everywhere. However, the whole point
of adding the `gmachine` struct was to store more data in it!
Surely we need that new data somewhere, and thus, we need to
use the `gmachine` struct for _some_ functions. What functions
_do_ need a whole `gmachine*`, and which ones only need
a `stack*`?
1. {{< sidenote "right" "ownership-note" "Clearly," >}}
This might not be clear. Maybe <em>pushing</em> onto a stack will
add a node to our GC handle, and so, we need to have access
to the handle in <code>stack_push</code>. The underlying
question is that of <em>ownership</em>: when we allocate
a node, which part of the program does it "belong" to?
The "owner" of the node should do the work of managing
when to free it or keep it. Since we already agreed to
create a <code>gmachine</code> struct to house the GC
handle, it makes sense that nodes are owned by the
G-machine. Thus, the assumption in functions like
<code>stack_push</code> is that the "owner" of the node
already took care of allocating and tracking it, and
<code>stack_push</code> itself shouldn't bother.
{{< /sidenote >}} `stack_push`, `stack_pop`, and similar functions
do not require a G-machine.
2. `stack_alloc` and `stack_pack` __do__ need a G-machine,
because they must allocate new nodes. Where the nodes
are allocated, we should add them to the GC handle.
3. Since they use `stack_alloc` and `stack_pack`,
generated functions also need a G-machine.
4. Since `unwind` calls the generated functions,
it must also receive a G-machine.
As far as stack functions go, we only _need_ to update
`stack_alloc` and `stack_pack`. Everything else
doesn't require new node allocations, and thus,
does not require the GC handle. However, this makes
our code rather ugly: we have a set of mostly `stack_*`
functions, followed suddenly by two `gmachine_*` functions.
In the interest of cleanliness, let's instead do the following:
1. Make all functions associated with G-machine rules (like
__Alloc__, __Update__, and so on) require a `gmachine*`. This
way, there's a correspondence between our code and the theory.
2. Leave the rest of the functions (`stack_push`, `stack_pop`,
etc.) as is. They are not G-machine specific, and don't
require a GC handle, so there's no need to touch them.
Let's make this change. We end up with the following
functions:
{{< codelines "C" "compiler/09/runtime.h" 56 84 >}}
For the majority of the changed functions, the
updates are
{{< sidenote "right" "cosmetic-note" "cosmetic." >}}
We must also update the LLVM/C++ declarations of
the affected functions: many of them now take a
<code>gmachine_ptr_type</code> instead of <code>stack_ptr_type</code>.
This change is not shown explicitly here (it is hard to do with our
growing code base), but it is nonetheless significant.
{{< /sidenote >}} The functions
that require more significant modifications are `gmachine_alloc`
and `gmachine_pack`. In both, we must now make a call to `gmachine_track`
to ensure that a newly allocated node will be garbage collected in the future.
The updated code for `gmachine_alloc` is:
{{< codelines "C" "compiler/09/runtime.c" 140 145 >}}
Correspondingly, the updated code for `gmachine_pack` is:
{{< codelines "C" "compiler/09/runtime.c" 147 162 >}}
Note that we've secretly made one more change. Instead of
allocating `sizeof(*data) * n` bytes of memory for
the array of packed nodes, we allocate `sizeof(*data) * (n + 1)`,
and set the last element to `NULL`. This will allow other
functions (which we will soon write) to know how many elements are packed inside
a `node_data` (effectively, we've added a `NULL` terminator).
We must change our compiler to keep it up to date with this change. Importantly,
it must know that a G-machine struct exists. To give it
this information, we add a new
`llvm::StructType*` called `gmachine_type` to the `llvm_context` class,
initialize it in the constructor, and set its body as follows:
{{< codelines "C++" "compiler/09/llvm_context.cpp" 21 26 >}}
The compiler must also know that generated functions now use the G-machine
struct rather than a stack struct:
{{< codelines "C++" "compiler/09/llvm_context.cpp" 19 19 >}}
Since we still use some functions that require a stack and not a G-machine,
we must have a way to get the stack from a G-machine. To do this,
we create a new `unwrap` function, which uses LLVM's GEP instruction
to get a pointer to the G-machine's stack field:
{{< codelines "C++" "compiler/09/llvm_context.cpp" 222 225 >}}
We use this function elsewhere, such as in `llvm_context::create_pop`:
{{< codelines "C++" "compiler/09/llvm_context.cpp" 176 179 >}}
Finally, we want to make sure our generated functions don't allocate
nodes without tracking them with the G-machine. To do so, we modify
all the `create_*` methods to require the G-machine function argument,
and update the functions themselves to call `gmachine_track`. For
example, here's `llvm_context::create_num`:
{{< codelines "C++" "compiler/09/llvm_context.cpp" 235 239 >}}
Of course, this requires us to add a new `create_track` method
to the `llvm_context`:
{{< codelines "C++" "compiler/09/llvm_context.cpp" 212 215 >}}
This is good. Let's now implement the actual mark-and-sweep algorithm
in `gmachine_gc`:
{{< codelines "C" "compiler/09/runtime.c" 186 204 >}}
In the code above, we first iterate through the stack,
calling `gc_visit_node` on every node that we encounter. The
assumption is that once `gc_visit_node` is done, every node
that _can_ be reached has its `gc_reachable` field set to 1,
and all the others have it set to 0.
Once we reach the end of the stack, we continue to the "sweep" phase,
iterating through the linked list of nodes (held in the G-machine
GC handle `gc_nodes`). For each node, if its `gc_reachable` flag
is not set, we remove it from the linked list, and call `free_node_direct`
on it. Otherwise (that is, if the flag __is__ set), we clear it,
so that the node can potentially be garbage collected in a future
invocation of `gmachine_gc`.
`gc_visit_node` recursively marks a node and its children as reachable:
{{< codelines "C" "compiler/09/runtime.c" 51 70 >}}
This is possible with the `node_data` nodes because of the change we
made to the `gmachine_pack` instruction earlier: now, the last element
of the "packed" array is `NULL`, telling `gc_visit_node` that it has
reached the end of the list of children.
`free_node_direct` performs a non-recursive deallocation of all
the resources held by a particular node. So far, this is only
needed for `node_data` nodes, since the arrays holding their children
are dynamically allocated. Thus, the code for the function is
pretty simple:
{{< codelines "C" "compiler/09/runtime.c" 45 49 >}}
### When to Collect
When should we run garbage collection? Initially, I tried
running it after every call to `unwind`. However, this
quickly proved impractical: the performance of all
the programs in the language decreased by a spectacular
amount. Programs like `works1.txt` and `works2.txt`
would take tens of seconds to complete.
Instead of this madness, let's settle for an approach
common to many garbage collectors. Let's __perform
garbage collection every time the amount of
memory we've allocated doubles__. Tracking when the
amount of allocated memory doubles is the purpose of
the `gc_node_count` and `gc_threshold` fields in the
`gmachine` struct. The former counts how many nodes
the garbage collector is currently tracking, and the
latter holds the number of nodes the G-machine must
reach before triggering another collection.
Since the G-machine is made aware of allocations
by a call to the `gmachine_track` function, this
is where we will attempt to perform garbage collection.
We end up with the following code:
{{< codelines "C++" "compiler/09/runtime.c" 171 184 >}}
When a node is added to the GC handle, we increment the `gc_node_count`
field. If the new value of this field exceeds the threshold,
we perform garbage collection. There are cases in which
this is fairly dangerous: for instance, `gmachine_pack` first
moves all packed nodes into an array, then allocates a `node_data`
node. This means that for a brief moment, the nodes stored
into the new data node are inaccessible from the stack,
and thus susceptible to garbage collection. To prevent
situations like this, we run `gc_visit_node` on the node
being tracked, marking it and its children as "reachable".
Finally, we set the next "free" threshold to double
the number of currently allocated nodes.
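Pulling the policy together, here's a rough sketch of that bookkeeping; the `gc_` fields are the ones described above, while `visit` and `collect` stand in for `gc_visit_node` and `gmachine_gc`:
```C
#include <stddef.h>

/* Stand-ins for the GC-related pieces of the gmachine struct. */
struct node { int gc_reachable; struct node* gc_next; };
struct gmachine_gc {
    struct node* gc_nodes;  /* linked list of every allocation we know about */
    size_t gc_node_count;
    size_t gc_threshold;
};

void visit(struct node* n);          /* marks n and its children as reachable */
void collect(struct gmachine_gc* g); /* mark from the stack, then sweep */

/* Record an allocation; collect once the number of tracked nodes hits the threshold. */
void track(struct gmachine_gc* g, struct node* n) {
    n->gc_next = g->gc_nodes; /* prepend to the allocation list */
    g->gc_nodes = n;
    g->gc_node_count++;
    if (g->gc_node_count >= g->gc_threshold) {
        visit(n);   /* protect the node we were just handed (and its children) */
        collect(g);
        g->gc_threshold = g->gc_node_count * 2; /* collect again once allocations double */
    }
}
```
Doubling keeps the cost of a collection roughly proportional to the amount of live data, which is why so many collectors use some version of this heuristic.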
This is about as much as we need to do. The change in this
post was a major one, and required updating multiple files.
As always, you're welcome to check out [the compiler source
code for this post](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/09).
To wrap up, let's evaluate our change.
To put extra stress on the compiler, I came up with a prime number
generator. Since booleans are not in the standard library, and
since it isn't possible to pattern match on numbers, my
only option was to use Peano encoding. This effectively
means that numbers are represented as linked lists,
which makes garbage collection all the more
important. The program is quite long, but you can
[find the entire code here](https://dev.danilafe.com/Web-Projects/blog-static/src/branch/master/code/compiler/09/examples/primes.txt).
When I ran the `primes` program compiled with the
previous version of the compiler under `time`, I
got the following output:
```
Maximum resident set size (kbytes): 935764
Minor (reclaiming a frame) page faults: 233642
```
In contrast, here is the output of `time` when running
the same program compiled with the new version of
the compiler:
```
Maximum resident set size (kbytes): 7448
Minor (reclaiming a frame) page faults: 1577
```
We have reduced maximum memory usage by a factor of
125, and the number of page faults by a factor of 148.
That seems pretty good!
With this success, we end today's post. As I mentioned
before, we're not done. The language is still clunky to use,
and can benefit from `let/in` expressions and __lambda functions__.
Furthermore, our language is currently monomorphic, and would
be much better with __polymorphism__. Finally, to make our language
capable of more-than-trivial work, we may want to implement
__Input/Output__ and __strings__. I hope to see you in future posts,
where we will implement these features!


@@ -0,0 +1,65 @@
---
title: Compiling a Functional Language Using C++, Part 10 - Polymorphism
date: 2019-12-09T23:26:46-08:00
tags: ["C and C++", "Functional Languages", "Compilers"]
draft: true
---
Last time, we wrote some pretty interesting programs in our little language.
We successfully expressed arithmetic and recursion. But there's one thing
that we cannot express in our language without further changes: an `if` statement.
Suppose we didn't want to add a special `if/else` expression into our language.
Thanks to lazy evaluation, we can express it using a function:
```
defn if c t e = {
case c of {
True -> { t }
False -> { e }
}
}
```
But one issue remains: so far, our compiler is __monomorphic__. That
is, a particular function can only have one possible type for each one of its
arguments. With our current setup, something like this
{{< sidenote "right" "if-note" "would not work:" >}}
In a polymorphically typed language, the inner <code>if</code> would just evaluate to
<code>False</code>, and the whole expression to 3.
{{< /sidenote >}}
```
if (if True False True) 11 3
```
This is because, for this to typecheck, both of the following would need to hold (borrowing
some of our notation from the [typechecking]({{< relref "03_compiler_typechecking.md" >}}) post):
$$
\\text{if} : \\text{Int} \\rightarrow \\text{Int}
$$
$$
\\text{if} : \\text{Bool} \\rightarrow \\text{Bool}
$$
But using our rules so far, such a thing is impossible, since there is no way for
\\(\text{Int}\\) to be unified with \\(\text{Bool}\\). We need a more powerful
set of rules to describe our program's types. One such set of rules is
the [Hindley-Milner type system](https://en.wikipedia.org/wiki/Hindley%E2%80%93Milner_type_system),
which we have previously alluded to. In fact, the rules we came up
with were already very close to Hindley-Milner, with the exception of two:
__generalization__ and __instantiation__. Instantiation first:
$$
\frac
{\\Gamma \\vdash e : \\sigma \\quad \\sigma' \\sqsubseteq \\sigma}
{\\Gamma \\vdash e : \\sigma'}
$$
Next, generalization:
$$
\frac
{\\Gamma \\vdash e : \\sigma \\quad \\alpha \\not \\in \\text{free}(\\Gamma)}
{\\Gamma \\vdash e : \\forall a . \\sigma}
$$
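To make instantiation a little more concrete: suppose (hypothetically, and sticking with the shorthand above) that generalization has already given \\(\\text{if}\\) the type \\(\\forall a . a \\rightarrow a\\). The instantiation rule then lets us specialize that type at each use site:
$$
\\frac
{\\Gamma \\vdash \\text{if} : \\forall a . a \\rightarrow a \\quad \\text{Int} \\rightarrow \\text{Int} \\sqsubseteq \\forall a . a \\rightarrow a}
{\\Gamma \\vdash \\text{if} : \\text{Int} \\rightarrow \\text{Int}}
$$
The same rule, applied with \\(\\text{Bool}\\) in place of \\(\\text{Int}\\), gives us \\(\\text{if} : \\text{Bool} \\rightarrow \\text{Bool}\\), which is exactly the pair of judgments we needed.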

content/blog/crystal_nix.md (new file)

@@ -0,0 +1,116 @@
---
title: Building a Basic Crystal Project with Nix
date: 2020-02-16T14:31:42-08:00
tags: ["Crystal", "Nix"]
---
I really like the idea of Nix: you can have reproducible builds, written more or less
declaratively. I also really like the programming language [Crystal](https://crystal-lang.org/),
which is a compiled Ruby derivative. Recently, I decided to try to learn NixOS as a package author,
and made a Crystal project of mine, [pegasus](https://github.com/DanilaFe/pegasus),
my guinea pig. In this post, I will document my experience setting up Nix with Crystal.
### Getting Started
Pegasus is a rather simple package in terms of the build process - it has no dependencies, and
can be built with nothing but a Crystal compiler. Thus, I didn't have to worry about
dependencies. However, the `nixpkgs` repository does have a way to specify build dependencies
for a Nix project: [`crystal2nix`](https://github.com/NixOS/nixpkgs/blob/master/pkgs/development/compilers/crystal/crystal2nix.nix).
`crystal2nix` is another Nix package, which consists of a single Crystal binary program of
the same name. It translates a `shards.lock` file, generated by Crystal's `shards` package
manager, into a `shards.nix` file, which allows Nix to properly build the dependencies
of a Crystal package. If you have a project with a `shards.lock` file, you can use `crystal2nix`
inside a `nix-shell` as follows:
```Bash
nix-shell -p crystal2nix --run crystal2nix
```
The above command says, create an environment with the `crystal2nix` package, and run the
program. Note that you should run this
[inside the project's root](https://github.com/NixOS/nixpkgs/blob/21bfc57dd9eb5c7c58b6ab0bfa707cbc7cf04e98/pkgs/development/compilers/crystal/build-package.nix#L2). Also note that if you
don't depend on other Crystal packages, you will not have a `shards.lock`, and running
`crystal2nix` is unnecessary.
The Crystal folder in the `nixpkgs` repository contains one more handy utility:
`buildCrystalPackage`. This is a function exported by the `crystal` Nix package, which
significantly simplifies the process of building a Crystal binary package. We can
look to `crystal2nix.nix` (linked above) for a concrete example. We can observe the following
attributes:
* `pname` - the name of the package.
* `version` - the
{{< sidenote "right" "version-note" "version" >}}
In my example code, I set the Nix package version to the commit hash. Doing this alone
is probably not the best idea, since it will prevent version numbers from being ordered.
However, version <code>0.1.0</code> didn't make sense either, since the project technically
doesn't have a release yet. You should set this to an actual package version if you have
one.
{{< /sidenote >}} of the package, as usual.
* `crystalBinaries.<xxx>.src` - the source Crystal file for binary `xxx`.
Using these attributes, I concocted the following expression for pegasus and all
of its included programs:
```nix
{ stdenv, crystal, fetchFromGitHub }:
let
version = "0489d47b191ecf8501787355b948801506e7c70f";
src = fetchFromGitHub {
owner = "DanilaFe";
repo = "pegasus";
rev = version;
sha256 = "097m7l16byis07xlg97wn5hdsz9k6c3h1ybzd2i7xhkj24kx230s";
};
in
crystal.buildCrystalPackage {
pname = "pegasus";
inherit version;
inherit src;
crystalBinaries.pegasus.src = "src/pegasus.cr";
crystalBinaries.pegasus-dot.src = "src/tools/dot/pegasus_dot.cr";
crystalBinaries.pegasus-sim.src = "src/tools/sim/pegasus_sim.cr";
crystalBinaries.pegasus-c.src = "src/generators/c/pegasus_c.cr";
crystalBinaries.pegasus-csem.src = "src/generators/csem/pegasus_csem.cr";
crystalBinaries.pegasus-crystal.src = "src/generators/crystal/pegasus_crystal.cr";
crystalBinaries.pegasus-crystalsem.src = "src/generators/crystalsem/pegasus_crystalsem.cr";
}
```
Here, I used Nix's `fetchFromGitHub` helper function. It clones a Git repository
from `https://github.com/<owner>/<repo>`, checks out the `rev` commit or branch,
and makes sure that it matches the `sha256` hash. The hash check is required so
that Nix can maintain the reproducibility of the build: if the commit is changed,
the code to compile may not be the same, and thus, the package would be different. The
hash helps detect such changes. To generate the hash, I used `nix-prefetch-git`,
which tries to clone the repository and compute its hash.
In the case that your project has a `shards.nix` file generated as above, you will also
need to add the following line inside your `buildCrystalPackage` call:
```
shardsFile = ./shards.nix;
```
The `shards.nix` file will contain all the dependency Git repositories, and the
`shardsFile` attribute will forward this list to `buildCrystalPackage`, which
will handle their inclusion in the package build.
That's pretty much it! The `buildCrystalPackage` Nix function does most of the heavy
lifting for Crystal binary packages. Please also check out
[this web page](https://edef.eu/~qyliss/nixlib/file/nixpkgs/doc/languages-frameworks/crystal.section.md.html):
I found out from it that `pname` had to be used instead of `name`, and it also has some information
regarding additional compiler options and build inputs.
### Appendix: A Small Caveat
I was running `crystal2nix` (and doing all of my Nix-related work) in a NixOS virtual
machine. However, my version of NixOS was somewhat out of date (`19.03`), and I could
not retrieve `crystal2nix`. I had to switch channels to `nixos-19.09`, which is the current
stable version of NixOS.
There was one more difficulty involved in
[switching channels](https://nixos.wiki/wiki/Nix_channels): I had to do it as root.
It so happens that if you add a channel as a non-root user, your system will still use
the channel specified by root, and thus, you will not experience the update. You can spot
this issue in the output of `nix-env -u`; it will complain of duplicate packages.


@@ -0,0 +1,110 @@
---
title: Using GHC IDE for Haskell Error Checking and Autocompletion
date: 2020-01-06T17:07:25-08:00
tags: ["Haskell", "Language Server Protocol"]
---
Last year, when I took Oregon State University's CS 381 class, I ended up setting
up my editor with the Haskell IDE engine. This made it possible
to detect errors, view types, and have good autocompletion within the editor itself.
Recently, I've found that GHC IDE works better for my projects, so instead
of butchering the original article, I'll just quickly write an updated version here,
referencing the old one when necessary.
By the end of the article, your editor should be able to detect errors and
properly autocomplete Haskell code, somewhat like in the below screenshot:
![Imgur](https://i.imgur.com/CRMznGL.png)
### Downloading and Installing GHC IDE
GHC IDE is a Haskell-based program that uses the
{{< sidenote "right" "lsp-note" "language server protocol" >}}
You don't really need to know what the language server protocol (LSP) is
to use GHC IDE. If you are nonetheless interested, I wrote a little
bit about it <a href="{{< ref "/blog/haskell_language_server" >}}#prelude-language-server-protocol">in the previous iteration of this post.</a>
If you want more information, check out the <a href="https://microsoft.github.io/language-server-protocol/">official Microsoft page on LSP.</a>
{{< /sidenote >}} to communicate with any editor that supports it. Editors
with support for the LSP include Atom, Visual Studio Code, Emacs, and Vim. Thus,
you can get a good Haskell development environment without tying yourself to one
application or service.
We first want to download the GHC IDE. To do this, you need to have
[Git](https://git-scm.com/) installed. Once you have that, in your Git bash (on Windows)
or in your terminal (macOS, Linux), type the command:
```
git clone https://github.com/digital-asset/ghcide.git
```
To install GHC IDE, you can use either `cabal` (which is typically the `cabal-install` package,
and is required normally for this class) or `stack` (a build tool). For `cabal`:
```
cabal install
```
And for `stack`:
```
stack install
```
This will create an executable in your `~/.local/bin` directory. By default, this
is not usable from other programs, such as Vim, so you should add this directory
to your path. On Linux and macOS, this is done by adding the following line
to your `.bashrc` file (or equivalent):
```
export PATH=$PATH:/home/<yourusername>/.local/bin
```
On Windows, this is done by
{{< sidenote "right" "path-note" "editing your PATH variable." >}}
If you need to know how to change your <code>PATH</code>, I wrote
about it briefly in the <a href="{{< ref "/blog/haskell_language_server" >}}
#installation-of-v0-5-0-0-windows-systems">previous iteration of this post.</a>
{{< /sidenote >}} I don't run Windows,
so I don't know where `cabal install` will place the executable, but I do know
where the executable will appear if you use `stack install` - in the directory
given by:
```
stack path --local-bin
```
Adding that to your path should be sufficient to use GHC IDE.
### Setting up Your Editor
This is where the paths diverge. I personally use (Neo)vim, but for the sake
of completeness, I'll go over installation for Atom and VSCode (I'm not including
Emacs because I know nothing about configuring Emacs).
#### Atom
There appears to be an Atom extension specifically for GHC IDE:
[ide-haskell-ghcide](https://atom.io/packages/ide-haskell-ghcide). It doesn't
have a lot of configuration options, and will certainly require GHC IDE to
be in your path. However, since both GHC IDE and the Haskell IDE engine
use the Language Server Protocol, the more mature [ide-haskell-hie](https://atom.io/packages/ide-haskell-hie) extension may work, as well. In fact, since `ide-haskell-ghcide` is so young,
I'd recommend trying `ide-haskell-hie` first, configuring the settings (found under
_Settings > Packages > (Search ide-haskell-hie) > Settings_)
to use the following full path:
```
<output of stack path --local-bin>/ghcide
```
#### VSCode
The team behind GHC IDE maintains an official VSCode extension found
[here](https://marketplace.visualstudio.com/items?itemName=DigitalAssetHoldingsLLC.ghcide).
Installing it, when you have GHC IDE also installed, should be sufficient to get
VSCode to autocomplete and error check.
#### (Neo)vim
My original recommendations for (neo)vim remain unchanged, with the exception
of using `ghcide` instead of `hie` in the `serverCommands` variable. You
can find the original instructions
[here](https://danilafe.com/blog/haskell_language_server/#neovim).
### Conclusion
I hope that using GHC IDE, you'll be able to have a significantly more pleasant
Haskell experience in CS 381. Enjoy!


@@ -10,7 +10,7 @@ I found that __sidenotes__ were a feature that I didn't even know I needed.
 A lot of my writing seems to use small parenthesized remarks (like this), which,
 although it doesn't break the flow in a grammatical sense, lengthens the
 sentence, and makes it harder to follow. Since I do my best to write content
-to help explain stuff (like the [compiler series]({{ relref "00_compiler_intro.md" }})),
+to help explain stuff (like the [compiler series]({{< relref "00_compiler_intro.md" >}})),
 making sentences __more__ difficult to understand is a no-go.
 So, what do they look like?


@@ -1,7 +1,10 @@
@import "style.scss"; @import "style.scss";
$sidenote-width: 350px; $sidenote-accommodate-shrink: 10rem;
$sidenote-offset: 15px; $sidenote-width: 30rem;
$sidenote-offset: 1.5rem;
$sidenote-padding: 1rem;
$sidenote-highlight-border-width: .2rem;
.sidenote { .sidenote {
&:hover { &:hover {
@@ -11,15 +14,16 @@ $sidenote-offset: 15px;
 }
 .sidenote-content {
-border: 2px dashed;
-padding: 9px;
+border: $sidenote-highlight-border-width dashed;
+padding: $sidenote-padding -
+  ($sidenote-highlight-border-width - $standard-border-width);
 border-color: $primary-color;
 }
 }
 }
 .sidenote-label {
-border-bottom: 2px solid $primary-color;
+border-bottom: .2rem solid $primary-color;
 }
 .sidenote-checkbox {
@@ -30,7 +34,7 @@ $sidenote-offset: 15px;
 display: block;
 position: absolute;
 width: $sidenote-width;
-margin-top: -1.5em;
+margin-top: -1.5rem;
 &.sidenote-right {
 right: 0;
@@ -42,29 +46,50 @@ $sidenote-offset: 15px;
 margin-left: -($sidenote-width + $sidenote-offset);
 }
-@media screen and
-(max-width: $container-width + 2 * ($sidenote-width + 2 * $sidenote-offset)) {
-position: static;
-margin-top: 10px;
-margin-bottom: 10px;
-width: 100%;
-display: none;
-.sidenote-checkbox:checked ~ & {
-display: block;
-}
-&.sidenote-left {
-margin-left: 0px;
-}
-&.sidenote-right {
-margin-right: 0px;
-}
-}
 @include bordered-block;
-padding: 10px;
+padding: $sidenote-padding;
 box-sizing: border-box;
 text-align: left;
 }
+@mixin hidden-sidenote {
+position: static;
+margin-top: 1rem;
+margin-bottom: 1rem;
+width: 100%;
+display: none;
+.sidenote-checkbox:checked ~ & {
+display: block;
+}
+}
+@media screen and
+(max-width: $container-width + 2 * ($sidenote-width + 2 * $sidenote-offset)) {
+.sidenote-content.sidenote-left {
+@include hidden-sidenote;
+margin-left: 0rem;
+}
+.container {
+position: relative;
+left: -$sidenote-width/2
+}
+}
+@media screen and
+(max-width: $container-width + ($sidenote-width + 3 * $sidenote-offset)) {
+.post-content {
+max-width: 100%;
+}
+.sidenote-content.sidenote-right {
+@include hidden-sidenote;
+margin-right: 0rem;
+}
+.container {
+position: initial;
+}
+}


@@ -1,24 +1,28 @@
-$container-width: 800px;
+$container-width: 45rem;
+$standard-border-width: .075rem;
 $primary-color: #36e281;
 $primary-color-dark: darken($primary-color, 10%);
 $code-color: #f0f0f0;
 $code-color-dark: darken($code-color, 10%);
 $border-color: #bfbfbf;
 $font-heading: "Lora", serif;
 $font-body: "Raleway", serif;
 $font-code: "Inconsolata", monospace;
-$standard-border: 1px solid $border-color;
+$standard-border: $standard-border-width solid $border-color;
 @mixin bordered-block {
 border: $standard-border;
-border-radius: 2px;
+border-radius: .2rem;
 }
 body {
 font-family: $font-body;
-font-size: 1.0em;
+font-size: 1.0rem;
 line-height: 1.5;
-margin-bottom: 1em;
+margin-bottom: 1rem;
 text-align: justify;
 }
@@ -27,8 +31,8 @@ main {
 }
 h1, h2, h3, h4, h5, h6 {
-margin-bottom: .1em;
-margin-top: .5em;
+margin-bottom: .1rem;
+margin-top: .5rem;
 font-family: $font-heading;
 font-weight: normal;
 text-align: left;
@@ -49,7 +53,7 @@ code {
 pre code {
 display: block;
-padding: 0.5em;
+padding: 0.5rem;
 overflow-x: auto;
 background-color: $code-color;
 }
@@ -61,12 +65,12 @@ pre code {
 box-sizing: border-box;
 @media screen and (max-width: $container-width){
-padding: 0em 1em 0em 1em;
+padding: 0rem 1rem 0rem 1rem;
 }
 }
 .button, input[type="submit"] {
-padding: 0.5em;
+padding: 0.5rem;
 background-color: $primary-color;
 border: none;
 color: white;
@@ -87,7 +91,7 @@ pre code {
 nav {
 background-color: $primary-color;
 width: 100%;
-margin: 1em 0px 1em 0px;
+margin: 1rem 0rem 1rem 0rem;
 }
 nav a {
@@ -110,7 +114,7 @@ nav a {
 }
 .post-content {
-margin-top: .5em;
+margin-top: .5rem;
 }
 h1 {


@@ -6,9 +6,11 @@
<link href="https://fonts.googleapis.com/css?family=Inconsolata|Lora|Raleway" rel="stylesheet"> <link href="https://fonts.googleapis.com/css?family=Inconsolata|Lora|Raleway" rel="stylesheet">
<link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/normalize/5.0.0/normalize.min.css"> <link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/normalize/5.0.0/normalize.min.css">
{{ $style := resources.Get "scss/style.scss" | resources.ToCSS | resources.Minify }} {{ $style := resources.Get "scss/style.scss" | resources.ToCSS | resources.Minify }}
{{ $sidenotes:= resources.Get "scss/sidenotes.scss" | resources.ToCSS | resources.Minify }} {{ $sidenotes := resources.Get "scss/sidenotes.scss" | resources.ToCSS | resources.Minify }}
{{ $icon := resources.Get "img/favicon.png" }}
<link rel="stylesheet" href="{{ $style.Permalink }}"> <link rel="stylesheet" href="{{ $style.Permalink }}">
<link rel="stylesheet" href="{{ $sidenotes.Permalink }}"> <link rel="stylesheet" href="{{ $sidenotes.Permalink }}">
<link rel="icon" type="image/png" href="{{ $icon.Permalink }}">
<script src='https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-MML-AM_CHTML' async></script> <script src='https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-MML-AM_CHTML' async></script>
{{ template "_internal/google_analytics.html" . }} {{ template "_internal/google_analytics.html" . }}


@@ -0,0 +1,39 @@
{{- $pctx := . -}}
{{- if .IsHome -}}{{ $pctx = .Site }}{{- end -}}
{{- $pages := slice -}}
{{- if or $.IsHome $.IsSection -}}
{{- $pages = $pctx.RegularPages -}}
{{- else -}}
{{- $pages = $pctx.Pages -}}
{{- end -}}
{{- $limit := .Site.Config.Services.RSS.Limit -}}
{{- if ge $limit 1 -}}
{{- $pages = $pages | first $limit -}}
{{- end -}}
{{- printf "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>" | safeHTML }}
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title>{{ if eq .Title .Site.Title }}{{ .Site.Title }}{{ else }}{{ with .Title }}{{.}} on {{ end }}{{ .Site.Title }}{{ end }}</title>
<link>{{ .Permalink }}</link>
<description>Recent content {{ if ne .Title .Site.Title }}{{ with .Title }}in {{.}} {{ end }}{{ end }}on {{ .Site.Title }}</description>
<generator>Hugo -- gohugo.io</generator>{{ with .Site.LanguageCode }}
<language>{{.}}</language>{{end}}{{ with .Site.Author.email }}
<managingEditor>{{.}}{{ with $.Site.Author.name }} ({{.}}){{end}}</managingEditor>{{end}}{{ with .Site.Author.email }}
<webMaster>{{.}}{{ with $.Site.Author.name }} ({{.}}){{end}}</webMaster>{{end}}{{ with .Site.Copyright }}
<copyright>{{.}}</copyright>{{end}}{{ if not .Date.IsZero }}
<lastBuildDate>{{ .Date.Format "Mon, 02 Jan 2006 15:04:05 -0700" | safeHTML }}</lastBuildDate>{{ end }}
{{ with .OutputFormats.Get "RSS" }}
{{ printf "<atom:link href=%q rel=\"self\" type=%q />" .Permalink .MediaType | safeHTML }}
{{ end }}
{{ range $pages }}
<item>
<title>{{ .Title }}</title>
<link>{{ .Permalink }}</link>
<pubDate>{{ .Date.Format "Mon, 02 Jan 2006 15:04:05 -0700" | safeHTML }}</pubDate>
{{ with .Site.Author.email }}<author>{{.}}{{ with $.Site.Author.name }} ({{.}}){{end}}</author>{{end}}
<guid>{{ .Permalink }}</guid>
<description>{{ .Content | html }}</description>
</item>
{{ end }}
</channel>
</rss>


@@ -0,0 +1,9 @@
{{ .Page.Scratch.Add "numbernote-id" 1 }}
{{ $id := .Page.Scratch.Get "numbernote-id" }}
<span class="sidenote">
<label class="sidenote-label" for="numbernote-{{ $id }}">({{ $id }})</label>
<input class="sidenote-checkbox" type="checkbox" id="numbernote-{{ $id }}"></input>
<span class="sidenote-content sidenote-{{ .Get 0 }}">
{{ .Inner }}
</span>
</span>