diff --git a/agda.rb b/agda.rb
new file mode 100644
index 0000000..5a5f3f3
--- /dev/null
+++ b/agda.rb
@@ -0,0 +1,262 @@
+require "nokogiri"
+require "pathname"
+
+files = ARGV[0..-1]
+
+class LineInfo
+ attr_accessor :links
+
+ def initialize
+ @links = []
+ end
+end
+
+class Handler
+ def initialize
+ @file_infos = {}
+ end
+
+ # Traverse the preformatted Agda block in the given Agda HTML file
+ # and find which textual ranges have IDs and links to other ranges.
+ # Store this information in a hash, line => LineInfo.
+ def process_agda_html_file(file)
+ return @file_infos[file] if @file_infos.include? file
+
+ @file_infos[file] = line_infos = {}
+ unless File.exists?(file)
+ return line_infos
+ end
+
+ document = Nokogiri::HTML.parse(File.open(file))
+ pre_code = document.css("pre.Agda")[0]
+
+ # The traversal postorder; we always visit children before their
+ # parents, and we visit leaves in sequence.
+ offset = 0
+ line = 1
+ pre_code.traverse do |at|
+ # Text nodes are always leaves; visiting a new leaf means we've advanced
+ # in the text by the length of that text. However, if there are newlines
+ # -- since this is a preformatted block -- we also advanced by a line.
+ # At this time, do not support links that span multiple lines, but
+ # Agda doesn't produce those either.
+ if at.text?
+ if at.content.include? "\n"
+ # This textual leaf is at least part whitespace. The logic
+ # assumes that links can't span multiple pages, and that links
+ # aren't nested, so ensure that the parent of the textual node
+ # is the preformatted block itself.
+ raise "unsupported Agda HTML output" if at.parent.name != "pre"
+
+ # Increase the line and track the final offset. Written as a loop
+ # in case we eventually want to add some handling for the pieces
+ # sandwiched between newlines.
+ at.content.split("\n", -1).each_with_index do |bit, idx|
+ line += 1 unless idx == 0
+ offset = bit.length
+ end
+ else
+ # It's not a newline node, so it could be anywhere. All we need to
+ # do is adjust the offset within the full pre block's text.
+ offset += at.content.length
+ end
+ elsif at.name == "a"
+ # We found a link. Agda emits both links and "things to link to" as
+ # 'a' nodes, so check for either, and record them. Even if
+ # the link is nested, the .content.length accessor will only
+ # retrieve the textual content, and thus -- assuming the link
+ # isn't split across lines -- will find the proper from-to range.
+
+ line_info = line_infos.fetch(line) { line_infos[line] = LineInfo.new }
+ href = at.attribute("href")
+ id = at.attribute("id")
+ if href or id
+ new_link = { :from => offset-at.content.length, :to => offset }
+ new_link[:href] = href if href
+ new_link[:id] = id if id
+
+ line_info.links << new_link
+ end
+ end
+ end
+ return line_infos
+ end
+
+ # Given a Hugo HTML file which references potentially several Agda modules
+ # in code blocks, insert links into the code blocks.
+ #
+ # There are several things we need to take care of:
+ # 1. Finding the HTML files associated with each referenced Agda module.
+ # For this, we make use of the data-base-path etc. attributes that
+ # the vanilla theme inserts.
+ # 2. "zipping together" the Agda and Hugo HTML representations. Each of
+ # them encode the code, but they use different HTML elements and structures.
+ # So, given a Hugo HTML code block, traverse its textual contents
+ # and find any that are covered by links in the related Agda HTML file.
+ # 3. Fixing up links: the Agda HTML links assume each module has its own HTML
+ # file. This isn't true for us: multiple modules are stitched into
+ # one Hugo HTML file. Also, we don't include the entire Agda HTML
+ # file in the Hugo HTML, so some links may be broken. So, find IDs
+ # that are visible in the Hugo file, rename them to be globally unique,
+ # and re-write cross-file links that reference these IDs to point
+ # inside the Hugo file.
+ def process_source_file(file)
+ nodes_referencing_href = {} # Agda HTML ID -> all (Hugo HTML) nodes that link to it
+ seen_hrefs = {} # Agda HTML ID -> its new ID in Hugo-land
+ id_counter = 0
+
+ # Process each highlight group that's been marked as an Agda file.
+ document = Nokogiri::HTML.parse(File.open(file))
+ document.css('div[data-agda-block]').each do |t|
+ first_line, last_line = nil, nil
+
+ if first_line_attr = t.attribute("data-first-line")
+ first_line = first_line_attr.to_s.to_i
+ end
+ if last_line_attr = t.attribute("data-last-line")
+ last_line = last_line_attr.to_s.to_i
+ end
+
+ # The name of an Agda module is determined from its directory
+ # structure: A/B/C.agda creates A.B.C.html. Assume that the
+ # base path is the Agda module root, and use the remaining directories
+ # to build the HTML file name.
+ base_path = t.attribute("data-base-path").to_s
+ base_path_dirs = Pathname(base_path).each_filename.to_a
+ full_path = t.attribute("data-file-path").to_s
+ full_path_dirs = Pathname(full_path).each_filename.to_a
+
+ dirs_in_base = full_path_dirs[base_path_dirs.length..-1]
+ html_file = dirs_in_base.join(".").gsub(/\.agda$/, ".html")
+ html_path = File.join(["code", base_path, "html", html_file])
+
+ agda_info = process_agda_html_file(html_path)
+
+ # Hugo conveniently generates a bunch of spans, each encoding a line
+ # of code output. We can iterate over these and match them up with
+ # the line numbers we got from reading the Agda HTML output.
+ lines = t.css("pre.chroma code[data-lang] .line")
+ lines.zip((first_line..last_line)).each do |line, line_no|
+ line_info = agda_info[line_no]
+ next unless line_info
+
+ offset = 0
+ line.traverse do |lt|
+ if lt.text?
+ content = lt.content
+ new_offset = offset + content.length
+
+ # The span/a/etc. structure of the Agda and Hugo HTML files
+ # need not line up; it's possible for there to be a single link
+ # in the Agda file that's broken up across multiple HTML nodes
+ # in the Hugo output. For now, just don't link those, since inserting
+ # such overlapping links is relatively complicated. Instead,
+ # require links to fit fully within a current text node (and thus,
+ # not overlap the boundaries of any HTML).
+ matching_links = line_info.links.filter do |link|
+ link[:from] >= offset and link[:to] <= new_offset
+ end
+
+ # A given text node can be broken into any number of sub-nodes,
+ # where some sub-nodes are still text, and others have been turned
+ # into links. Store the new pieces in replace_with. E.g.,
+ #
+ # Original:
+ # abc
+ #
+ # New:
+ # abc
+ #
+ # replace_with:
+ # ["a", , "c"]
+ #
+ # match_offset represents how much of the original text we've
+ # already converted. The below iteration assumes that matching
+ # links are in order, and don't overlap.
+ replace_with = []
+ replace_offset = 0
+ matching_links.each do |match|
+ # The link's range is an offset from the beginning of the line,
+ # but the text piece we're splitting up might be partway into
+ # the line. Convert the link coordinates to piece-relative ones.
+ relative_from = match[:from] - offset
+ relative_to = match[:to] - offset
+
+ # If the previous link ended some time before the new link
+ # began (or if the current link is the first one, and is not
+ # at the beginning), ensure that the plain text "in between"
+ # is kept.
+ replace_with << content[replace_offset...relative_from]
+
+ tag = (match.include? :href) ? 'a' : 'span'
+ new_link = Nokogiri::XML::Node.new(tag, document)
+ if match.include? :href
+ # For nodes with links, note what they're referring to, so
+ # we can adjust their hrefs when we assign global IDs.
+ href = match[:href].to_s
+ new_link['href'] = href
+ ref_list = nodes_referencing_href.fetch(href) { nodes_referencing_href[href] = [] }
+ ref_list << new_link
+ end
+ if match.include? :id
+ # For nodes with IDs visible in the current Hugo file, we'll
+ # want to redirect links that previously go to other Agda
+ # module HTML files. So, note the ID that we want to redirect,
+ # and pick a new unique ID to replace it with.
+ id = match[:id].to_s
+ uniq_id = "agda-unique-ident-#{id_counter}"
+ id_counter += 1
+ new_link['id'] = uniq_id
+ seen_hrefs["#{html_file}##{id}"] = uniq_id
+ end
+ new_link.content = content[relative_from...relative_to]
+
+ replace_with << new_link
+ replace_offset = relative_to
+ end
+ replace_with << content[replace_offset..-1]
+
+ # Finally, replace the node under consideration with the new
+ # pieces.
+ replace_with.each do |replacement|
+ lt.add_previous_sibling replacement
+ end
+ lt.remove
+
+ offset = new_offset
+ end
+ end
+ end
+ end
+
+ # Now, we have a complete list of all the IDs visible in scope.
+ # Redirect relevant links to these IDs. This achieves within-post
+ # links.
+ seen_hrefs.each do |agda_id, hugo_id|
+ nodes_to_rewrite = nodes_referencing_href[agda_id]
+ next unless nodes_to_rewrite
+
+ nodes_to_rewrite.each do |node|
+ node['href'] = "##{hugo_id}"
+ end
+
+ nodes_referencing_href.delete agda_id
+ end
+
+ # The remaining nodes reference symbols not in the current Hugo document,
+ # so just replace them with their contents for now (we'd need some
+ # external hosting to make this work otherwise).
+ nodes_referencing_href.each do |href, nodes|
+ nodes.each do |node|
+ node.replace node.content
+ end
+ end
+
+ File.write(file, document.to_html(encoding: 'UTF-8'))
+ end
+end
+
+handler = Handler.new
+files.each do |file|
+ handler.process_source_file file
+end