352 lines
14 KiB
Ruby
352 lines
14 KiB
Ruby
require "nokogiri"
|
|
require "pathname"
|
|
|
|
files = ARGV[0..-1]
|
|
|
|
class LineInfo
|
|
attr_accessor :links
|
|
|
|
def initialize
|
|
@links = []
|
|
end
|
|
end
|
|
|
|
class AgdaContext
|
|
def initialize
|
|
@file_infos = {}
|
|
end
|
|
|
|
# Traverse the preformatted Agda block in the given Agda HTML file
|
|
# and find which textual ranges have IDs and links to other ranges.
|
|
# Store this information in a hash, line => LineInfo.
|
|
def process_agda_html_file(file)
|
|
return @file_infos[file] if @file_infos.include? file
|
|
|
|
@file_infos[file] = line_infos = {}
|
|
unless File.exists?(file)
|
|
return line_infos
|
|
end
|
|
|
|
document = Nokogiri::HTML.parse(File.open(file))
|
|
pre_code = document.css("pre.Agda")[0]
|
|
|
|
# The traversal postorder; we always visit children before their
|
|
# parents, and we visit leaves in sequence.
|
|
offset = 0
|
|
line = 1
|
|
pre_code.traverse do |at|
|
|
# Text nodes are always leaves; visiting a new leaf means we've advanced
|
|
# in the text by the length of that text. However, if there are newlines
|
|
# -- since this is a preformatted block -- we also advanced by a line.
|
|
# At this time, do not support links that span multiple lines, but
|
|
# Agda doesn't produce those either.
|
|
if at.text?
|
|
if at.content.include? "\n"
|
|
# This textual leaf is at least part whitespace. The logic
|
|
# assumes that links can't span multiple pages, and that links
|
|
# aren't nested, so ensure that the parent of the textual node
|
|
# is the preformatted block itself.
|
|
raise "unsupported Agda HTML output" if at.parent.name != "pre"
|
|
|
|
# Increase the line and track the final offset. Written as a loop
|
|
# in case we eventually want to add some handling for the pieces
|
|
# sandwiched between newlines.
|
|
at.content.split("\n", -1).each_with_index do |bit, idx|
|
|
line += 1 unless idx == 0
|
|
offset = bit.length
|
|
end
|
|
else
|
|
# It's not a newline node, so it could be anywhere. All we need to
|
|
# do is adjust the offset within the full pre block's text.
|
|
offset += at.content.length
|
|
end
|
|
elsif at.name == "a"
|
|
# We found a link. Agda emits both links and "things to link to" as
|
|
# 'a' nodes, so check for either, and record them. Even if
|
|
# the link is nested, the .content.length accessor will only
|
|
# retrieve the textual content, and thus -- assuming the link
|
|
# isn't split across lines -- will find the proper from-to range.
|
|
|
|
line_info = line_infos.fetch(line) { line_infos[line] = LineInfo.new }
|
|
href = at.attribute("href")
|
|
id = at.attribute("id")
|
|
if href or id
|
|
new_node = { :from => offset-at.content.length, :to => offset }
|
|
new_node[:href] = href if href
|
|
new_node[:id] = id if id
|
|
|
|
line_info.links << new_node
|
|
end
|
|
end
|
|
end
|
|
return line_infos
|
|
end
|
|
end
|
|
|
|
class FileGroup
|
|
def initialize(agda_context)
|
|
@agda_context = agda_context
|
|
# Agda HTML href -> list of (file, Hugo HTML node that links to it)
|
|
@nodes_referencing_href = {}
|
|
# Agda HTML href -> (its new ID in Hugo-land, file in which it's defined)
|
|
# This supports cross-post linking within a seires.
|
|
@global_seen_hrefs = {}
|
|
# file name -> Agda HTML href -> its new ID in Hugo-land
|
|
# This supports linking within a particular post.
|
|
@local_seen_hrefs = Hash.new { {} }
|
|
# Global counter to generate fresh IDs. There's no reason for it to
|
|
# be global within a series (since IDs are namespaced by the file they're in),
|
|
# but it's just more convenient this way.
|
|
@id_counter = 0
|
|
end
|
|
|
|
def note_defined_href(file, href)
|
|
file_hrefs = @local_seen_hrefs.fetch(file) do
|
|
@local_seen_hrefs[file] = {}
|
|
end
|
|
|
|
uniq_id = file_hrefs.fetch(href) do
|
|
new_id = "agda-unique-ident-#{@id_counter}"
|
|
@id_counter += 1
|
|
file_hrefs[href] = new_id
|
|
end
|
|
|
|
unless @global_seen_hrefs.include? href
|
|
@global_seen_hrefs[href] = { :file => file, :id => uniq_id }
|
|
end
|
|
|
|
return uniq_id
|
|
end
|
|
|
|
def note_used_href(file, node, href)
|
|
ref_list = @nodes_referencing_href.fetch(href) { @nodes_referencing_href[href] = [] }
|
|
ref_list << { :file => file, :node => node }
|
|
return href
|
|
end
|
|
|
|
# Given a Hugo HTML file which references potentially several Agda modules
|
|
# in code blocks, insert links into the code blocks.
|
|
#
|
|
# There are several things we need to take care of:
|
|
# 1. Finding the HTML files associated with each referenced Agda module.
|
|
# For this, we make use of the data-base-path etc. attributes that
|
|
# the vanilla theme inserts.
|
|
# 2. "zipping together" the Agda and Hugo HTML representations. Each of
|
|
# them encode the code, but they use different HTML elements and structures.
|
|
# So, given a Hugo HTML code block, traverse its textual contents
|
|
# and find any that are covered by links in the related Agda HTML file.
|
|
# 3. Fixing up links: the Agda HTML links assume each module has its own HTML
|
|
# file. This isn't true for us: multiple modules are stitched into
|
|
# one Hugo HTML file. Also, we don't include the entire Agda HTML
|
|
# file in the Hugo HTML, so some links may be broken. So, find IDs
|
|
# that are visible in the Hugo file, rename them to be globally unique,
|
|
# and re-write cross-file links that reference these IDs to point
|
|
# inside the Hugo file.
|
|
def process_source_file(file, document)
|
|
# Process each highlight group that's been marked as an Agda file.
|
|
document.css('div[data-agda-block]').each do |t|
|
|
first_line, last_line = nil, nil
|
|
|
|
if first_line_attr = t.attribute("data-first-line")
|
|
first_line = first_line_attr.to_s.to_i
|
|
end
|
|
if last_line_attr = t.attribute("data-last-line")
|
|
last_line = last_line_attr.to_s.to_i
|
|
end
|
|
|
|
if first_line and last_line
|
|
line_range = first_line..last_line
|
|
else
|
|
line_range = 1..
|
|
end
|
|
|
|
# Sometimes, code is deeply nested in the source file, but we don't
|
|
# want to show the leading space. In that case, the generator sets
|
|
# data-source-offset with how much leading space was stripped off.
|
|
initial_offset = 0
|
|
if source_offset_attr = t.attribute("data-source-offset")
|
|
initial_offset = source_offset_attr.to_s.to_i
|
|
end
|
|
|
|
full_path = t.attribute("data-file-path").to_s
|
|
full_path_dirs = Pathname(full_path).each_filename.to_a
|
|
|
|
# The name of an Agda module is determined from its directory
|
|
# structure: A/B/C.agda creates A.B.C.html. Depending on where
|
|
# the code is included, there might be some additional folders
|
|
# that precede A that we want to ignore.
|
|
base_path = t.attribute("data-base-path").to_s
|
|
base_dir_depth = 0
|
|
if base_path.empty?
|
|
# No submodules were used. Assume code/<X> is the root, since
|
|
# that's the code layout of the blog right now.
|
|
base_dir_depth = 1
|
|
base_path = full_path_dirs[0]
|
|
else
|
|
# The code is in a submodule. Assume that the base path / submodule
|
|
# root is the Agda module root, ignore all folders before that.
|
|
base_path_dirs = Pathname(base_path).each_filename.to_a
|
|
base_dir_depth = base_path_dirs.length
|
|
end
|
|
|
|
dirs_in_base = full_path_dirs[base_dir_depth..-1]
|
|
html_file = dirs_in_base.join(".").gsub(/\.agda$/, ".html")
|
|
html_path = File.join(["code", base_path, "html", html_file])
|
|
|
|
agda_info = @agda_context.process_agda_html_file(html_path)
|
|
|
|
# Hugo conveniently generates a bunch of spans, each encoding a line
|
|
# of code output. We can iterate over these and match them up with
|
|
# the line numbers we got from reading the Agda HTML output.
|
|
lines = t.css("pre.chroma code[data-lang] .line")
|
|
lines.zip(line_range).each do |line, line_no|
|
|
line_info = agda_info[line_no]
|
|
next unless line_info
|
|
|
|
offset = initial_offset
|
|
line.traverse do |lt|
|
|
if lt.text?
|
|
content = lt.content
|
|
new_offset = offset + content.length
|
|
|
|
# The span/a/etc. structure of the Agda and Hugo HTML files
|
|
# need not line up; it's possible for there to be a single link
|
|
# in the Agda file that's broken up across multiple HTML nodes
|
|
# in the Hugo output. For now, just don't link those, since inserting
|
|
# such overlapping links is relatively complicated. Instead,
|
|
# require links to fit fully within a current text node (and thus,
|
|
# not overlap the boundaries of any HTML).
|
|
matching_links = line_info.links.filter do |link|
|
|
link[:from] >= offset and link[:to] <= new_offset
|
|
end
|
|
|
|
# A given text node can be broken into any number of sub-nodes,
|
|
# where some sub-nodes are still text, and others have been turned
|
|
# into links. Store the new pieces in replace_with. E.g.,
|
|
#
|
|
# Original:
|
|
# abc
|
|
#
|
|
# New:
|
|
# a<a href="..">b</a>c
|
|
#
|
|
# replace_with:
|
|
# ["a", <Nokogiri::XML::Node...>, "c"]
|
|
#
|
|
# match_offset represents how much of the original text we've
|
|
# already converted. The below iteration assumes that matching
|
|
# links are in order, and don't overlap.
|
|
replace_with = []
|
|
replace_offset = 0
|
|
matching_links.each do |match|
|
|
# The link's range is an offset from the beginning of the line,
|
|
# but the text piece we're splitting up might be partway into
|
|
# the line. Convert the link coordinates to piece-relative ones.
|
|
relative_from = match[:from] - offset
|
|
relative_to = match[:to] - offset
|
|
|
|
# If the previous link ended some time before the new link
|
|
# began (or if the current link is the first one, and is not
|
|
# at the beginning), ensure that the plain text "in between"
|
|
# is kept.
|
|
replace_with << content[replace_offset...relative_from]
|
|
|
|
tag = (match.include? :href) ? 'a' : 'span'
|
|
new_node = Nokogiri::XML::Node.new(tag, document)
|
|
if match.include? :href
|
|
# For nodes with links, note what they're referring to, so
|
|
# we can adjust their hrefs when we assign global IDs.
|
|
href = match[:href].to_s
|
|
new_node['href'] = note_used_href file, new_node, href
|
|
end
|
|
if match.include? :id
|
|
# For nodes with IDs visible in the current Hugo file, we'll
|
|
# want to redirect links that previously go to other Agda
|
|
# module HTML files. So, note the ID that we want to redirect,
|
|
# and pick a new unique ID to replace it with.
|
|
id = match[:id].to_s
|
|
new_node['id'] = note_defined_href file, "#{html_file}##{id}"
|
|
end
|
|
new_node.content = content[relative_from...relative_to]
|
|
|
|
replace_with << new_node
|
|
replace_offset = relative_to
|
|
end
|
|
replace_with << content[replace_offset..-1]
|
|
|
|
# Finally, replace the node under consideration with the new
|
|
# pieces.
|
|
replace_with.each do |replacement|
|
|
lt.add_previous_sibling replacement
|
|
end
|
|
lt.remove
|
|
|
|
offset = new_offset
|
|
end
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
def cross_link_files
|
|
# Now, we have a complete list of all the IDs visible in scope.
|
|
# Redirect relevant links to these IDs. This achieves within-post
|
|
# links.
|
|
@nodes_referencing_href.each do |href, references|
|
|
references.each do |reference|
|
|
file = reference[:file]
|
|
node = reference[:node]
|
|
|
|
local_targets = @local_seen_hrefs[file]
|
|
if local_targets.include? href
|
|
# A code block in this fine provides this href, create a local link.
|
|
node['href'] = "##{local_targets[href]}"
|
|
elsif @global_seen_hrefs.include? href
|
|
# A code block in this series, but not in this file, defines
|
|
# this href. Create a cross-file link.
|
|
target = @global_seen_hrefs[href]
|
|
other_file = target[:file]
|
|
id = target[:id]
|
|
|
|
relpath = Pathname.new(other_file).dirname.relative_path_from(Pathname.new(file).dirname)
|
|
node['href'] = "#{relpath}##{id}"
|
|
else
|
|
# No definitions in any blog page. For now, just delete the anchor.
|
|
node.replace node.content
|
|
end
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
agda_context = AgdaContext.new
|
|
|
|
file_documents = {}
|
|
series_groups = files.group_by do |file|
|
|
file_documents[file] = document = Nokogiri::HTML.parse(File.open(file))
|
|
document.css("meta[name=blog-series]")&.attribute("content")&.to_s
|
|
end
|
|
|
|
# For the 'nil' group, process individually.
|
|
if files_with_no_series = series_groups.delete(nil)
|
|
files_with_no_series.each do |file|
|
|
file_group = FileGroup.new agda_context
|
|
file_group.process_source_file file, file_documents[file]
|
|
file_group.cross_link_files
|
|
end
|
|
end
|
|
|
|
# For groups, process them together to allow cross-linking
|
|
series_groups.each do |_, files_in_series|
|
|
file_group = FileGroup.new agda_context
|
|
files_in_series.each do |file|
|
|
file_group.process_source_file file, file_documents[file]
|
|
end
|
|
file_group.cross_link_files
|
|
end
|
|
|
|
# Having modified all the HTML files, save them.
|
|
file_documents.each do |file, document|
|
|
File.write(file, document.to_html(encoding: 'UTF-8'))
|
|
end
|