Add initial draft of flake.nix
Signed-off-by: Danila Fedorin <danila.fedorin@gmail.com>
This commit is contained in:
74
scripts/chatgpt-fix-root-URLs.py
Normal file
74
scripts/chatgpt-fix-root-URLs.py
Normal file
@@ -0,0 +1,74 @@
|
||||
import os
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# URL prefixes that identify links pointing at this site's own domain.
SITE_ROOT_URLS = [
    "https://danilafe.com/",
    "http://danilafe.com/",
]

# The generated site is assumed to live in the current working directory.
PROJECT_ROOT = os.getcwd()

# File extensions treated as HTML documents worth rewriting.
HTML_EXTENSIONS = {".html", ".htm"}
|
||||
|
||||
def convert_to_relative(url, base_filepath):
    """
    Convert an absolute URL (including domain-relative URLs) to a relative path
    appropriate for the HTML file at base_filepath.

    URLs that are neither domain-relative ("/...") nor absolute URLs on one of
    the SITE_ROOT_URLS domains (e.g. external links, "#anchors", "mailto:")
    are returned unchanged.
    """
    # Fix: removed dead code — the original computed `parsed = urlparse(url)`
    # but never used the result.
    if not (url.startswith("/") or any(url.startswith(root) for root in SITE_ROOT_URLS)):
        return url

    # If it's an absolute URL on danilafe.com, strip the domain.
    for root_url in SITE_ROOT_URLS:
        if url.startswith(root_url):
            url = url[len(root_url):]
            break

    # For domain-relative URLs (starting with "/"), remove the leading slash(es).
    # NOTE(review): this branch also catches protocol-relative "//other-host/..."
    # URLs and would treat the host name as a path component — confirm such URLs
    # never appear in the generated HTML.
    if url.startswith("/"):
        url = url.lstrip("/")

    # Build the full filesystem path for the target resource, then compute the
    # relative path from the HTML file's directory to that target.
    target_path = os.path.normpath(os.path.join(PROJECT_ROOT, url))
    base_dir = os.path.dirname(base_filepath)
    relative_path = os.path.relpath(target_path, start=base_dir)
    # Always emit forward slashes, regardless of the host OS path separator.
    return relative_path.replace(os.path.sep, "/")
|
||||
|
||||
def process_html_file(filepath):
    """Rewrite site-internal links, unwrap <noscript> blocks, and strip
    rel="preload" hints in a single HTML file, modifying it in place."""
    with open(filepath, "r", encoding="utf-8") as handle:
        soup = BeautifulSoup(handle, "lxml")

    # Rewrite the URL-carrying attribute on the common link-bearing tags.
    for element in soup.find_all(["a", "link", "script", "img"]):
        attr = {"a": "href", "link": "href"}.get(element.name, "src")
        if element.has_attr(attr):
            element[attr] = convert_to_relative(element[attr], filepath)

    # Also rewrite <link> hrefs nested inside <noscript> blocks.
    for noscript in soup.find_all("noscript"):
        for nested in noscript.find_all("link"):
            if nested.has_attr("href"):
                nested["href"] = convert_to_relative(nested["href"], filepath)

    # Drop every <link rel="preload"> element entirely.
    for preload in soup.find_all("link", rel="preload"):
        preload.decompose()

    # "Partially evaluate" noscript: hoist each <noscript>'s children out of
    # the wrapper so its content always applies.
    for noscript in soup.find_all("noscript"):
        noscript.unwrap()

    with open(filepath, "w", encoding="utf-8") as handle:
        handle.write(str(soup))
|
||||
|
||||
def process_directory(directory):
    """Recursively process every HTML file found beneath *directory*."""
    for dirpath, _dirnames, filenames in os.walk(directory):
        for filename in filenames:
            extension = os.path.splitext(filename)[1].lower()
            if extension in HTML_EXTENSIONS:
                process_html_file(os.path.join(dirpath, filename))
|
||||
|
||||
if __name__ == "__main__":
    # Treat the current working directory as the site root and rewrite it.
    process_directory(".")
|
||||
Reference in New Issue
Block a user