import os
from bs4 import BeautifulSoup
from urllib.parse import urlparse

# URL prefixes (scheme + host, with trailing slash) that identify absolute
# links pointing back into this site; convert_to_relative strips them.
SITE_ROOT_URLS = ["https://danilafe.com/", "http://danilafe.com/"]
# The project root is the current working directory at import time.
# Site-absolute URLs are resolved against it to locate the target on disk.
PROJECT_ROOT = os.getcwd()
# File extensions (lower-case, including the dot) that process_directory
# treats as HTML.
HTML_EXTENSIONS = {".html", ".htm"}

def convert_to_relative(url, base_filepath):
    """
    Convert a site-internal URL to a path relative to the HTML file at
    base_filepath.

    A URL is site-internal when it starts with one of SITE_ROOT_URLS or with
    a single "/" (domain-relative). Everything else is returned unchanged:
    already-relative URLs, URLs on other hosts, and protocol-relative URLs
    such as "//cdn.example.com/x".

    The path part is resolved against PROJECT_ROOT and re-expressed relative
    to the directory containing base_filepath, always using "/" separators.
    Any "?query" / "#fragment" suffix is carried over verbatim. Trailing
    slashes on the path are not preserved.

    Args:
        url: The URL as found in an href/src attribute.
        base_filepath: Filesystem path of the HTML file containing the URL.

    Returns:
        The rewritten relative URL, or url itself if it is not site-internal.
    """
    # "//host/path" names another host; it only looks domain-relative, so it
    # must not be mapped onto the local filesystem.
    if url.startswith("//"):
        return url

    # If it's an absolute URL on the site, strip the domain.
    for root_url in SITE_ROOT_URLS:
        if url.startswith(root_url):
            url = url[len(root_url):]
            break
    else:
        # Not on the site's domain: only domain-relative URLs are rewritten.
        if not url.startswith("/"):
            return url

    # Separate the path from any query/fragment so that filesystem path
    # normalization cannot mangle "//" or ".." sequences inside them.
    suffix_start = len(url)
    for marker in "?#":
        marker_pos = url.find(marker)
        if marker_pos != -1:
            suffix_start = min(suffix_start, marker_pos)
    path, suffix = url[:suffix_start], url[suffix_start:]

    # Build the full filesystem path for the target resource; leading slashes
    # are dropped so os.path.join does not discard PROJECT_ROOT.
    target_path = os.path.normpath(os.path.join(PROJECT_ROOT, path.lstrip("/")))
    base_dir = os.path.dirname(base_filepath)
    # Compute the relative path from the HTML file's directory to the target.
    relative_path = os.path.relpath(target_path, start=base_dir)
    return relative_path.replace(os.path.sep, "/") + suffix

def process_html_file(filepath):
    """
    Rewrite one HTML file in place.

    Steps, in order: make the href/src of <a>, <link>, <script> and <img>
    relative via convert_to_relative; run the same conversion over <link>
    elements nested in <noscript>; delete every <link rel="preload">; and
    replace each <noscript> wrapper with its own children. The result is
    serialized back to filepath as UTF-8.
    """
    # Which attribute carries the URL for each tag we rewrite.
    url_attr_by_tag = {"a": "href", "link": "href", "script": "src", "img": "src"}

    with open(filepath, "r", encoding="utf-8") as source:
        document = BeautifulSoup(source, "lxml")

    # Rewrite URL-bearing attributes on the tags of interest.
    for element in document.find_all(list(url_attr_by_tag)):
        attr_name = url_attr_by_tag[element.name]
        if not element.has_attr(attr_name):
            continue
        element[attr_name] = convert_to_relative(element[attr_name], filepath)

    # Also convert stylesheet-style links that live inside <noscript>.
    for fallback in document.find_all("noscript"):
        for nested_link in fallback.find_all("link"):
            if not nested_link.has_attr("href"):
                continue
            nested_link["href"] = convert_to_relative(nested_link["href"], filepath)

    # Drop every <link rel="preload"> element from the document.
    for preload_link in document.find_all("link", rel="preload"):
        preload_link.decompose()

    # Unwrap <noscript> so its contents become regular markup.
    for fallback in document.find_all("noscript"):
        fallback.unwrap()

    rendered = str(document)
    with open(filepath, "w", encoding="utf-8") as sink:
        sink.write(rendered)

def process_directory(directory):
    """Walk directory recursively and run process_html_file on each HTML file found."""
    for dirpath, _subdirs, filenames in os.walk(directory):
        for filename in filenames:
            # Extension match is case-insensitive (".HTML" counts too).
            extension = os.path.splitext(filename)[1].lower()
            if extension not in HTML_EXTENSIONS:
                continue
            process_html_file(os.path.join(dirpath, filename))

if __name__ == "__main__":
    # Script entry point: process every HTML file beneath the current
    # working directory, rewriting each one in place.
    process_directory(os.curdir)