diff --git a/chatgpt-subset-one-go.py b/chatgpt-subset-one-go.py
index 64db038..ba93dc8 100644
--- a/chatgpt-subset-one-go.py
+++ b/chatgpt-subset-one-go.py
@@ -1,12 +1,9 @@
 import os
+import sys
 from bs4 import BeautifulSoup
 from fontTools.subset import Subsetter, Options
 from fontTools.ttLib import TTFont
 
-# Directories
-HTML_DIR = "." # Directory with .html files
-FONT_DIR = "." # Directory containing fonts to be modified
-
 FONT_EXTENSIONS = (".ttf", ".woff", ".woff2", ".otf") # Font file types
 
 def extract_text_from_html(file_path):
@@ -15,15 +12,12 @@ def extract_text_from_html(file_path):
         soup = BeautifulSoup(f.read(), "html.parser")
     return soup.get_text()
 
-def get_used_characters(directory):
-    """Collect unique characters from all .html files in the given directory."""
+def get_used_characters(files):
+    """Collect unique characters from the given HTML file paths."""
     char_set = set()
-    for root, _, files in os.walk(directory):
-        for file in files:
-            if file.endswith(".html"):
-                full_path = os.path.join(root, file)
-                text = extract_text_from_html(full_path)
-                char_set.update(text)
+    for file in files:
+        text = extract_text_from_html(file)
+        char_set.update(text)
     return "".join(sorted(char_set))
 
 def find_font_files(directory):
@@ -65,10 +59,10 @@ def subset_font_in_place(font_path, characters):
     print(f"Subsetted font in place: {font_path}")
 
 if __name__ == "__main__":
-    used_chars = get_used_characters(HTML_DIR)
+    used_chars = get_used_characters(sys.argv[1:])
     print(f"Extracted {len(used_chars)} unique characters from HTML files.")
 
-    font_files = find_font_files(FONT_DIR)
+    font_files = find_font_files(".")
     print(f"Found {len(font_files)} font files to subset.")
 
     for font_file in font_files: