Adjust Python script to accept HTML files directly as command-line arguments

Signed-off-by: Danila Fedorin <danila.fedorin@gmail.com>
2025-02-23 12:28:19 -08:00 · 2025-02-23 12:28:19 -08:00 · d847d20666
commit d847d20666
parent 07408d01a9
1 changed files with 8 additions and 13 deletions
--- a/chatgpt-subset-one-go.py
+++ b/chatgpt-subset-one-go.py
@ -1,12 +1,9 @@
 import os
+import sys
 from bs4 import BeautifulSoup
 from fontTools.subset import Subsetter, Options
 from fontTools.ttLib import TTFont

-# Directories
-HTML_DIR = "."    # Directory with .html files
-FONT_DIR = "."          # Directory containing fonts to be modified
-
 FONT_EXTENSIONS = (".ttf", ".woff", ".woff2", ".otf")  # Font file types

 def extract_text_from_html(file_path):
@ -15,12 +12,10 @@ def extract_text_from_html(file_path):
        soup = BeautifulSoup(f.read(), "html.parser")
        return soup.get_text()

-def get_used_characters(directory):
+def get_used_characters(files):
    """Collect unique characters from all .html files in the given directory."""
    char_set = set()
-    for root, _, files in os.walk(directory):
    for file in files:
-            if file.endswith(".html"):
        full_path = os.path.join(root, file)
        text = extract_text_from_html(full_path)
        char_set.update(text)
@ -65,10 +60,10 @@ def subset_font_in_place(font_path, characters):
    print(f"Subsetted font in place: {font_path}")

 if __name__ == "__main__":
-    used_chars = get_used_characters(HTML_DIR)
+    used_chars = get_used_characters(sys.argv[1:])
    print(f"Extracted {len(used_chars)} unique characters from HTML files.")

-    font_files = find_font_files(FONT_DIR)
+    font_files = find_font_files(".")
    print(f"Found {len(font_files)} font files to subset.")

    for font_file in font_files: