Adjust Python script to accept HTML files directly as command-line arguments
Signed-off-by: Danila Fedorin <danila.fedorin@gmail.com>
This commit is contained in:
parent
07408d01a9
commit
d847d20666
@ -1,12 +1,9 @@
|
||||
import os
|
||||
import sys
|
||||
from bs4 import BeautifulSoup
|
||||
from fontTools.subset import Subsetter, Options
|
||||
from fontTools.ttLib import TTFont
|
||||
|
||||
# Directories
|
||||
HTML_DIR = "." # Directory with .html files
|
||||
FONT_DIR = "." # Directory containing fonts to be modified
|
||||
|
||||
FONT_EXTENSIONS = (".ttf", ".woff", ".woff2", ".otf") # Font file types
|
||||
|
||||
def extract_text_from_html(file_path):
|
||||
@ -15,12 +12,10 @@ def extract_text_from_html(file_path):
|
||||
soup = BeautifulSoup(f.read(), "html.parser")
|
||||
return soup.get_text()
|
||||
|
||||
def get_used_characters(directory):
|
||||
def get_used_characters(files):
|
||||
"""Collect unique characters from all .html files in the given directory."""
|
||||
char_set = set()
|
||||
for root, _, files in os.walk(directory):
|
||||
for file in files:
|
||||
if file.endswith(".html"):
|
||||
full_path = os.path.join(root, file)
|
||||
text = extract_text_from_html(full_path)
|
||||
char_set.update(text)
|
||||
@ -65,10 +60,10 @@ def subset_font_in_place(font_path, characters):
|
||||
print(f"Subsetted font in place: {font_path}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
used_chars = get_used_characters(HTML_DIR)
|
||||
used_chars = get_used_characters(sys.argv[1:])
|
||||
print(f"Extracted {len(used_chars)} unique characters from HTML files.")
|
||||
|
||||
font_files = find_font_files(FONT_DIR)
|
||||
font_files = find_font_files(".")
|
||||
print(f"Found {len(font_files)} font files to subset.")
|
||||
|
||||
for font_file in font_files:
|
||||
|
Loading…
Reference in New Issue
Block a user