Adjust Python script to also accept HTML files as command-line arguments
Signed-off-by: Danila Fedorin <danila.fedorin@gmail.com>
This commit is contained in:
parent
07408d01a9
commit
d847d20666
@ -1,12 +1,9 @@
|
|||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from fontTools.subset import Subsetter, Options
|
from fontTools.subset import Subsetter, Options
|
||||||
from fontTools.ttLib import TTFont
|
from fontTools.ttLib import TTFont
|
||||||
|
|
||||||
# Directories
|
|
||||||
HTML_DIR = "." # Directory with .html files
|
|
||||||
FONT_DIR = "." # Directory containing fonts to be modified
|
|
||||||
|
|
||||||
FONT_EXTENSIONS = (".ttf", ".woff", ".woff2", ".otf") # Font file types
|
FONT_EXTENSIONS = (".ttf", ".woff", ".woff2", ".otf") # Font file types
|
||||||
|
|
||||||
def extract_text_from_html(file_path):
|
def extract_text_from_html(file_path):
|
||||||
@ -15,15 +12,13 @@ def extract_text_from_html(file_path):
|
|||||||
soup = BeautifulSoup(f.read(), "html.parser")
|
soup = BeautifulSoup(f.read(), "html.parser")
|
||||||
return soup.get_text()
|
return soup.get_text()
|
||||||
|
|
||||||
def get_used_characters(directory):
|
def get_used_characters(files):
|
||||||
"""Collect unique characters from all .html files in the given directory."""
|
"""Collect unique characters from the text of the given HTML files."""
|
||||||
char_set = set()
|
char_set = set()
|
||||||
for root, _, files in os.walk(directory):
|
for file in files:
|
||||||
for file in files:
|
full_path = os.path.join(root, file)
|
||||||
if file.endswith(".html"):
|
text = extract_text_from_html(full_path)
|
||||||
full_path = os.path.join(root, file)
|
char_set.update(text)
|
||||||
text = extract_text_from_html(full_path)
|
|
||||||
char_set.update(text)
|
|
||||||
return "".join(sorted(char_set))
|
return "".join(sorted(char_set))
|
||||||
|
|
||||||
def find_font_files(directory):
|
def find_font_files(directory):
|
||||||
@ -65,10 +60,10 @@ def subset_font_in_place(font_path, characters):
|
|||||||
print(f"Subsetted font in place: {font_path}")
|
print(f"Subsetted font in place: {font_path}")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
used_chars = get_used_characters(HTML_DIR)
|
used_chars = get_used_characters(sys.argv[1:])
|
||||||
print(f"Extracted {len(used_chars)} unique characters from HTML files.")
|
print(f"Extracted {len(used_chars)} unique characters from HTML files.")
|
||||||
|
|
||||||
font_files = find_font_files(FONT_DIR)
|
font_files = find_font_files(".")
|
||||||
print(f"Found {len(font_files)} font files to subset.")
|
print(f"Found {len(font_files)} font files to subset.")
|
||||||
|
|
||||||
for font_file in font_files:
|
for font_file in font_files:
|
||||||
|
Loading…
Reference in New Issue
Block a user