Add a (ChatGPT-provided) script to perform subsetting
Signed-off-by: Danila Fedorin <danila.fedorin@gmail.com>
This commit is contained in:
parent
ce8f8fb872
commit
816a473913
75
chatgpt-subset-one-go.py
Normal file
75
chatgpt-subset-one-go.py
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
import os
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from fontTools.subset import Subsetter, Options
|
||||||
|
from fontTools.ttLib import TTFont
|
||||||
|
|
||||||
|
# Locations the script works on.
HTML_DIR = "."  # where the .html files are looked up
FONT_DIR = "."  # where the fonts to be rewritten are looked up

# File-name suffixes that identify a font file.
FONT_EXTENSIONS = (
    ".ttf",
    ".woff",
    ".woff2",
    ".otf",
)
|
||||||
|
|
||||||
|
def extract_text_from_html(file_path):
    """Return the text content of the HTML file at ``file_path``.

    The file is read as UTF-8, parsed with BeautifulSoup's ``html.parser``
    backend, and flattened with ``get_text()``.
    """
    with open(file_path, "r", encoding="utf-8") as html_file:
        markup = html_file.read()
    document = BeautifulSoup(markup, "html.parser")
    return document.get_text()
|
||||||
|
|
||||||
|
def get_used_characters(directory):
    """Return the distinct characters used by HTML files under ``directory``.

    The directory tree is walked recursively. Every file whose name ends in
    ``.html`` is run through ``extract_text_from_html`` and its characters
    are merged into one set. The result is a string holding each character
    once, in sorted order.
    """
    seen = set()
    for dirpath, _dirnames, filenames in os.walk(directory):
        html_names = [name for name in filenames if name.endswith(".html")]
        for name in html_names:
            page_text = extract_text_from_html(os.path.join(dirpath, name))
            seen |= set(page_text)
    ordered = list(seen)
    ordered.sort()
    return "".join(ordered)
|
||||||
|
|
||||||
|
def find_font_files(directory):
    """Return the paths of all font files beneath ``directory``.

    The tree is walked recursively; a file counts as a font when its name
    ends with one of the suffixes in ``FONT_EXTENSIONS``. Paths are returned
    in the order ``os.walk`` yields them.
    """
    matches = []
    for dirpath, _dirnames, filenames in os.walk(directory):
        matches.extend(
            os.path.join(dirpath, name)
            for name in filenames
            if name.endswith(FONT_EXTENSIONS)
        )
    return matches
|
||||||
|
|
||||||
|
def subset_font_in_place(font_path, characters):
    """Subset the font at ``font_path`` to ``characters`` and overwrite it.

    Args:
        font_path: Path of the font file. It is read, subsetted, and then
            saved back to the same path.
        characters: Iterable of single-character strings whose glyphs
            should be kept.

    The font is opened as a context manager so that its underlying file
    handle is released after each font instead of staying open until the
    interpreter exits.
    """
    # The subsetter is populated with integer code points, not strings.
    unicode_set = {ord(c) for c in characters}

    options = Options()
    options.drop_tables += ["DSIG", "LTSH", "VDMX", "hdmx", "gasp"]
    # NOTE(review): `unicodes`, `variations` and `drop_variations` do not
    # appear to be documented fontTools subset options, so these three
    # assignments are presumably inert. The code points that matter are the
    # ones passed to `populate()` below. Confirm and remove.
    options.unicodes = unicode_set
    options.variations = False
    options.drop_variations = True
    options.layout_features = ["*"]  # keep all OT features
    options.hinting = False

    # Preserve original format if it was WOFF/WOFF2
    if font_path.endswith(".woff2"):
        options.flavor = "woff2"
    elif font_path.endswith(".woff"):
        options.flavor = "woff"

    with TTFont(font_path) as font:
        subsetter = Subsetter(options)
        subsetter.populate(unicodes=unicode_set)
        subsetter.subset(font)

        # Overwrite the original font file. Saved while the font is still
        # open so that any table data not yet read from the source file is
        # still reachable.
        font.save(font_path)

    print(f"Subsetted font in place: {font_path}")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Step 1: work out which characters the site's HTML actually uses.
    used_chars = get_used_characters(HTML_DIR)
    print(f"Extracted {len(used_chars)} unique characters from HTML files.")

    # Fonts are overwritten in place, so an empty character set would strip
    # every font down to nothing with no way back. Stop before touching any
    # font if no HTML text was found (wrong directory, no .html files, ...).
    if not used_chars:
        raise SystemExit(
            f"No characters found in HTML files under {HTML_DIR!r}; "
            "fonts left untouched."
        )

    # Step 2: locate the fonts and subset each one to those characters.
    font_files = find_font_files(FONT_DIR)
    print(f"Found {len(font_files)} font files to subset.")

    for font_file in font_files:
        subset_font_in_place(font_file, used_chars)
|
Loading…
Reference in New Issue
Block a user