This commit is contained in:
Danila Fedorin 2025-02-23 13:19:35 -08:00
parent a8c2b1d05a
commit bbc8a48d57

View File

@ -15,7 +15,9 @@ def extract_text_from_html(file_path):
def get_used_characters(files):
    """Return every distinct character found in the given files.

    ``files`` is an iterable of paths. Each path is passed to
    ``extract_text_from_html`` and the items of the returned text are
    accumulated into a set. The result is the sorted, de-duplicated
    characters joined into a single string.

    Progress is reported on stdout: first the ``files`` argument itself,
    then one line per file just before it is processed.
    """
    print(files)
    seen = set()
    for file in files:
        # Announce the file before extraction so a failure can be traced to it.
        print("extracting from", file)
        seen.update(extract_text_from_html(file))
    ordered = sorted(seen)
    return "".join(ordered)
@ -60,7 +62,7 @@ def subset_font_in_place(font_path, characters):
if __name__ == "__main__":
used_chars = get_used_characters(sys.argv[2:])
print(f"Extracted {len(used_chars)} unique characters from HTML files.")
print(f"Extracted {len(used_chars)} unique characters from {len(sys.argv[2:])} HTML files.")
font_files = find_font_files(sys.argv[1])
print(f"Found {len(font_files)} font files to subset.")