# Post-patch form of get_used_characters from chatgpt-subset-one-go.py
# (blob f3ed3c7 -> cb2af75); the patch added the per-file progress print.
def get_used_characters(files):
    """Return the distinct characters found across the given files.

    Each entry of ``files`` is announced on stdout and then handed to
    ``extract_text_from_html``; the items of whatever text that helper
    returns are pooled into a single set.

    Args:
        files: Iterable of file references accepted by
            ``extract_text_from_html`` (presumably paths to HTML files --
            confirm against the caller).

    Returns:
        A string made of every unique character seen, in sorted order.
        An empty ``files`` yields an empty string.
    """
    seen = set()
    for path in files:
        # Progress output comes first so the file being processed is
        # visible even if extraction fails.
        print("extracting from", path)
        seen |= set(extract_text_from_html(path))
    ordered = sorted(seen)
    return "".join(ordered)