From a8c2b1d05abdd4077fc6bfefdab92a974b478da2 Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Sun, 23 Feb 2025 13:16:33 -0800 Subject: [PATCH] Fix bug in subsetting script Signed-off-by: Danila Fedorin --- chatgpt-subset-one-go.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/chatgpt-subset-one-go.py b/chatgpt-subset-one-go.py index 77e3550..f3ed3c7 100644 --- a/chatgpt-subset-one-go.py +++ b/chatgpt-subset-one-go.py @@ -16,8 +16,7 @@ def get_used_characters(files): """Collect unique characters from all .html files in the given directory.""" char_set = set() for file in files: - full_path = os.path.join(root, file) - text = extract_text_from_html(full_path) + text = extract_text_from_html(file) char_set.update(text) return "".join(sorted(char_set))