From 87052196874cc1bf82f70a6f5aa8e6df59bc1537 Mon Sep 17 00:00:00 2001
From: Cody Logan <cody@lokken.dev>
Date: Fri, 13 Oct 2023 11:13:31 -0700
Subject: Revise batch file parsing to ignore blank and commented lines

Previously, blank lines would cause an error, and lines beginning
with "#" would be downloaded like any other, assuming they were valid.
Now, blank lines are skipped, and "#" can be used to mark ignored files
or comments.
---
 README.md              | 10 +++++-----
 docs/wikiget.1         |  1 +
 docs/wikiget.1.md      |  4 ++--
 src/wikiget/wikiget.py | 19 ++++++++++---------
 4 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index 4faadf4..eecea90 100644
--- a/README.md
+++ b/README.md
@@ -35,11 +35,11 @@ By default, the program won't overwrite existing files with the same name as the
 `-f` or `--force`. Additionally, the file can be downloaded to a different name with `-o`.
 
 Files can be batch downloaded with the `-a` or `--batch` flag. In this mode, `FILE` will be treated as an input file
-containing multiple files to download, one filename or URL per line. If an error is encountered, execution stops
-immediately and the offending filename is printed. For large batches, the process can be sped up by downloading files
-in parallel. The number of parallel downloads can be set with `-j`. For instance, with `-a -j4`, wikiget will download
-four files at once. Without `-j` or with `-j` by itself without a number, wikiget will download the files one at a
-time.
+containing multiple files to download, one filename or URL per line. Blank lines and lines starting with "#" are
+ignored. If an error is encountered, execution stops immediately and the offending filename is printed. For large
+batches, the process can be sped up by downloading files in parallel. The number of parallel downloads can be set with
+`-j`. For instance, with `-a -j4`, wikiget will download four files at once. Without `-j` or with `-j` by itself without
+a number, wikiget will download the files one at a time.
 
 ### Example usage
 
diff --git a/docs/wikiget.1 b/docs/wikiget.1
index 03a0c41..fa1a33d 100644
--- a/docs/wikiget.1
+++ b/docs/wikiget.1
@@ -39,6 +39,7 @@ fetched from the site in the URL, in this case
 In batch download mode (activated with -\f[B]a\f[R] or
 --\f[B]batch\f[R]), this is a text file containing multiple file names
 or URLs to be downloaded, one per line.
+Blank lines and lines starting with \[lq]#\[rq] are ignored.
 If an error is encountered during download, execution stops immediately
 and the offending filename is printed.
 .TP
diff --git a/docs/wikiget.1.md b/docs/wikiget.1.md
index 45184f4..fd274d5 100644
--- a/docs/wikiget.1.md
+++ b/docs/wikiget.1.md
@@ -30,8 +30,8 @@ the file name or the URL of its description page.
 *BATCHFILE*
 
 :   In batch download mode (activated with \-**a** or \-\-**batch**), this is a text file containing multiple file names
-    or URLs to be downloaded, one per line. If an error is encountered during download, execution stops immediately and
-    the offending filename is printed.
+    or URLs to be downloaded, one per line. Blank lines and lines starting with "#" are ignored. If an error is
+    encountered during download, execution stops immediately and the offending filename is printed.
 
 \-**s**, \-\-**site** *SITE*
 
diff --git a/src/wikiget/wikiget.py b/src/wikiget/wikiget.py
index 5b36ce5..fba9509 100644
--- a/src/wikiget/wikiget.py
+++ b/src/wikiget/wikiget.py
@@ -145,7 +145,7 @@ def configure_logging(args):
 
 def batch_download(args):
     input_file = args.FILE
-    dl_list = []
+    dl_list = {}
 
     logging.info(f"Using batch file '{input_file}'.")
 
@@ -157,10 +157,12 @@ def batch_download(args):
         sys.exit(1)
     else:
         with fd:
-            # store file contents in memory in case something happens to the file
-            # while we're downloading
-            for _, line in enumerate(fd):
-                dl_list.append(line)
+            # read the file into memory and process each line as we go
+            for line_num, line in enumerate(fd, start=1):
+                line_s = line.strip()
+                # ignore blank lines and lines starting with "#" (for comments)
+                if line_s and not line_s.startswith("#"):
+                    dl_list[line_num] = line_s
 
     # TODO: validate file contents before download process starts
     with ThreadPoolExecutor(
@@ -168,11 +170,10 @@ def batch_download(args):
         thread_name_prefix="download",
     ) as executor:
         futures = []
-        for line_num, line in enumerate(dl_list, start=1):
-            url = line.strip()
+        for line_num, line in dl_list.items():
             # keep track of batch file line numbers for debugging/logging purposes
-            logging.info(f"Downloading '{url}' at line {line_num}:")
-            file = prep_download(url, args)
+            logging.info(f"Downloading '{line}' at line {line_num}")
+            file = prep_download(line, args)
             future = executor.submit(download, file, args)
             futures.append(future)
         # wait for downloads to finish
-- 
cgit v1.2.3