From 87052196874cc1bf82f70a6f5aa8e6df59bc1537 Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Fri, 13 Oct 2023 11:13:31 -0700 Subject: Revise batch file parsing to ignore blank and commented lines Previously, blank lines would cause an error and lines starting with "#" would be downloaded like any other line, assuming they were valid. Now, "#" can be used to mark ignored files or comments. --- README.md | 10 +++++----- docs/wikiget.1 | 1 + docs/wikiget.1.md | 4 ++-- src/wikiget/wikiget.py | 19 ++++++++++--------- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 4faadf4..eecea90 100644 --- a/README.md +++ b/README.md @@ -35,11 +35,11 @@ By default, the program won't overwrite existing files with the same name as the `-f` or `--force`. Additionally, the file can be downloaded to a different name with `-o`. Files can be batch downloaded with the `-a` or `--batch` flag. In this mode, `FILE` will be treated as an input file -containing multiple files to download, one filename or URL per line. If an error is encountered, execution stops -immediately and the offending filename is printed. For large batches, the process can be sped up by downloading files -in parallel. The number of parallel downloads can be set with `-j`. For instance, with `-a -j4`, wikiget will download -four files at once. Without `-j` or with `-j` by itself without a number, wikiget will download the files one at a -time. +containing multiple files to download, one filename or URL per line. Blank lines and lines starting with "#" are +ignored. If an error is encountered, execution stops immediately and the offending filename is printed. For large +batches, the process can be sped up by downloading files in parallel. The number of parallel downloads can be set with +`-j`. For instance, with `-a -j4`, wikiget will download four files at once. Without `-j` or with `-j` by itself without +a number, wikiget will download the files one at a time. 
### Example usage diff --git a/docs/wikiget.1 b/docs/wikiget.1 index 03a0c41..fa1a33d 100644 --- a/docs/wikiget.1 +++ b/docs/wikiget.1 @@ -39,6 +39,7 @@ fetched from the site in the URL, in this case In batch download mode (activated with -\f[B]a\f[R] or --\f[B]batch\f[R]), this is a text file containing multiple file names or URLs to be downloaded, one per line. +Blank lines and lines starting with \[lq]#\[rq] are ignored. If an error is encountered during download, execution stops immediately and the offending filename is printed. .TP diff --git a/docs/wikiget.1.md b/docs/wikiget.1.md index 45184f4..fd274d5 100644 --- a/docs/wikiget.1.md +++ b/docs/wikiget.1.md @@ -30,8 +30,8 @@ the file name or the URL of its description page. *BATCHFILE* : In batch download mode (activated with \-**a** or \-\-**batch**), this is a text file containing multiple file names - or URLs to be downloaded, one per line. If an error is encountered during download, execution stops immediately and - the offending filename is printed. + or URLs to be downloaded, one per line. Blank lines and lines starting with "#" are ignored. If an error is + encountered during download, execution stops immediately and the offending filename is printed. 
\-**s**, \-\-**site** *SITE* diff --git a/src/wikiget/wikiget.py b/src/wikiget/wikiget.py index 5b36ce5..fba9509 100644 --- a/src/wikiget/wikiget.py +++ b/src/wikiget/wikiget.py @@ -145,7 +145,7 @@ def configure_logging(args): def batch_download(args): input_file = args.FILE - dl_list = [] + dl_list = {} logging.info(f"Using batch file '{input_file}'.") @@ -157,10 +157,12 @@ def batch_download(args): sys.exit(1) else: with fd: - # store file contents in memory in case something happens to the file - # while we're downloading - for _, line in enumerate(fd): - dl_list.append(line) + # read the file into memory and process each line as we go + for line_num, line in enumerate(fd, start=1): + line_s = line.strip() + # ignore blank lines and lines starting with "#" (for comments) + if line_s and not line_s.startswith("#"): + dl_list[line_num] = line_s # TODO: validate file contents before download process starts with ThreadPoolExecutor( @@ -168,11 +170,10 @@ def batch_download(args): thread_name_prefix="download", ) as executor: futures = [] - for line_num, line in enumerate(dl_list, start=1): - url = line.strip() + for line_num, line in dl_list.items(): # keep track of batch file line numbers for debugging/logging purposes - logging.info(f"Downloading '{url}' at line {line_num}:") - file = prep_download(url, args) + logging.info(f"Downloading '{line}' at line {line_num}") + file = prep_download(line, args) future = executor.submit(download, file, args) futures.append(future) # wait for downloads to finish -- cgit v1.2.3