diff options
| author | Cody Logan <cody@lokken.dev> | 2023-10-13 11:13:31 -0700 |
|---|---|---|
| committer | Cody Logan <cody@lokken.dev> | 2023-10-13 11:13:31 -0700 |
| commit | 87052196874cc1bf82f70a6f5aa8e6df59bc1537 (patch) | |
| tree | 1f73908b40f525dafcf503d1f00b838e8c417300 /src/wikiget | |
| parent | 226b7cb84070c6d073e153ad410fca7798c8e334 (diff) | |
| download | wikiget-87052196874cc1bf82f70a6f5aa8e6df59bc1537.tar.gz wikiget-87052196874cc1bf82f70a6f5aa8e6df59bc1537.zip | |
Revise batch file parsing to ignore blank and commented lines
Previously, blank lines would cause an error, and lines beginning
with "#" would be downloaded like any other, assuming they were valid.
Now, blank lines are skipped and a leading "#" can be used to mark
ignored files or comments.
Diffstat (limited to 'src/wikiget')
| -rw-r--r-- | src/wikiget/wikiget.py | 19 |
1 file changed, 10 insertions, 9 deletions
```diff
diff --git a/src/wikiget/wikiget.py b/src/wikiget/wikiget.py
index 5b36ce5..fba9509 100644
--- a/src/wikiget/wikiget.py
+++ b/src/wikiget/wikiget.py
@@ -145,7 +145,7 @@ def configure_logging(args):
 
 def batch_download(args):
     input_file = args.FILE
-    dl_list = []
+    dl_list = {}
 
     logging.info(f"Using batch file '{input_file}'.")
 
@@ -157,10 +157,12 @@ def batch_download(args):
         sys.exit(1)
     else:
         with fd:
-            # store file contents in memory in case something happens to the file
-            # while we're downloading
-            for _, line in enumerate(fd):
-                dl_list.append(line)
+            # read the file into memory and process each line as we go
+            for line_num, line in enumerate(fd, start=1):
+                line_s = line.strip()
+                # ignore blank lines and lines starting with "#" (for comments)
+                if line_s and not line_s.startswith("#"):
+                    dl_list[line_num] = line_s
 
     # TODO: validate file contents before download process starts
     with ThreadPoolExecutor(
@@ -168,11 +170,10 @@ def batch_download(args):
         thread_name_prefix="download",
     ) as executor:
         futures = []
-        for line_num, line in enumerate(dl_list, start=1):
-            url = line.strip()
+        for line_num, line in dl_list.items():
             # keep track of batch file line numbers for debugging/logging purposes
-            logging.info(f"Downloading '{url}' at line {line_num}:")
-            file = prep_download(url, args)
+            logging.info(f"Downloading '{line}' at line {line_num}")
+            file = prep_download(line, args)
             future = executor.submit(download, file, args)
             futures.append(future)
         # wait for downloads to finish
```
