diff options
| author | Cody Logan <clpo13@gmail.com> | 2023-10-03 13:28:23 -0700 |
|---|---|---|
| committer | Cody Logan <clpo13@gmail.com> | 2023-10-03 13:28:23 -0700 |
| commit | 93e879e30ec2776c5d347e72be32f3ef30bd1410 (patch) | |
| tree | 8ab08ad73ccc41ad1441f64847cc24dd9c026e86 /src/wikiget/wikiget.py | |
| parent | 865088207b39427b6b932de4f312d82bd5e05a53 (diff) | |
| download | wikiget-93e879e30ec2776c5d347e72be32f3ef30bd1410.tar.gz wikiget-93e879e30ec2776c5d347e72be32f3ef30bd1410.zip | |
Add parallel download option in batch mode

Number of download threads can be set with the new -j
option. Unfortunately, it's not that much faster
than downloading in serial, since the API calls
made before the downloads actually start are not
(and ideally should not be) parallelized. Still,
for large batches, it saves a bit of time.

Known issue: due to the download threads writing
to the log asynchronously, the messages get
jumbled up. This will be fixed eventually.
Diffstat (limited to 'src/wikiget/wikiget.py')
| -rw-r--r-- | src/wikiget/wikiget.py | 19 |
1 files changed, 13 insertions, 6 deletions
```diff
diff --git a/src/wikiget/wikiget.py b/src/wikiget/wikiget.py
index 80d5057..c16d3f6 100644
--- a/src/wikiget/wikiget.py
+++ b/src/wikiget/wikiget.py
@@ -18,6 +18,7 @@
 import argparse
 import logging
 import sys
+from concurrent.futures import ThreadPoolExecutor

 import wikiget
 from wikiget.dl import download, prep_download
@@ -172,12 +173,18 @@ def main():
                     dl_list.append(line)

         # TODO: validate file contents before download process starts
-        for line_num, line in enumerate(dl_list, start=1):
-            url = line.strip()
-            # keep track of batch file line numbers for debugging/logging purposes
-            logging.info(f"Downloading '{url}' at line {line_num}:")
-            file = prep_download(url, args)
-            download(file, args)
+        with ThreadPoolExecutor(max_workers=args.threads) as executor:
+            futures = []
+            for line_num, line in enumerate(dl_list, start=1):
+                url = line.strip()
+                # keep track of batch file line numbers for debugging/logging purposes
+                logging.info(f"Downloading '{url}' at line {line_num}:")
+                file = prep_download(url, args)
+                future = executor.submit(download, file, args)
+                futures.append(future)
+            # wait for downloads to finish
+            for future in futures:
+                future.result()
     else:
         # single download mode
         file = prep_download(args.FILE, args)
```
