aboutsummaryrefslogtreecommitdiff
path: root/src/wikiget/wikiget.py
diff options
context:
space:
mode:
author: Cody Logan <clpo13@gmail.com> 2023-10-03 13:28:23 -0700
committer: Cody Logan <clpo13@gmail.com> 2023-10-03 13:28:23 -0700
commit: 93e879e30ec2776c5d347e72be32f3ef30bd1410 (patch)
tree: 8ab08ad73ccc41ad1441f64847cc24dd9c026e86 /src/wikiget/wikiget.py
parent: 865088207b39427b6b932de4f312d82bd5e05a53 (diff)
download: wikiget-93e879e30ec2776c5d347e72be32f3ef30bd1410.tar.gz
wikiget-93e879e30ec2776c5d347e72be32f3ef30bd1410.zip
Add parallel download option in batch mode
The number of download threads can be set with the new -j option. Unfortunately, it's not that much faster than downloading serially, since the API calls made before the downloads actually start are not (and ideally should not be) parallelized. Still, for large batches, it saves a bit of time. Known issue: because the download threads write to the log asynchronously, their messages get interleaved. This will be fixed eventually.
Diffstat (limited to 'src/wikiget/wikiget.py')
-rw-r--r-- src/wikiget/wikiget.py 19
1 files changed, 13 insertions, 6 deletions
diff --git a/src/wikiget/wikiget.py b/src/wikiget/wikiget.py
index 80d5057..c16d3f6 100644
--- a/src/wikiget/wikiget.py
+++ b/src/wikiget/wikiget.py
@@ -18,6 +18,7 @@
import argparse
import logging
import sys
+from concurrent.futures import ThreadPoolExecutor
import wikiget
from wikiget.dl import download, prep_download
@@ -172,12 +173,18 @@ def main():
dl_list.append(line)
# TODO: validate file contents before download process starts
- for line_num, line in enumerate(dl_list, start=1):
- url = line.strip()
- # keep track of batch file line numbers for debugging/logging purposes
- logging.info(f"Downloading '{url}' at line {line_num}:")
- file = prep_download(url, args)
- download(file, args)
+ with ThreadPoolExecutor(max_workers=args.threads) as executor:
+ futures = []
+ for line_num, line in enumerate(dl_list, start=1):
+ url = line.strip()
+ # keep track of batch file line numbers for debugging/logging purposes
+ logging.info(f"Downloading '{url}' at line {line_num}:")
+ file = prep_download(url, args)
+ future = executor.submit(download, file, args)
+ futures.append(future)
+ # wait for downloads to finish
+ for future in futures:
+ future.result()
else:
# single download mode
file = prep_download(args.FILE, args)