From 93e879e30ec2776c5d347e72be32f3ef30bd1410 Mon Sep 17 00:00:00 2001
From: Cody Logan <clpo13@gmail.com>
Date: Tue, 3 Oct 2023 13:28:23 -0700
Subject: Add parallel download option in batch mode

The number of download threads can be set with the new
-j option. Unfortunately, it's not that much faster
than downloading in serial, since the API calls
made before the downloads actually start are not
(and ideally should not be) parallelized. Still,
for large batches, it saves a bit of time.

Known issue: due to the download threads writing
to the log asynchronously, the messages get
jumbled up. This will be fixed eventually.
---
 src/wikiget/wikiget.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'src/wikiget')

diff --git a/src/wikiget/wikiget.py b/src/wikiget/wikiget.py
index 80d5057..c16d3f6 100644
--- a/src/wikiget/wikiget.py
+++ b/src/wikiget/wikiget.py
@@ -18,6 +18,7 @@
 import argparse
 import logging
 import sys
+from concurrent.futures import ThreadPoolExecutor
 
 import wikiget
 from wikiget.dl import download, prep_download
@@ -172,12 +173,18 @@ def main():
                     dl_list.append(line)
 
         # TODO: validate file contents before download process starts
-        for line_num, line in enumerate(dl_list, start=1):
-            url = line.strip()
-            # keep track of batch file line numbers for debugging/logging purposes
-            logging.info(f"Downloading '{url}' at line {line_num}:")
-            file = prep_download(url, args)
-            download(file, args)
+        with ThreadPoolExecutor(max_workers=args.threads) as executor:
+            futures = []
+            for line_num, line in enumerate(dl_list, start=1):
+                url = line.strip()
+                # keep track of batch file line numbers for debugging/logging purposes
+                logging.info(f"Downloading '{url}' at line {line_num}:")
+                file = prep_download(url, args)
+                future = executor.submit(download, file, args)
+                futures.append(future)
+            # wait for downloads to finish
+            for future in futures:
+                future.result()
     else:
         # single download mode
         file = prep_download(args.FILE, args)
-- 
cgit v1.2.3