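Move batch_download function from wikiget.py to dl.py

Also make download() and batch_download() return an error count so that main() can exit with a non-zero status when any download fails.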

author: Cody Logan <cody@lokken.dev> 2023-10-20 16:28:23 -0700
committer: Cody Logan <cody@lokken.dev> 2023-10-20 16:28:23 -0700
commit: b136af078208882ae696b21c0d8aac009e7468d4 (patch)
tree: a1ddd808be14775143795b94132880f8c252f518 /src/wikiget
parent: 05457af0d73ff3a820c0b465e6607fc5832a6e74 (diff)
download: wikiget-b136af078208882ae696b21c0d8aac009e7468d4.tar.gz wikiget-b136af078208882ae696b21c0d8aac009e7468d4.zip
2 files changed, 70 insertions, 60 deletions
diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py
index 171b017..83aef9f 100644
--- a/src/wikiget/dl.py
+++ b/src/wikiget/dl.py
@@ -18,12 +18,14 @@
 import logging
 import os
 import sys
+from concurrent.futures import ThreadPoolExecutor
 
 from mwclient import APIError, InvalidResponse, LoginError, Site
 from requests import ConnectionError, HTTPError
 from tqdm import tqdm
 
 import wikiget
+from wikiget.exceptions import ParseError
 from wikiget.file import File
 from wikiget.parse import get_dest
 from wikiget.validations import verify_hash
@@ -78,12 +80,62 @@ def prep_download(dl, args):
     return file
 
 
+def batch_download(args):
+    input_file = args.FILE
+    dl_list = {}
+    errors = 0
+
+    logging.info(f"Using batch file '{input_file}'.")
+
+    try:
+        fd = open(input_file)
+    except OSError as e:
+        logging.error("File could not be read. The following error was encountered:")
+        logging.error(e)
+        sys.exit(1)
+    else:
+        with fd:
+            # read the file into memory and process each line as we go
+            for line_num, line in enumerate(fd, start=1):
+                line_s = line.strip()
+                # ignore blank lines and lines starting with "#" (for comments)
+                if line_s and not line_s.startswith("#"):
+                    dl_list[line_num] = line_s
+
+    # TODO: validate file contents before download process starts
+    with ThreadPoolExecutor(max_workers=args.threads) as executor:
+        futures = []
+        for line_num, line in dl_list.items():
+            # keep track of batch file line numbers for debugging/logging purposes
+            logging.info(f"Processing '{line}' at line {line_num}")
+            try:
+                file = prep_download(line, args)
+            except ParseError as e:
+                logging.warning(f"{e} (line {line_num})")
+                errors += 1
+                continue
+            except (ConnectionError, HTTPError, InvalidResponse, LoginError, APIError):
+                logging.warning(
+                    f"Unable to download '{line}' (line {line_num}) due to an error"
+                )
+                errors += 1
+                continue
+            future = executor.submit(download, file, args)
+            futures.append(future)
+        # wait for downloads to finish
+        for future in futures:
+            errors += future.result()
+    return errors
+
+
 def download(f, args):
     file = f.image
     filename = f.name
     dest = f.dest
     site = file.site
 
+    errors = 0
+
     if file.exists:
         # file exists either locally or at a common repository, like Wikimedia Commons
         file_url = file.imageinfo["url"]
@@ -100,6 +152,7 @@ def download(f, args):
             logging.warning(
                 f"File '{dest}' already exists, skipping download (use -f to force)"
             )
+            errors += 1
         else:
             try:
                 fd = open(dest, "wb")
@@ -108,7 +161,7 @@ def download(f, args):
                     "File could not be written. The following error was encountered:"
                 )
                 logging.error(e)
-                sys.exit(1)
+                errors += 1
             else:
                 # download the file(s)
                 if args.verbose >= wikiget.STD_VERBOSE:
@@ -143,11 +196,11 @@ def download(f, args):
                 logging.info(success_log)
             else:
                 logging.error("Hash mismatch! Downloaded file may be corrupt.")
-                # TODO: log but don't quit while in batch mode
-                sys.exit(1)
+                errors += 1
 
     else:
         # no file information returned
         logging.error(f"Target '{filename}' does not appear to be a valid file")
-        # TODO: log but don't quit while in batch mode
-        sys.exit(1)
+        errors += 1
+
+    return errors
diff --git a/src/wikiget/wikiget.py b/src/wikiget/wikiget.py
index 90078e1..e9a1147 100644
--- a/src/wikiget/wikiget.py
+++ b/src/wikiget/wikiget.py
@@ -18,13 +18,12 @@
 import argparse
 import logging
 import sys
-from concurrent.futures import ThreadPoolExecutor
 
 from mwclient import APIError, InvalidResponse, LoginError
 from requests import ConnectionError, HTTPError
 
 import wikiget
-from wikiget.dl import download, prep_download
+from wikiget.dl import batch_download, download, prep_download
 from wikiget.exceptions import ParseError
 
 
@@ -145,55 +144,6 @@ def configure_logging(args):
     else:
         # log only to console
         logging.basicConfig(level=loglevel, format=log_format)
-
-
-def batch_download(args):
-    input_file = args.FILE
-    dl_list = {}
-
-    logging.info(f"Using batch file '{input_file}'.")
-
-    try:
-        fd = open(input_file)
-    except OSError as e:
-        logging.error("File could not be read. The following error was encountered:")
-        logging.error(e)
-        sys.exit(1)
-    else:
-        with fd:
-            # read the file into memory and process each line as we go
-            for line_num, line in enumerate(fd, start=1):
-                line_s = line.strip()
-                # ignore blank lines and lines starting with "#" (for comments)
-                if line_s and not line_s.startswith("#"):
-                    dl_list[line_num] = line_s
-
-    # TODO: validate file contents before download process starts
-    with ThreadPoolExecutor(
-        max_workers=args.threads,
-        thread_name_prefix="download",
-    ) as executor:
-        futures = []
-        for line_num, line in dl_list.items():
-            # keep track of batch file line numbers for debugging/logging purposes
-            logging.info(f"Downloading '{line}' at line {line_num}")
-            try:
-                file = prep_download(line, args)
-            except ParseError as e:
-                logging.warning(f"{e} (line {line_num})")
-                continue
-            except (ConnectionError, HTTPError, InvalidResponse, LoginError, APIError):
-                logging.error(
-                    f"Unable to download '{line}' (line {line_num}) due to an error"
-                )
-                continue
-            future = executor.submit(download, file, args)
-            futures.append(future)
-        # wait for downloads to finish
-        for future in futures:
-            future.result()
-
-
 def main():
     # setup our environment
     parser = construct_parser()
@@ -207,9 +157,14 @@ def main():
 
     if args.batch:
         # batch download mode
-        # TODO: return non-zero exit code if any errors were encountered, even if some
-        # downloads completed successfully
-        batch_download(args)
+        errors = batch_download(args)
+        if errors:
+            # return non-zero exit code if any problems were encountered, even if some
+            # downloads completed successfully
+            logging.warning(
+                f"{errors} problem{'s'[:errors^1]} encountered during batch processing"
+            )
+            sys.exit(1)
     else:
         # single download mode
         try:
@@ -219,4 +174,6 @@ def main():
             sys.exit(1)
         except (ConnectionError, HTTPError, InvalidResponse, LoginError, APIError):
             sys.exit(1)
-        download(file, args)
+        errors = download(file, args)
+        if errors:
+            sys.exit(1)
author	Cody Logan <cody@lokken.dev>	2023-10-20 16:28:23 -0700
committer	Cody Logan <cody@lokken.dev>	2023-10-20 16:28:23 -0700
commit	b136af078208882ae696b21c0d8aac009e7468d4 (patch)
tree	a1ddd808be14775143795b94132880f8c252f518 /src/wikiget
parent	05457af0d73ff3a820c0b465e6607fc5832a6e74 (diff)
download	wikiget-b136af078208882ae696b21c0d8aac009e7468d4.tar.gz wikiget-b136af078208882ae696b21c0d8aac009e7468d4.zip