aboutsummaryrefslogtreecommitdiff
path: root/src/wikiget
diff options
context:
space:
mode:
author Cody Logan <cody@lokken.dev> 2023-10-20 16:28:23 -0700
committer Cody Logan <cody@lokken.dev> 2023-10-20 16:28:23 -0700
commit b136af078208882ae696b21c0d8aac009e7468d4 (patch)
tree a1ddd808be14775143795b94132880f8c252f518 /src/wikiget
parent 05457af0d73ff3a820c0b465e6607fc5832a6e74 (diff)
download wikiget-b136af078208882ae696b21c0d8aac009e7468d4.tar.gz
wikiget-b136af078208882ae696b21c0d8aac009e7468d4.zip
Move batch_download function to proper file
Diffstat (limited to 'src/wikiget')
-rw-r--r-- src/wikiget/dl.py 63
-rw-r--r-- src/wikiget/wikiget.py 67
2 files changed, 70 insertions, 60 deletions
diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py
index 171b017..83aef9f 100644
--- a/src/wikiget/dl.py
+++ b/src/wikiget/dl.py
@@ -18,12 +18,14 @@
import logging
import os
import sys
+from concurrent.futures import ThreadPoolExecutor
from mwclient import APIError, InvalidResponse, LoginError, Site
from requests import ConnectionError, HTTPError
from tqdm import tqdm
import wikiget
+from wikiget.exceptions import ParseError
from wikiget.file import File
from wikiget.parse import get_dest
from wikiget.validations import verify_hash
@@ -78,12 +80,62 @@ def prep_download(dl, args):
return file
+def batch_download(args):
+ input_file = args.FILE
+ dl_list = {}
+ errors = 0
+
+ logging.info(f"Using batch file '{input_file}'.")
+
+ try:
+ fd = open(input_file)
+ except OSError as e:
+ logging.error("File could not be read. The following error was encountered:")
+ logging.error(e)
+ sys.exit(1)
+ else:
+ with fd:
+ # read the file into memory and process each line as we go
+ for line_num, line in enumerate(fd, start=1):
+ line_s = line.strip()
+ # ignore blank lines and lines starting with "#" (for comments)
+ if line_s and not line_s.startswith("#"):
+ dl_list[line_num] = line_s
+
+ # TODO: validate file contents before download process starts
+ with ThreadPoolExecutor(max_workers=args.threads) as executor:
+ futures = []
+ for line_num, line in dl_list.items():
+ # keep track of batch file line numbers for debugging/logging purposes
+ logging.info(f"Processing '{line}' at line {line_num}")
+ try:
+ file = prep_download(line, args)
+ except ParseError as e:
+ logging.warning(f"{e} (line {line_num})")
+ errors += 1
+ continue
+ except (ConnectionError, HTTPError, InvalidResponse, LoginError, APIError):
+ logging.warning(
+ f"Unable to download '{line}' (line {line_num}) due to an error"
+ )
+ errors += 1
+ continue
+ future = executor.submit(download, file, args)
+ futures.append(future)
+ # wait for downloads to finish
+ for future in futures:
+ errors += future.result()
+ return errors
+
+
def download(f, args):
file = f.image
filename = f.name
dest = f.dest
site = file.site
+ errors = 0
+
if file.exists:
# file exists either locally or at a common repository, like Wikimedia Commons
file_url = file.imageinfo["url"]
@@ -100,6 +152,7 @@ def download(f, args):
logging.warning(
f"File '{dest}' already exists, skipping download (use -f to force)"
)
+ errors += 1
else:
try:
fd = open(dest, "wb")
@@ -108,7 +161,7 @@ def download(f, args):
"File could not be written. The following error was encountered:"
)
logging.error(e)
- sys.exit(1)
+ errors += 1
else:
# download the file(s)
if args.verbose >= wikiget.STD_VERBOSE:
@@ -143,11 +196,11 @@ def download(f, args):
logging.info(success_log)
else:
logging.error("Hash mismatch! Downloaded file may be corrupt.")
- # TODO: log but don't quit while in batch mode
- sys.exit(1)
+ errors += 1
else:
# no file information returned
logging.error(f"Target '{filename}' does not appear to be a valid file")
- # TODO: log but don't quit while in batch mode
- sys.exit(1)
+ errors += 1
+
+ return errors
diff --git a/src/wikiget/wikiget.py b/src/wikiget/wikiget.py
index 90078e1..e9a1147 100644
--- a/src/wikiget/wikiget.py
+++ b/src/wikiget/wikiget.py
@@ -18,13 +18,12 @@
import argparse
import logging
import sys
-from concurrent.futures import ThreadPoolExecutor
from mwclient import APIError, InvalidResponse, LoginError
from requests import ConnectionError, HTTPError
import wikiget
-from wikiget.dl import download, prep_download
+from wikiget.dl import batch_download, download, prep_download
from wikiget.exceptions import ParseError
@@ -145,55 +144,6 @@ def configure_logging(args):
else:
# log only to console
logging.basicConfig(level=loglevel, format=log_format)
-
-
-def batch_download(args):
- input_file = args.FILE
- dl_list = {}
-
- logging.info(f"Using batch file '{input_file}'.")
-
- try:
- fd = open(input_file)
- except OSError as e:
- logging.error("File could not be read. The following error was encountered:")
- logging.error(e)
- sys.exit(1)
- else:
- with fd:
- # read the file into memory and process each line as we go
- for line_num, line in enumerate(fd, start=1):
- line_s = line.strip()
- # ignore blank lines and lines starting with "#" (for comments)
- if line_s and not line_s.startswith("#"):
- dl_list[line_num] = line_s
-
- # TODO: validate file contents before download process starts
- with ThreadPoolExecutor(
- max_workers=args.threads,
- thread_name_prefix="download",
- ) as executor:
- futures = []
- for line_num, line in dl_list.items():
- # keep track of batch file line numbers for debugging/logging purposes
- logging.info(f"Downloading '{line}' at line {line_num}")
- try:
- file = prep_download(line, args)
- except ParseError as e:
- logging.warning(f"{e} (line {line_num})")
- continue
- except (ConnectionError, HTTPError, InvalidResponse, LoginError, APIError):
- logging.error(
- f"Unable to download '{line}' (line {line_num}) due to an error"
- )
- continue
- future = executor.submit(download, file, args)
- futures.append(future)
- # wait for downloads to finish
- for future in futures:
- future.result()
-
-
def main():
# setup our environment
parser = construct_parser()
@@ -207,9 +157,14 @@ def main():
if args.batch:
# batch download mode
- # TODO: return non-zero exit code if any errors were encountered, even if some
- # downloads completed successfully
- batch_download(args)
+ errors = batch_download(args)
+ if errors:
+ # return non-zero exit code if any problems were encountered, even if some
+ # downloads completed successfully
+ logging.warning(
+ f"{errors} problem{'s'[:errors^1]} encountered during batch processing"
+ )
+ sys.exit(1)
else:
# single download mode
try:
@@ -219,4 +174,6 @@ def main():
sys.exit(1)
except (ConnectionError, HTTPError, InvalidResponse, LoginError, APIError):
sys.exit(1)
- download(file, args)
+ errors = download(file, args)
+ if errors:
+ sys.exit(1)