diff options
| author | Cody Logan <cody@lokken.dev> | 2023-10-13 12:24:13 -0700 |
|---|---|---|
| committer | Cody Logan <cody@lokken.dev> | 2023-10-13 12:24:13 -0700 |
| commit | 630541499a58f98c55d5cc372d21e745c106d250 (patch) | |
| tree | f8dc0fe50d0d4a329ce8e64f497b89d84a25f7a2 /src/wikiget | |
| parent | 875748228e509e244c8f444114387f1a03cbb393 (diff) | |
| download | wikiget-630541499a58f98c55d5cc372d21e745c106d250.tar.gz wikiget-630541499a58f98c55d5cc372d21e745c106d250.zip | |
Refactor parsing logic and revise exception handling
Diffstat (limited to 'src/wikiget')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/wikiget/dl.py | 58 |
| -rw-r--r-- | src/wikiget/exceptions.py | 20 |
| -rw-r--r-- | src/wikiget/parse.py | 54 |
| -rw-r--r-- | src/wikiget/wikiget.py | 12 |
4 files changed, 102 insertions, 42 deletions
diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py index 2b2befa..50b7460 100644 --- a/src/wikiget/dl.py +++ b/src/wikiget/dl.py @@ -18,46 +18,16 @@ import logging import os import sys -from urllib.parse import unquote, urlparse from mwclient import APIError, InvalidResponse, LoginError, Site from requests import ConnectionError, HTTPError from tqdm import tqdm import wikiget +from wikiget.exceptions import ParseError from wikiget.file import File -from wikiget.validations import valid_file, verify_hash - - -def get_dest(dl, args): - url = urlparse(dl) - - if url.netloc: - filename = url.path - site_name = url.netloc - if args.site is not wikiget.DEFAULT_SITE: - # this will work even if the user specifies 'commons.wikimedia.org' - logging.warning("target is a URL, ignoring site specified with --site") - else: - filename = dl - site_name = args.site - - file_match = valid_file(filename) - - # check if this is a valid file - if file_match and file_match.group(1): - # has File:/Image: prefix and extension - filename = file_match.group(2) - else: - # no file extension and/or prefix, probably an article - logging.error(f"Could not parse input '{filename}' as a file.") - sys.exit(1) - - filename = unquote(filename) # remove URL encoding for special characters - - dest = args.output or filename - - return filename, dest, site_name +from wikiget.parse import get_dest +from wikiget.validations import verify_hash def query_api(filename, site_name, args): @@ -98,8 +68,7 @@ def query_api(filename, site_name, args): # an API error at this point likely means access is denied, which could happen # with a private wiki logging.error( - "Access denied. Try providing credentials with " - "--username and --password." + "Access denied. Try providing credentials with --username and --password." 
) logging.debug("Full error message:") for i in e.args: @@ -110,7 +79,10 @@ def query_api(filename, site_name, args): def prep_download(dl, args): - filename, dest, site_name = get_dest(dl, args) + try: + filename, dest, site_name = get_dest(dl, args) + except ParseError: + raise file = File(filename, dest) file.object, file.site = query_api(file.name, site_name, args) return file @@ -136,7 +108,7 @@ def download(f, args): if os.path.isfile(dest) and not args.force: logging.warning( - f"File '{dest}' already exists, skipping download (use -f to ignore)" + f"File '{dest}' already exists, skipping download (use -f to force)" ) else: try: @@ -167,19 +139,25 @@ def download(f, args): fd.write(chunk) progress_bar.update(len(chunk)) - # verify file integrity and optionally print details + # verify file integrity and log details dl_sha1 = verify_hash(dest) - logging.info(f"Downloaded file SHA1 is {dl_sha1}") - logging.info(f"Server file SHA1 is {file_sha1}") + logging.info(f"Remote file SHA1 is {file_sha1}") + logging.info(f"Local file SHA1 is {dl_sha1}") if dl_sha1 == file_sha1: logging.info("Hashes match!") # at this point, we've successfully downloaded the file + success_log = f"'{filename}' downloaded" + if args.output: + success_log += f" to '{dest}'" + logging.info(success_log) else: logging.error("Hash mismatch! 
Downloaded file may be corrupt.") + # TODO: log but don't quit while in batch mode sys.exit(1) else: # no file information returned logging.error(f"Target '{filename}' does not appear to be a valid file.") + # TODO: log but don't quit while in batch mode sys.exit(1) diff --git a/src/wikiget/exceptions.py b/src/wikiget/exceptions.py new file mode 100644 index 0000000..94ed6b2 --- /dev/null +++ b/src/wikiget/exceptions.py @@ -0,0 +1,20 @@ +# wikiget - CLI tool for downloading files from Wikimedia sites +# Copyright (C) 2023 Cody Logan +# SPDX-License-Identifier: GPL-3.0-or-later +# +# Wikiget is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Wikiget is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Wikiget. If not, see <https://www.gnu.org/licenses/>. + + +class ParseError(Exception): + pass diff --git a/src/wikiget/parse.py b/src/wikiget/parse.py new file mode 100644 index 0000000..09c0767 --- /dev/null +++ b/src/wikiget/parse.py @@ -0,0 +1,54 @@ +# wikiget - CLI tool for downloading files from Wikimedia sites +# Copyright (C) 2023 Cody Logan +# SPDX-License-Identifier: GPL-3.0-or-later +# +# Wikiget is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Wikiget is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Wikiget. If not, see <https://www.gnu.org/licenses/>. + +import logging +from urllib.parse import unquote, urlparse + +import wikiget +from wikiget.exceptions import ParseError +from wikiget.validations import valid_file + + +def get_dest(dl, args): + url = urlparse(dl) + + if url.netloc: + filename = url.path + site_name = url.netloc + if args.site is not wikiget.DEFAULT_SITE: + # this will work even if the user specifies 'commons.wikimedia.org' + logging.warning("target is a URL, ignoring site specified with --site") + else: + filename = dl + site_name = args.site + + file_match = valid_file(filename) + + # check if this is a valid file + if file_match and file_match.group(1): + # has File:/Image: prefix and extension + filename = file_match.group(2) + else: + # no file extension and/or prefix, probably an article + msg = f"Could not parse input '{filename}' as a file" + raise ParseError(msg) + + filename = unquote(filename) # remove URL encoding for special characters + + dest = args.output or filename + + return filename, dest, site_name diff --git a/src/wikiget/wikiget.py b/src/wikiget/wikiget.py index fba9509..68e0233 100644 --- a/src/wikiget/wikiget.py +++ b/src/wikiget/wikiget.py @@ -22,6 +22,7 @@ from concurrent.futures import ThreadPoolExecutor import wikiget from wikiget.dl import download, prep_download +from wikiget.exceptions import ParseError def construct_parser(): @@ -173,7 +174,10 @@ def batch_download(args): for line_num, line in dl_list.items(): # keep track of batch file line numbers for debugging/logging purposes logging.info(f"Downloading '{line}' at line {line_num}") - file = prep_download(line, args) + try: + file = prep_download(line, args) + except ParseError as e: + logging.warning(f"{e} (line {line_num})") future = executor.submit(download, file, args) futures.append(future) # wait for downloads 
to finish @@ -197,5 +201,9 @@ def main(): batch_download(args) else: # single download mode - file = prep_download(args.FILE, args) + try: + file = prep_download(args.FILE, args) + except ParseError as e: + logging.error(e) + sys.exit(1) download(file, args) |
