From a1995912ed24b37a990f3fcd5e91dbf7b46669fb Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Tue, 26 Sep 2023 15:17:04 -0700 Subject: Reorganize file tree --- src/wikiget/dl.py | 159 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 src/wikiget/dl.py (limited to 'src/wikiget/dl.py') diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py new file mode 100644 index 0000000..8f32218 --- /dev/null +++ b/src/wikiget/dl.py @@ -0,0 +1,159 @@ +# wikiget - CLI tool for downloading files from Wikimedia sites +# Copyright (C) 2018-2021 Cody Logan and contributors +# SPDX-License-Identifier: GPL-3.0-or-later +# +# Wikiget is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Wikiget is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Wikiget. If not, see <https://www.gnu.org/licenses/>. + +import logging +import os +import sys +from urllib.parse import unquote, urlparse + +from mwclient import APIError, InvalidResponse, LoginError, Site +from requests import ConnectionError, HTTPError +from tqdm import tqdm + +from . 
import CHUNKSIZE, DEFAULT_SITE, USER_AGENT +from .validations import valid_file, verify_hash + + +def download(dl, args): + url = urlparse(dl) + + if url.netloc: + filename = url.path + site_name = url.netloc + if args.site is not DEFAULT_SITE: + # this will work even if the user specifies 'commons.wikimedia.org' + logging.warning("target is a URL, " + "ignoring site specified with --site") + else: + filename = dl + site_name = args.site + + file_match = valid_file(filename) + + # check if this is a valid file + if file_match and file_match.group(1): + # has File:/Image: prefix and extension + filename = file_match.group(2) + else: + # no file extension and/or prefix, probably an article + logging.error(f"Could not parse input '{filename}' as a file.") + sys.exit(1) + + filename = unquote(filename) # remove URL encoding for special characters + + dest = args.output or filename + + logging.debug(f"User agent: {USER_AGENT}") + + # connect to site and identify ourselves + logging.info(f"Site name: {site_name}") + try: + site = Site(site_name, path=args.path, clients_useragent=USER_AGENT) + if args.username and args.password: + site.login(args.username, args.password) + except ConnectionError as e: + # usually this means there is no such site, or there's no network + # connection, though it could be a certificate problem + logging.error("Couldn't connect to specified site.") + logging.debug("Full error message:") + logging.debug(e) + sys.exit(1) + except HTTPError as e: + # most likely a 403 forbidden or 404 not found error for api.php + logging.error("Couldn't find the specified wiki's api.php. " + "Check the value of --path.") + logging.debug("Full error message:") + logging.debug(e) + sys.exit(1) + except (InvalidResponse, LoginError) as e: + # InvalidResponse: site exists, but we couldn't communicate with the + # API endpoint for some reason other than an HTTP error. 
+ # LoginError: missing or invalid credentials + logging.error(e) + sys.exit(1) + + # get info about the target file + try: + file = site.images[filename] + except APIError as e: + # an API error at this point likely means access is denied, + # which could happen with a private wiki + logging.error("Access denied. Try providing credentials with " + "--username and --password.") + logging.debug("Full error message:") + for i in e.args: + logging.debug(i) + sys.exit(1) + + if file.imageinfo != {}: + # file exists either locally or at a common repository, + # like Wikimedia Commons + file_url = file.imageinfo["url"] + file_size = file.imageinfo["size"] + file_sha1 = file.imageinfo["sha1"] + + filename_log = (f"Downloading '{filename}' ({file_size} bytes) " + f"from {site.host}") + if args.output: + filename_log += f" to '{dest}'" + logging.info(filename_log) + logging.info(f"{file_url}") + + if os.path.isfile(dest) and not args.force: + logging.warning(f"File '{dest}' already exists, skipping download " + "(use -f to ignore)") + else: + try: + fd = open(dest, "wb") + except IOError as e: + logging.error("File could not be written. 
" + "The following error was encountered:") + logging.error(e) + sys.exit(1) + else: + # download the file(s) + if args.verbose >= 1: + leave_bars = True + else: + leave_bars = False + with tqdm(leave=leave_bars, total=file_size, + unit="B", unit_scale=True, + unit_divisor=CHUNKSIZE) as progress_bar: + with fd: + res = site.connection.get(file_url, stream=True) + progress_bar.set_postfix(file=dest, refresh=False) + for chunk in res.iter_content(CHUNKSIZE): + fd.write(chunk) + progress_bar.update(len(chunk)) + + # verify file integrity and optionally print details + dl_sha1 = verify_hash(dest) + + logging.info(f"Downloaded file SHA1 is {dl_sha1}") + logging.info(f"Server file SHA1 is {file_sha1}") + if dl_sha1 == file_sha1: + logging.info("Hashes match!") + # at this point, we've successfully downloaded the file + else: + logging.error("Hash mismatch! Downloaded file may be corrupt.") + sys.exit(1) + + else: + # no file information returned + logging.error(f"Target '{filename}' does not appear to be " + "a valid file.") + sys.exit(1) -- cgit v1.2.3 From 75a79785d851efa319f4216e0d3471d30a02154a Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Tue, 26 Sep 2023 15:45:43 -0700 Subject: Style and format fixes --- src/wikiget/dl.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'src/wikiget/dl.py') diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py index 9850ce8..791db61 100644 --- a/src/wikiget/dl.py +++ b/src/wikiget/dl.py @@ -36,8 +36,7 @@ def download(dl, args): site_name = url.netloc if args.site is not wikiget.DEFAULT_SITE: # this will work even if the user specifies 'commons.wikimedia.org' - logging.warning("target is a URL, " - "ignoring site specified with --site") + logging.warning("target is a URL, ignoring site specified with --site") else: filename = dl site_name = args.site @@ -74,8 +73,9 @@ def download(dl, args): sys.exit(1) except HTTPError as e: # most likely a 403 forbidden or 404 not found error for 
api.php - logging.error("Couldn't find the specified wiki's api.php. " - "Check the value of --path.") + logging.error( + "Couldn't find the specified wiki's api.php. Check the value of --path." + ) logging.debug("Full error message:") logging.debug(e) sys.exit(1) @@ -92,8 +92,10 @@ def download(dl, args): except APIError as e: # an API error at this point likely means access is denied, # which could happen with a private wiki - logging.error("Access denied. Try providing credentials with " - "--username and --password.") + logging.error( + "Access denied. Try providing credentials with " + "--username and --password." + ) logging.debug("Full error message:") for i in e.args: logging.debug(i) @@ -106,22 +108,23 @@ def download(dl, args): file_size = file.imageinfo["size"] file_sha1 = file.imageinfo["sha1"] - filename_log = (f"Downloading '{filename}' ({file_size} bytes) " - f"from {site.host}") + filename_log = f"Downloading '{filename}' ({file_size} bytes) from {site.host}" if args.output: filename_log += f" to '{dest}'" logging.info(filename_log) logging.info(f"{file_url}") if os.path.isfile(dest) and not args.force: - logging.warning(f"File '{dest}' already exists, skipping download " - "(use -f to ignore)") + logging.warning( + f"File '{dest}' already exists, skipping download (use -f to ignore)" + ) else: try: fd = open(dest, "wb") except OSError as e: - logging.error("File could not be written. " - "The following error was encountered:") + logging.error( + "File could not be written. 
The following error was encountered:" + ) logging.error(e) sys.exit(1) else: @@ -158,6 +161,5 @@ def download(dl, args): else: # no file information returned - logging.error(f"Target '{filename}' does not appear to be " - "a valid file.") + logging.error(f"Target '{filename}' does not appear to be a valid file.") sys.exit(1) -- cgit v1.2.3 From 485df31f095a9b629a1dcc04af13956325856d8c Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Tue, 3 Oct 2023 09:51:58 -0700 Subject: Update README and do some code cleanup --- src/wikiget/dl.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'src/wikiget/dl.py') diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py index 791db61..d32736f 100644 --- a/src/wikiget/dl.py +++ b/src/wikiget/dl.py @@ -1,5 +1,5 @@ # wikiget - CLI tool for downloading files from Wikimedia sites -# Copyright (C) 2018-2021 Cody Logan and contributors +# Copyright (C) 2018-2023 Cody Logan and contributors # SPDX-License-Identifier: GPL-3.0-or-later # # Wikiget is free software: you can redistribute it and/or modify @@ -65,8 +65,8 @@ def download(dl, args): if args.username and args.password: site.login(args.username, args.password) except ConnectionError as e: - # usually this means there is no such site, or there's no network - # connection, though it could be a certificate problem + # usually this means there is no such site, or there's no network connection, + # though it could be a certificate problem logging.error("Couldn't connect to specified site.") logging.debug("Full error message:") logging.debug(e) @@ -80,8 +80,8 @@ def download(dl, args): logging.debug(e) sys.exit(1) except (InvalidResponse, LoginError) as e: - # InvalidResponse: site exists, but we couldn't communicate with the - # API endpoint for some reason other than an HTTP error. + # InvalidResponse: site exists, but we couldn't communicate with the API + # endpoint for some reason other than an HTTP error. 
# LoginError: missing or invalid credentials logging.error(e) sys.exit(1) @@ -90,8 +90,8 @@ def download(dl, args): try: file = site.images[filename] except APIError as e: - # an API error at this point likely means access is denied, - # which could happen with a private wiki + # an API error at this point likely means access is denied, which could happen + # with a private wiki logging.error( "Access denied. Try providing credentials with " "--username and --password." @@ -102,8 +102,7 @@ def download(dl, args): sys.exit(1) if file.imageinfo != {}: - # file exists either locally or at a common repository, - # like Wikimedia Commons + # file exists either locally or at a common repository, like Wikimedia Commons file_url = file.imageinfo["url"] file_size = file.imageinfo["size"] file_sha1 = file.imageinfo["sha1"] -- cgit v1.2.3 From 865088207b39427b6b932de4f312d82bd5e05a53 Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Tue, 3 Oct 2023 13:26:09 -0700 Subject: Refactor for better code organization --- src/wikiget/dl.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'src/wikiget/dl.py') diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py index d32736f..2b2befa 100644 --- a/src/wikiget/dl.py +++ b/src/wikiget/dl.py @@ -25,10 +25,11 @@ from requests import ConnectionError, HTTPError from tqdm import tqdm import wikiget +from wikiget.file import File from wikiget.validations import valid_file, verify_hash -def download(dl, args): +def get_dest(dl, args): url = urlparse(dl) if url.netloc: @@ -56,6 +57,10 @@ def download(dl, args): dest = args.output or filename + return filename, dest, site_name + + +def query_api(filename, site_name, args): logging.debug(f"User agent: {wikiget.USER_AGENT}") # connect to site and identify ourselves @@ -101,6 +106,22 @@ def download(dl, args): logging.debug(i) sys.exit(1) + return file, site + + +def prep_download(dl, args): + filename, dest, site_name = get_dest(dl, args) + file = File(filename, 
dest) + file.object, file.site = query_api(file.name, site_name, args) + return file + + +def download(f, args): + file = f.object + filename = f.name + site = f.site + dest = f.dest + if file.imageinfo != {}: # file exists either locally or at a common repository, like Wikimedia Commons file_url = file.imageinfo["url"] -- cgit v1.2.3 From 630541499a58f98c55d5cc372d21e745c106d250 Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Fri, 13 Oct 2023 12:24:13 -0700 Subject: Refactor parsing logic and revise exception handling --- src/wikiget/dl.py | 58 +++++++++++++++++-------------------------------------- 1 file changed, 18 insertions(+), 40 deletions(-) (limited to 'src/wikiget/dl.py') diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py index 2b2befa..50b7460 100644 --- a/src/wikiget/dl.py +++ b/src/wikiget/dl.py @@ -18,46 +18,16 @@ import logging import os import sys -from urllib.parse import unquote, urlparse from mwclient import APIError, InvalidResponse, LoginError, Site from requests import ConnectionError, HTTPError from tqdm import tqdm import wikiget +from wikiget.exceptions import ParseError from wikiget.file import File -from wikiget.validations import valid_file, verify_hash - - -def get_dest(dl, args): - url = urlparse(dl) - - if url.netloc: - filename = url.path - site_name = url.netloc - if args.site is not wikiget.DEFAULT_SITE: - # this will work even if the user specifies 'commons.wikimedia.org' - logging.warning("target is a URL, ignoring site specified with --site") - else: - filename = dl - site_name = args.site - - file_match = valid_file(filename) - - # check if this is a valid file - if file_match and file_match.group(1): - # has File:/Image: prefix and extension - filename = file_match.group(2) - else: - # no file extension and/or prefix, probably an article - logging.error(f"Could not parse input '{filename}' as a file.") - sys.exit(1) - - filename = unquote(filename) # remove URL encoding for special characters - - dest = args.output or filename 
- - return filename, dest, site_name +from wikiget.parse import get_dest +from wikiget.validations import verify_hash def query_api(filename, site_name, args): @@ -98,8 +68,7 @@ def query_api(filename, site_name, args): # an API error at this point likely means access is denied, which could happen # with a private wiki logging.error( - "Access denied. Try providing credentials with " - "--username and --password." + "Access denied. Try providing credentials with --username and --password." ) logging.debug("Full error message:") for i in e.args: @@ -110,7 +79,10 @@ def query_api(filename, site_name, args): def prep_download(dl, args): - filename, dest, site_name = get_dest(dl, args) + try: + filename, dest, site_name = get_dest(dl, args) + except ParseError: + raise file = File(filename, dest) file.object, file.site = query_api(file.name, site_name, args) return file @@ -136,7 +108,7 @@ def download(f, args): if os.path.isfile(dest) and not args.force: logging.warning( - f"File '{dest}' already exists, skipping download (use -f to ignore)" + f"File '{dest}' already exists, skipping download (use -f to force)" ) else: try: @@ -167,19 +139,25 @@ def download(f, args): fd.write(chunk) progress_bar.update(len(chunk)) - # verify file integrity and optionally print details + # verify file integrity and log details dl_sha1 = verify_hash(dest) - logging.info(f"Downloaded file SHA1 is {dl_sha1}") - logging.info(f"Server file SHA1 is {file_sha1}") + logging.info(f"Remote file SHA1 is {file_sha1}") + logging.info(f"Local file SHA1 is {dl_sha1}") if dl_sha1 == file_sha1: logging.info("Hashes match!") # at this point, we've successfully downloaded the file + success_log = f"'{filename}' downloaded" + if args.output: + success_log += f" to '{dest}'" + logging.info(success_log) else: logging.error("Hash mismatch! 
Downloaded file may be corrupt.") + # TODO: log but don't quit while in batch mode sys.exit(1) else: # no file information returned logging.error(f"Target '{filename}' does not appear to be a valid file.") + # TODO: log but don't quit while in batch mode sys.exit(1) -- cgit v1.2.3 From 06335ba0176cabd84f5b548995f465ac1c09bc8e Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Tue, 17 Oct 2023 14:00:14 -0700 Subject: Clean up exception handling and error messages --- src/wikiget/dl.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'src/wikiget/dl.py') diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py index 50b7460..4521b72 100644 --- a/src/wikiget/dl.py +++ b/src/wikiget/dl.py @@ -24,7 +24,6 @@ from requests import ConnectionError, HTTPError from tqdm import tqdm import wikiget -from wikiget.exceptions import ParseError from wikiget.file import File from wikiget.parse import get_dest from wikiget.validations import verify_hash @@ -42,24 +41,22 @@ def query_api(filename, site_name, args): except ConnectionError as e: # usually this means there is no such site, or there's no network connection, # though it could be a certificate problem - logging.error("Couldn't connect to specified site.") - logging.debug("Full error message:") + logging.error("Could not connect to specified site") logging.debug(e) - sys.exit(1) + raise except HTTPError as e: # most likely a 403 forbidden or 404 not found error for api.php logging.error( - "Couldn't find the specified wiki's api.php. Check the value of --path." + "Could not find the specified wiki's api.php. Check the value of --path." ) - logging.debug("Full error message:") logging.debug(e) - sys.exit(1) + raise except (InvalidResponse, LoginError) as e: # InvalidResponse: site exists, but we couldn't communicate with the API # endpoint for some reason other than an HTTP error. 
# LoginError: missing or invalid credentials logging.error(e) - sys.exit(1) + raise # get info about the target file try: @@ -70,19 +67,15 @@ def query_api(filename, site_name, args): logging.error( "Access denied. Try providing credentials with --username and --password." ) - logging.debug("Full error message:") for i in e.args: logging.debug(i) - sys.exit(1) + raise return file, site def prep_download(dl, args): - try: - filename, dest, site_name = get_dest(dl, args) - except ParseError: - raise + filename, dest, site_name = get_dest(dl, args) file = File(filename, dest) file.object, file.site = query_api(file.name, site_name, args) return file @@ -158,6 +151,6 @@ def download(f, args): else: # no file information returned - logging.error(f"Target '{filename}' does not appear to be a valid file.") + logging.error(f"Target '{filename}' does not appear to be a valid file") # TODO: log but don't quit while in batch mode sys.exit(1) -- cgit v1.2.3 From 05457af0d73ff3a820c0b465e6607fc5832a6e74 Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Fri, 20 Oct 2023 16:23:28 -0700 Subject: Reorganize File class --- src/wikiget/dl.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'src/wikiget/dl.py') diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py index 4521b72..171b017 100644 --- a/src/wikiget/dl.py +++ b/src/wikiget/dl.py @@ -30,10 +30,8 @@ from wikiget.validations import verify_hash def query_api(filename, site_name, args): - logging.debug(f"User agent: {wikiget.USER_AGENT}") - # connect to site and identify ourselves - logging.info(f"Site name: {site_name}") + logging.info(f"Connecting to {site_name}") try: site = Site(site_name, path=args.path, clients_useragent=wikiget.USER_AGENT) if args.username and args.password: @@ -60,7 +58,7 @@ def query_api(filename, site_name, args): # get info about the target file try: - file = site.images[filename] + image = site.images[filename] except APIError as e: # an API error at this point likely 
means access is denied, which could happen # with a private wiki @@ -71,23 +69,22 @@ def query_api(filename, site_name, args): logging.debug(i) raise - return file, site + return image def prep_download(dl, args): - filename, dest, site_name = get_dest(dl, args) - file = File(filename, dest) - file.object, file.site = query_api(file.name, site_name, args) + file = get_dest(dl, args) + file.image = query_api(file.name, file.site, args) return file def download(f, args): - file = f.object + file = f.image filename = f.name - site = f.site dest = f.dest + site = file.site - if file.imageinfo != {}: + if file.exists: # file exists either locally or at a common repository, like Wikimedia Commons file_url = file.imageinfo["url"] file_size = file.imageinfo["size"] -- cgit v1.2.3 From b136af078208882ae696b21c0d8aac009e7468d4 Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Fri, 20 Oct 2023 16:28:23 -0700 Subject: Move batch_download function to proper file --- src/wikiget/dl.py | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 5 deletions(-) (limited to 'src/wikiget/dl.py') diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py index 171b017..83aef9f 100644 --- a/src/wikiget/dl.py +++ b/src/wikiget/dl.py @@ -18,12 +18,14 @@ import logging import os import sys +from concurrent.futures import ThreadPoolExecutor from mwclient import APIError, InvalidResponse, LoginError, Site from requests import ConnectionError, HTTPError from tqdm import tqdm import wikiget +from wikiget.exceptions import ParseError from wikiget.file import File from wikiget.parse import get_dest from wikiget.validations import verify_hash @@ -78,12 +80,62 @@ def prep_download(dl, args): return file +def batch_download(args): + input_file = args.FILE + dl_list = {} + errors = 0 + + logging.info(f"Using batch file '{input_file}'.") + + try: + fd = open(input_file) + except OSError as e: + logging.error("File could not be read. 
The following error was encountered:") + logging.error(e) + sys.exit(1) + else: + with fd: + # read the file into memory and process each line as we go + for line_num, line in enumerate(fd, start=1): + line_s = line.strip() + # ignore blank lines and lines starting with "#" (for comments) + if line_s and not line_s.startswith("#"): + dl_list[line_num] = line_s + + # TODO: validate file contents before download process starts + with ThreadPoolExecutor(max_workers=args.threads) as executor: + futures = [] + for line_num, line in dl_list.items(): + # keep track of batch file line numbers for debugging/logging purposes + logging.info(f"Processing '{line}' at line {line_num}") + try: + file = prep_download(line, args) + except ParseError as e: + logging.warning(f"{e} (line {line_num})") + errors += 1 + continue + except (ConnectionError, HTTPError, InvalidResponse, LoginError, APIError): + logging.warning( + f"Unable to download '{line}' (line {line_num}) due to an error" + ) + errors += 1 + continue + future = executor.submit(download, file, args) + futures.append(future) + # wait for downloads to finish + for future in futures: + errors += future.result() + return errors + + def download(f, args): file = f.image filename = f.name dest = f.dest site = file.site + errors = 0 + if file.exists: # file exists either locally or at a common repository, like Wikimedia Commons file_url = file.imageinfo["url"] @@ -100,6 +152,7 @@ def download(f, args): logging.warning( f"File '{dest}' already exists, skipping download (use -f to force)" ) + errors += 1 else: try: fd = open(dest, "wb") @@ -108,7 +161,7 @@ def download(f, args): "File could not be written. The following error was encountered:" ) logging.error(e) - sys.exit(1) + errors += 1 else: # download the file(s) if args.verbose >= wikiget.STD_VERBOSE: @@ -143,11 +196,11 @@ def download(f, args): logging.info(success_log) else: logging.error("Hash mismatch! 
Downloaded file may be corrupt.") - # TODO: log but don't quit while in batch mode - sys.exit(1) + errors += 1 else: # no file information returned logging.error(f"Target '{filename}' does not appear to be a valid file") - # TODO: log but don't quit while in batch mode - sys.exit(1) + errors += 1 + + return errors -- cgit v1.2.3 From 3d37cf6f86eb6c48a3a0a094c42ade6d7aed1daf Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Fri, 20 Oct 2023 16:31:56 -0700 Subject: Move logging configuration to new file Also, use a LoggerAdapter to add contextual info (such as filenames) to log messages when downloading, especially useful with threaded batch processing. --- src/wikiget/dl.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'src/wikiget/dl.py') diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py index 83aef9f..5491378 100644 --- a/src/wikiget/dl.py +++ b/src/wikiget/dl.py @@ -27,6 +27,7 @@ from tqdm import tqdm import wikiget from wikiget.exceptions import ParseError from wikiget.file import File +from wikiget.logging import FileLogAdapter from wikiget.parse import get_dest from wikiget.validations import verify_hash @@ -136,6 +137,9 @@ def download(f, args): errors = 0 + logger = logging.getLogger("") + adapter = FileLogAdapter(logger, {"filename": filename}) + if file.exists: # file exists either locally or at a common repository, like Wikimedia Commons file_url = file.imageinfo["url"] @@ -145,22 +149,17 @@ def download(f, args): filename_log = f"Downloading '{filename}' ({file_size} bytes) from {site.host}" if args.output: filename_log += f" to '{dest}'" - logging.info(filename_log) - logging.info(f"{file_url}") + adapter.info(filename_log) + adapter.info(f"{file_url}") if os.path.isfile(dest) and not args.force: - logging.warning( - f"File '{dest}' already exists, skipping download (use -f to force)" - ) + adapter.warning("File already exists, skipping download (use -f to force)") errors += 1 else: try: fd = open(dest, 
"wb") except OSError as e: - logging.error( - "File could not be written. The following error was encountered:" - ) - logging.error(e) + adapter.error(f"File could not be written. {e}") errors += 1 else: # download the file(s) @@ -185,22 +184,22 @@ def download(f, args): # verify file integrity and log details dl_sha1 = verify_hash(dest) - logging.info(f"Remote file SHA1 is {file_sha1}") - logging.info(f"Local file SHA1 is {dl_sha1}") + adapter.info(f"Remote file SHA1 is {file_sha1}") + adapter.info(f"Local file SHA1 is {dl_sha1}") if dl_sha1 == file_sha1: - logging.info("Hashes match!") + adapter.info("Hashes match!") # at this point, we've successfully downloaded the file success_log = f"'{filename}' downloaded" if args.output: success_log += f" to '{dest}'" - logging.info(success_log) + adapter.info(success_log) else: - logging.error("Hash mismatch! Downloaded file may be corrupt.") + adapter.error("Hash mismatch! Downloaded file may be corrupt.") errors += 1 else: # no file information returned - logging.error(f"Target '{filename}' does not appear to be a valid file") + adapter.warning("Target does not appear to be a valid file") errors += 1 return errors -- cgit v1.2.3 From c1820026f97eaf671c29ab30f02879de0ac4df89 Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Fri, 20 Oct 2023 16:36:14 -0700 Subject: Add type annotations to source files --- src/wikiget/dl.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src/wikiget/dl.py') diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py index 5491378..5b5b43b 100644 --- a/src/wikiget/dl.py +++ b/src/wikiget/dl.py @@ -18,9 +18,11 @@ import logging import os import sys +from argparse import Namespace from concurrent.futures import ThreadPoolExecutor from mwclient import APIError, InvalidResponse, LoginError, Site +from mwclient.image import Image from requests import ConnectionError, HTTPError from tqdm import tqdm @@ -32,7 +34,7 @@ from wikiget.parse import get_dest from wikiget.validations 
import verify_hash -def query_api(filename, site_name, args): +def query_api(filename: str, site_name: str, args: Namespace) -> Image: # connect to site and identify ourselves logging.info(f"Connecting to {site_name}") try: @@ -75,13 +77,13 @@ def query_api(filename, site_name, args): return image -def prep_download(dl, args): +def prep_download(dl: str, args: Namespace) -> File: file = get_dest(dl, args) file.image = query_api(file.name, file.site, args) return file -def batch_download(args): +def batch_download(args: Namespace) -> int: input_file = args.FILE dl_list = {} errors = 0 @@ -129,7 +131,7 @@ def batch_download(args): return errors -def download(f, args): +def download(f: File, args: Namespace) -> int: file = f.image filename = f.name dest = f.dest -- cgit v1.2.3