diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/wikiget/__init__.py | 2 |
| -rw-r--r-- | src/wikiget/dl.py | 85 |
| -rw-r--r-- | src/wikiget/wikiget.py | 71 |
3 files changed, 100 insertions, 58 deletions
diff --git a/src/wikiget/__init__.py b/src/wikiget/__init__.py
index b68b0ec..20ea620 100644
--- a/src/wikiget/__init__.py
+++ b/src/wikiget/__init__.py
@@ -1,5 +1,5 @@
 # wikiget - CLI tool for downloading files from Wikimedia sites
-# Copyright (C) 2018, 2019, 2020 Cody Logan and contributors
+# Copyright (C) 2018-2021 Cody Logan and contributors
 # SPDX-License-Identifier: GPL-3.0-or-later
 #
 # Wikiget is free software: you can redistribute it and/or modify
diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py
index 949f09e..9850ce8 100644
--- a/src/wikiget/dl.py
+++ b/src/wikiget/dl.py
@@ -15,6 +15,7 @@
 # You should have received a copy of the GNU General Public License
 # along with Wikiget. If not, see <https://www.gnu.org/licenses/>.
 
+import logging
 import os
 import sys
 from urllib.parse import unquote, urlparse
@@ -33,9 +34,10 @@ def download(dl, args):
     if url.netloc:
         filename = url.path
         site_name = url.netloc
-        if args.site is not wikiget.DEFAULT_SITE and not args.quiet:
+        if args.site is not wikiget.DEFAULT_SITE:
             # this will work even if the user specifies 'commons.wikimedia.org'
-            print("Warning: target is a URL, ignoring site specified with --site")
+            logging.warning("target is a URL, "
+                            "ignoring site specified with --site")
     else:
         filename = dl
         site_name = args.site
@@ -48,19 +50,17 @@ def download(dl, args):
         filename = file_match.group(2)
     else:
         # no file extension and/or prefix, probably an article
-        print(f"Could not parse input '{filename}' as a file. ")
+        logging.error(f"Could not parse input '{filename}' as a file.")
         sys.exit(1)
 
     filename = unquote(filename)  # remove URL encoding for special characters
 
     dest = args.output or filename
 
-    if args.verbose >= wikiget.VERY_VERBOSE:
-        print(f"User agent: {wikiget.USER_AGENT}")
+    logging.debug(f"User agent: {wikiget.USER_AGENT}")
 
     # connect to site and identify ourselves
-    if args.verbose >= wikiget.STD_VERBOSE:
-        print(f"Site name: {site_name}")
+    logging.info(f"Site name: {site_name}")
     try:
         site = Site(site_name, path=args.path, clients_useragent=wikiget.USER_AGENT)
         if args.username and args.password:
@@ -68,26 +68,22 @@ def download(dl, args):
     except ConnectionError as e:
         # usually this means there is no such site, or there's no network
         # connection, though it could be a certificate problem
-        print("Error: couldn't connect to specified site.")
-        if args.verbose >= wikiget.VERY_VERBOSE:
-            print("Full error message:")
-            print(e)
+        logging.error("Couldn't connect to specified site.")
+        logging.debug("Full error message:")
+        logging.debug(e)
         sys.exit(1)
     except HTTPError as e:
         # most likely a 403 forbidden or 404 not found error for api.php
-        print(
-            "Error: couldn't find the specified wiki's api.php. "
-            "Check the value of --path."
-        )
-        if args.verbose >= wikiget.VERY_VERBOSE:
-            print("Full error message:")
-            print(e)
+        logging.error("Couldn't find the specified wiki's api.php. "
+                      "Check the value of --path.")
+        logging.debug("Full error message:")
+        logging.debug(e)
         sys.exit(1)
     except (InvalidResponse, LoginError) as e:
         # InvalidResponse: site exists, but we couldn't communicate with the
         # API endpoint for some reason other than an HTTP error.
         # LoginError: missing or invalid credentials
-        print(e)
+        logging.error(e)
         sys.exit(1)
 
     # get info about the target file
@@ -96,14 +92,11 @@ def download(dl, args):
     except APIError as e:
         # an API error at this point likely means access is denied,
         # which could happen with a private wiki
-        print(
-            "Error: access denied. Try providing credentials with "
-            "--username and --password."
-        )
-        if args.verbose >= wikiget.VERY_VERBOSE:
-            print("Full error message:")
-            for i in e.args:
-                print(i)
+        logging.error("Access denied. Try providing credentials with "
+                      "--username and --password.")
+        logging.debug("Full error message:")
+        for i in e.args:
+            logging.debug(i)
         sys.exit(1)
 
     if file.imageinfo != {}:
@@ -113,26 +106,23 @@ def download(dl, args):
         file_size = file.imageinfo["size"]
         file_sha1 = file.imageinfo["sha1"]
 
-        if args.verbose >= wikiget.STD_VERBOSE:
-            print(
-                f"Info: downloading '{filename}' "
-                f"({file_size} bytes) from {site.host}",
-                end="",
-            )
-            if args.output:
-                print(f" to '{dest}'")
-            else:
-                print("\n", end="")
-            print(f"Info: {file_url}")
+        filename_log = (f"Downloading '{filename}' ({file_size} bytes) "
+                        f"from {site.host}")
+        if args.output:
+            filename_log += f" to '{dest}'"
+        logging.info(filename_log)
+        logging.info(f"{file_url}")
 
         if os.path.isfile(dest) and not args.force:
-            print(f"File '{dest}' already exists, skipping download (use -f to ignore)")
+            logging.warning(f"File '{dest}' already exists, skipping download "
+                            "(use -f to ignore)")
         else:
             try:
                 fd = open(dest, "wb")
             except OSError as e:
-                print("File could not be written. The following error was encountered:")
-                print(e)
+                logging.error("File could not be written. "
+                              "The following error was encountered:")
+                logging.error(e)
                 sys.exit(1)
             else:
                 # download the file(s)
@@ -157,18 +147,17 @@ def download(dl, args):
             # verify file integrity and optionally print details
             dl_sha1 = verify_hash(dest)
 
-            if args.verbose >= wikiget.STD_VERBOSE:
-                print(f"Info: downloaded file SHA1 is {dl_sha1}")
-                print(f"Info: server file SHA1 is {file_sha1}")
+            logging.info(f"Downloaded file SHA1 is {dl_sha1}")
+            logging.info(f"Server file SHA1 is {file_sha1}")
             if dl_sha1 == file_sha1:
-                if args.verbose >= wikiget.STD_VERBOSE:
-                    print("Info: hashes match!")
+                logging.info("Hashes match!")
                 # at this point, we've successfully downloaded the file
             else:
-                print("Error: hash mismatch! Downloaded file may be corrupt.")
+                logging.error("Hash mismatch! Downloaded file may be corrupt.")
                 sys.exit(1)
 
     else:
         # no file information returned
-        print(f"Target '{filename}' does not appear to be a valid file.")
+        logging.error(f"Target '{filename}' does not appear to be "
+                      "a valid file.")
         sys.exit(1)
diff --git a/src/wikiget/wikiget.py b/src/wikiget/wikiget.py
index ba36766..b9a227f 100644
--- a/src/wikiget/wikiget.py
+++ b/src/wikiget/wikiget.py
@@ -101,30 +101,83 @@ def main():
         "filename per line",
         action="store_true",
     )
+    parser.add_argument(
+        "-l",
+        "--logfile",
+        default="",
+        help="save log output to LOGFILE"
+    )
 
     args = parser.parse_args()
 
-    # print API and debug messages in verbose mode
+    loglevel = logging.WARNING
     if args.verbose >= wikiget.VERY_VERBOSE:
-        logging.basicConfig(level=logging.DEBUG)
+        # this includes API and library messages
+        loglevel = logging.DEBUG
     elif args.verbose >= wikiget.STD_VERBOSE:
-        logging.basicConfig(level=logging.WARNING)
+        loglevel = logging.INFO
+    elif args.quiet:
+        loglevel = logging.ERROR
+
+    # configure logging:
+    # console log level is set via -v, -vv, and -q options
+    # file log level is always info (TODO: add debug option)
+    if args.logfile:
+        # log to console and file
+        logging.basicConfig(
+            level=logging.INFO,
+            format="%(asctime)s [%(levelname)-7s] %(message)s",
+            filename=args.logfile
+        )
+
+        console = logging.StreamHandler()
+        # TODO: even when loglevel is set to logging.DEBUG,
+        # debug messages aren't printing to console
+        console.setLevel(loglevel)
+        console.setFormatter(
+            logging.Formatter("[%(levelname)s] %(message)s")
+        )
+        logging.getLogger("").addHandler(console)
+    else:
+        # log only to console
+        logging.basicConfig(
+            level=loglevel,
+            format="[%(levelname)s] %(message)s"
+        )
+
+    # log events are appended to the file if it already exists,
+    # so note the start of a new download session
+    logging.info(f"Starting download session using wikiget {wikiget.wikiget_version}")
+    # logging.info(f"Log level is set to {loglevel}")
 
     if args.batch:
         # batch download mode
         input_file = args.FILE
-        if args.verbose >= wikiget.STD_VERBOSE:
-            print(f"Info: using batch file '{input_file}'")
+        dl_list = []
+
+        logging.info(f"Using batch file '{input_file}'.")
+
         try:
-            fd = open(input_file)
+            fd = open(input_file, "r")
         except OSError as e:
-            print("File could not be read. The following error was encountered:")
-            print(e)
+            logging.error("File could not be read. "
+                          "The following error was encountered:")
+            logging.error(e)
             sys.exit(1)
         else:
             with fd:
+                # store file contents in memory in case something
+                # happens to the file while we're downloading
                 for _, line in enumerate(fd):
-                    download(line.strip(), args)
+                    dl_list.append(line)
+
+        # TODO: validate file contents before download process starts
+        for line_num, url in enumerate(dl_list, start=1):
+            url = url.strip()
+            # keep track of batch file line numbers for
+            # debugging/logging purposes
+            logging.info(f"Downloading '{url}' at line {line_num}:")
+            download(url, args)
     else:
         # single download mode
         dl = args.FILE
