diff options
| -rw-r--r-- | wikiget/dl.py | 81 | ||||
| -rw-r--r-- | wikiget/wikiget.py | 42 |
2 files changed, 68 insertions, 55 deletions
diff --git a/wikiget/dl.py b/wikiget/dl.py index 0ac8fec..856d8ca 100644 --- a/wikiget/dl.py +++ b/wikiget/dl.py @@ -15,6 +15,7 @@ # You should have received a copy of the GNU General Public License # along with Wikiget. If not, see <https://www.gnu.org/licenses/>. +import logging import os import sys from urllib.parse import unquote, urlparse @@ -33,10 +34,10 @@ def download(dl, args): if url.netloc: filename = url.path site_name = url.netloc - if args.site is not DEFAULT_SITE and not args.quiet: + if args.site is not DEFAULT_SITE: # this will work even if the user specifies 'commons.wikimedia.org' - print('Warning: target is a URL, ' - 'ignoring site specified with --site') + logging.warning("target is a URL, " + "ignoring site specified with --site") else: filename = dl site_name = args.site @@ -49,19 +50,17 @@ def download(dl, args): filename = file_match.group(2) else: # no file extension and/or prefix, probably an article - print(f"Could not parse input '{filename}' as a file. ") + logging.error(f"Could not parse input '{filename}' as a file.") sys.exit(1) filename = unquote(filename) # remove URL encoding for special characters dest = args.output or filename - if args.verbose >= 2: - print(f'User agent: {USER_AGENT}') + logging.debug(f"User agent: {USER_AGENT}") # connect to site and identify ourselves - if args.verbose >= 1: - print(f'Site name: {site_name}') + logging.info(f"Site name: {site_name}") try: site = Site(site_name, path=args.path, clients_useragent=USER_AGENT) if args.username and args.password: @@ -69,24 +68,22 @@ def download(dl, args): except ConnectionError as e: # usually this means there is no such site, or there's no network # connection, though it could be a certificate problem - print("Error: couldn't connect to specified site.") - if args.verbose >= 2: - print('Full error message:') - print(e) + logging.error("Couldn't connect to specified site.") + logging.debug("Full error message:") + logging.debug(e) sys.exit(1) except HTTPError as e: # most likely a 403 forbidden or 404 not found error for api.php - print("Error: couldn't find the specified wiki's api.php. " - "Check the value of --path.") - if args.verbose >= 2: - print('Full error message:') - print(e) + logging.error("Couldn't find the specified wiki's api.php. " + "Check the value of --path.") + logging.debug("Full error message:") + logging.debug(e) sys.exit(1) except (InvalidResponse, LoginError) as e: # InvalidResponse: site exists, but we couldn't communicate with the # API endpoint for some reason other than an HTTP error. # LoginError: missing or invalid credentials - print(e) + logging.error(e) sys.exit(1) # get info about the target file @@ -95,12 +92,11 @@ def download(dl, args): except APIError as e: # an API error at this point likely means access is denied, # which could happen with a private wiki - print('Error: access denied. Try providing credentials with ' - '--username and --password.') - if args.verbose >= 2: - print('Full error message:') - for i in e.args: - print(i) + logging.error("Access denied. Try providing credentials with " + "--username and --password.") + logging.debug("Full error message:") + for i in e.args: + logging.debug(i) sys.exit(1) if file.imageinfo != {}: @@ -110,26 +106,22 @@ def download(dl, args): file_size = file.imageinfo['size'] file_sha1 = file.imageinfo['sha1'] - if args.verbose >= 1: - print(f"Info: downloading '{filename}' " - f"({file_size} bytes) from {site.host}", - end='') - if args.output: - print(f" to '{dest}'") - else: - print('\n', end='') - print(f'Info: {file_url}') + filename_log = f"Downloading '{filename}' ({file_size} bytes) from {site.host}" + if args.output: + filename_log += f" to '{dest}'" + logging.info(filename_log) + logging.info(f"{file_url}") if os.path.isfile(dest) and not args.force: - print(f"File '{dest}' already exists, skipping download " - "(use -f to ignore)") + logging.warning(f"File '{dest}' already exists, skipping download " + "(use -f to ignore)") else: try: fd = open(dest, 'wb') except IOError as e: - print('File could not be written. ' - 'The following error was encountered:') - print(e) + logging.error("File could not be written. " + "The following error was encountered:") + logging.error(e) sys.exit(1) else: # download the file(s) @@ -150,18 +142,17 @@ def download(dl, args): # verify file integrity and optionally print details dl_sha1 = verify_hash(dest) - if args.verbose >= 1: - print(f'Info: downloaded file SHA1 is {dl_sha1}') - print(f'Info: server file SHA1 is {file_sha1}') + logging.info(f"Downloaded file SHA1 is {dl_sha1}") + logging.info(f"Server file SHA1 is {file_sha1}") if dl_sha1 == file_sha1: - if args.verbose >= 1: - print('Info: hashes match!') + logging.info("Hashes match!") # at this point, we've successfully downloaded the file else: - print('Error: hash mismatch! Downloaded file may be corrupt.') + logging.error("Hash mismatch! Downloaded file may be corrupt.") sys.exit(1) else: # no file information returned - print(f"Target '{filename}' does not appear to be a valid file.") + logging.error(f"Target '{filename}' does not appear to be " + "a valid file.") sys.exit(1) diff --git a/wikiget/wikiget.py b/wikiget/wikiget.py index 1e2e9ed..dfc6027 100644 --- a/wikiget/wikiget.py +++ b/wikiget/wikiget.py @@ -81,29 +81,51 @@ def main(): args = parser.parse_args() - # print API and debug messages in verbose mode + loglevel = logging.WARNING if args.verbose >= 2: - logging.basicConfig(level=logging.DEBUG) + # this includes API and library messages + loglevel = logging.DEBUG elif args.verbose >= 1: - logging.basicConfig(level=logging.WARNING) + loglevel = logging.INFO + elif args.quiet: + loglevel = logging.ERROR + + # set up logger + # TODO: optionally save to log file + logging.basicConfig( + level=loglevel, + # format="%(asctime)s [%(levelname)s] %(message)s" + format="[%(levelname)s] %(message)s" + ) if args.batch: # batch download mode input_file = args.FILE - if args.verbose >= 1: - print(f"Info: using batch file '{input_file}'") + dl_list = [] + + logging.info(f"Using batch file '{input_file}'.") + try: fd = open(input_file, 'r') except IOError as e: - print('File could not be read. ' - 'The following error was encountered:') - print(e) + logging.error("File could not be read. " + "The following error was encountered:") + logging.error(e) sys.exit(1) else: with fd: + # store file contents in memory in case something + # happens to the file while we're downloading for _, line in enumerate(fd): - line = line.strip() - download(line, args) + dl_list.append(line) + + # TODO: validate file contents before download process starts + for line_num, url in enumerate(dl_list, start=1): + url = url.strip() + # keep track of batch file line numbers for + # debugging/logging purposes + logging.info(f"Downloading file {line_num} ({url}):") + download(url, args) else: # single download mode dl = args.FILE |
