Diffstat (limited to 'src/wikiget')
-rw-r--r--  src/wikiget/__init__.py       7
-rw-r--r--  src/wikiget/dl.py           219
-rw-r--r--  src/wikiget/exceptions.py    20
-rw-r--r--  src/wikiget/file.py          39
-rw-r--r--  src/wikiget/logging.py       58
-rw-r--r--  src/wikiget/parse.py         59
-rw-r--r--  src/wikiget/validations.py   35
-rw-r--r--  src/wikiget/version.py       17
-rw-r--r--  src/wikiget/wikiget.py      109
9 files changed, 409 insertions, 154 deletions
diff --git a/src/wikiget/__init__.py b/src/wikiget/__init__.py
index b68b0ec..3946868 100644
--- a/src/wikiget/__init__.py
+++ b/src/wikiget/__init__.py
@@ -1,5 +1,5 @@
# wikiget - CLI tool for downloading files from Wikimedia sites
-# Copyright (C) 2018, 2019, 2020 Cody Logan and contributors
+# Copyright (C) 2018-2023 Cody Logan and contributors
# SPDX-License-Identifier: GPL-3.0-or-later
#
# Wikiget is free software: you can redistribute it and/or modify
@@ -24,8 +24,9 @@ BLOCKSIZE = 65536
CHUNKSIZE = 1024
DEFAULT_SITE = "commons.wikimedia.org"
DEFAULT_PATH = "/w/"
-USER_AGENT = "wikiget/{} (https://github.com/clpo13/wikiget) mwclient/{}".format(
- wikiget_version, mwclient_version
+USER_AGENT = (
+ f"wikiget/{wikiget_version} (https://github.com/clpo13/wikiget) "
+ f"mwclient/{mwclient_version}"
)
STD_VERBOSE = 1
VERY_VERBOSE = 2
diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py
index 949f09e..5b5b43b 100644
--- a/src/wikiget/dl.py
+++ b/src/wikiget/dl.py
@@ -1,5 +1,5 @@
# wikiget - CLI tool for downloading files from Wikimedia sites
-# Copyright (C) 2018-2021 Cody Logan and contributors
+# Copyright (C) 2018-2023 Cody Logan and contributors
# SPDX-License-Identifier: GPL-3.0-or-later
#
# Wikiget is free software: you can redistribute it and/or modify
@@ -15,125 +15,154 @@
# You should have received a copy of the GNU General Public License
# along with Wikiget. If not, see <https://www.gnu.org/licenses/>.
+import logging
import os
import sys
-from urllib.parse import unquote, urlparse
+from argparse import Namespace
+from concurrent.futures import ThreadPoolExecutor
from mwclient import APIError, InvalidResponse, LoginError, Site
+from mwclient.image import Image
from requests import ConnectionError, HTTPError
from tqdm import tqdm
import wikiget
-from wikiget.validations import valid_file, verify_hash
+from wikiget.exceptions import ParseError
+from wikiget.file import File
+from wikiget.logging import FileLogAdapter
+from wikiget.parse import get_dest
+from wikiget.validations import verify_hash
-def download(dl, args):
- url = urlparse(dl)
-
- if url.netloc:
- filename = url.path
- site_name = url.netloc
- if args.site is not wikiget.DEFAULT_SITE and not args.quiet:
- # this will work even if the user specifies 'commons.wikimedia.org'
- print("Warning: target is a URL, ignoring site specified with --site")
- else:
- filename = dl
- site_name = args.site
-
- file_match = valid_file(filename)
-
- # check if this is a valid file
- if file_match and file_match.group(1):
- # has File:/Image: prefix and extension
- filename = file_match.group(2)
- else:
- # no file extension and/or prefix, probably an article
- print(f"Could not parse input '{filename}' as a file. ")
- sys.exit(1)
-
- filename = unquote(filename) # remove URL encoding for special characters
-
- dest = args.output or filename
-
- if args.verbose >= wikiget.VERY_VERBOSE:
- print(f"User agent: {wikiget.USER_AGENT}")
-
+def query_api(filename: str, site_name: str, args: Namespace) -> Image:
# connect to site and identify ourselves
- if args.verbose >= wikiget.STD_VERBOSE:
- print(f"Site name: {site_name}")
+ logging.info(f"Connecting to {site_name}")
try:
site = Site(site_name, path=args.path, clients_useragent=wikiget.USER_AGENT)
if args.username and args.password:
site.login(args.username, args.password)
except ConnectionError as e:
- # usually this means there is no such site, or there's no network
- # connection, though it could be a certificate problem
- print("Error: couldn't connect to specified site.")
- if args.verbose >= wikiget.VERY_VERBOSE:
- print("Full error message:")
- print(e)
- sys.exit(1)
+ # usually this means there is no such site, or there's no network connection,
+ # though it could be a certificate problem
+ logging.error("Could not connect to specified site")
+ logging.debug(e)
+ raise
except HTTPError as e:
# most likely a 403 forbidden or 404 not found error for api.php
- print(
- "Error: couldn't find the specified wiki's api.php. "
- "Check the value of --path."
+ logging.error(
+ "Could not find the specified wiki's api.php. Check the value of --path."
)
- if args.verbose >= wikiget.VERY_VERBOSE:
- print("Full error message:")
- print(e)
- sys.exit(1)
+ logging.debug(e)
+ raise
except (InvalidResponse, LoginError) as e:
- # InvalidResponse: site exists, but we couldn't communicate with the
- # API endpoint for some reason other than an HTTP error.
+ # InvalidResponse: site exists, but we couldn't communicate with the API
+ # endpoint for some reason other than an HTTP error.
# LoginError: missing or invalid credentials
- print(e)
- sys.exit(1)
+ logging.error(e)
+ raise
# get info about the target file
try:
- file = site.images[filename]
+ image = site.images[filename]
except APIError as e:
- # an API error at this point likely means access is denied,
- # which could happen with a private wiki
- print(
- "Error: access denied. Try providing credentials with "
- "--username and --password."
+ # an API error at this point likely means access is denied, which could happen
+ # with a private wiki
+ logging.error(
+ "Access denied. Try providing credentials with --username and --password."
)
- if args.verbose >= wikiget.VERY_VERBOSE:
- print("Full error message:")
- for i in e.args:
- print(i)
- sys.exit(1)
+ for i in e.args:
+ logging.debug(i)
+ raise
- if file.imageinfo != {}:
- # file exists either locally or at a common repository,
- # like Wikimedia Commons
+ return image
+
+
+def prep_download(dl: str, args: Namespace) -> File:
+ file = get_dest(dl, args)
+ file.image = query_api(file.name, file.site, args)
+ return file
+
+
+def batch_download(args: Namespace) -> int:
+ input_file = args.FILE
+ dl_list = {}
+ errors = 0
+
+ logging.info(f"Using batch file '{input_file}'.")
+
+ try:
+ fd = open(input_file)
+ except OSError as e:
+ logging.error("File could not be read. The following error was encountered:")
+ logging.error(e)
+ sys.exit(1)
+ else:
+ with fd:
+ # read the file into memory and process each line as we go
+ for line_num, line in enumerate(fd, start=1):
+ line_s = line.strip()
+ # ignore blank lines and lines starting with "#" (for comments)
+ if line_s and not line_s.startswith("#"):
+ dl_list[line_num] = line_s
+
+ # TODO: validate file contents before download process starts
+ with ThreadPoolExecutor(max_workers=args.threads) as executor:
+ futures = []
+ for line_num, line in dl_list.items():
+ # keep track of batch file line numbers for debugging/logging purposes
+ logging.info(f"Processing '{line}' at line {line_num}")
+ try:
+ file = prep_download(line, args)
+ except ParseError as e:
+ logging.warning(f"{e} (line {line_num})")
+ errors += 1
+ continue
+ except (ConnectionError, HTTPError, InvalidResponse, LoginError, APIError):
+ logging.warning(
+ f"Unable to download '{line}' (line {line_num}) due to an error"
+ )
+ errors += 1
+ continue
+ future = executor.submit(download, file, args)
+ futures.append(future)
+ # wait for downloads to finish
+ for future in futures:
+ errors += future.result()
+ return errors
+
+
+def download(f: File, args: Namespace) -> int:
+ file = f.image
+ filename = f.name
+ dest = f.dest
+ site = file.site
+
+ errors = 0
+
+ logger = logging.getLogger("")
+ adapter = FileLogAdapter(logger, {"filename": filename})
+
+ if file.exists:
+ # file exists either locally or at a common repository, like Wikimedia Commons
file_url = file.imageinfo["url"]
file_size = file.imageinfo["size"]
file_sha1 = file.imageinfo["sha1"]
- if args.verbose >= wikiget.STD_VERBOSE:
- print(
- f"Info: downloading '{filename}' "
- f"({file_size} bytes) from {site.host}",
- end="",
- )
- if args.output:
- print(f" to '{dest}'")
- else:
- print("\n", end="")
- print(f"Info: {file_url}")
+ filename_log = f"Downloading '{filename}' ({file_size} bytes) from {site.host}"
+ if args.output:
+ filename_log += f" to '{dest}'"
+ adapter.info(filename_log)
+ adapter.info(f"{file_url}")
if os.path.isfile(dest) and not args.force:
- print(f"File '{dest}' already exists, skipping download (use -f to ignore)")
+ adapter.warning("File already exists, skipping download (use -f to force)")
+ errors += 1
else:
try:
fd = open(dest, "wb")
except OSError as e:
- print("File could not be written. The following error was encountered:")
- print(e)
- sys.exit(1)
+ adapter.error(f"File could not be written. {e}")
+ errors += 1
else:
# download the file(s)
if args.verbose >= wikiget.STD_VERBOSE:
@@ -154,21 +183,25 @@ def download(dl, args):
fd.write(chunk)
progress_bar.update(len(chunk))
- # verify file integrity and optionally print details
+ # verify file integrity and log details
dl_sha1 = verify_hash(dest)
- if args.verbose >= wikiget.STD_VERBOSE:
- print(f"Info: downloaded file SHA1 is {dl_sha1}")
- print(f"Info: server file SHA1 is {file_sha1}")
+ adapter.info(f"Remote file SHA1 is {file_sha1}")
+ adapter.info(f"Local file SHA1 is {dl_sha1}")
if dl_sha1 == file_sha1:
- if args.verbose >= wikiget.STD_VERBOSE:
- print("Info: hashes match!")
+ adapter.info("Hashes match!")
# at this point, we've successfully downloaded the file
+ success_log = f"'{filename}' downloaded"
+ if args.output:
+ success_log += f" to '{dest}'"
+ adapter.info(success_log)
else:
- print("Error: hash mismatch! Downloaded file may be corrupt.")
- sys.exit(1)
+ adapter.error("Hash mismatch! Downloaded file may be corrupt.")
+ errors += 1
else:
# no file information returned
- print(f"Target '{filename}' does not appear to be a valid file.")
- sys.exit(1)
+ adapter.warning("Target does not appear to be a valid file")
+ errors += 1
+
+ return errors
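
Editor's note: batch_download submits one download per batch-file line to a ThreadPoolExecutor and sums the error counts returned by each future. A minimal standalone sketch of that submit-and-collect pattern (fake_download and its inputs are hypothetical stand-ins, not part of wikiget):

from concurrent.futures import ThreadPoolExecutor

def fake_download(name: str) -> int:
    # stand-in for wikiget.dl.download(); returns an error count (0 means success)
    return 0 if name.endswith(".jpg") else 1

targets = {1: "File:Example.jpg", 2: "Not_a_file"}  # batch line number -> target
errors = 0

with ThreadPoolExecutor(max_workers=2) as executor:
    futures = [executor.submit(fake_download, t) for t in targets.values()]
    for future in futures:
        errors += future.result()  # each future resolves to an int error count

print(f"{errors} problem(s) encountered")  # -> 1 problem(s) encountered
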
diff --git a/src/wikiget/exceptions.py b/src/wikiget/exceptions.py
new file mode 100644
index 0000000..94ed6b2
--- /dev/null
+++ b/src/wikiget/exceptions.py
@@ -0,0 +1,20 @@
+# wikiget - CLI tool for downloading files from Wikimedia sites
+# Copyright (C) 2023 Cody Logan
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# Wikiget is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Wikiget is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Wikiget. If not, see <https://www.gnu.org/licenses/>.
+
+
+class ParseError(Exception):
+ pass
diff --git a/src/wikiget/file.py b/src/wikiget/file.py
new file mode 100644
index 0000000..b890e63
--- /dev/null
+++ b/src/wikiget/file.py
@@ -0,0 +1,39 @@
+# wikiget - CLI tool for downloading files from Wikimedia sites
+# Copyright (C) 2023 Cody Logan
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# Wikiget is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Wikiget is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Wikiget. If not, see <https://www.gnu.org/licenses/>.
+
+from mwclient.image import Image
+
+from wikiget import DEFAULT_SITE
+
+
+class File:
+ def __init__(self, name: str, dest: str = "", site: str = "") -> None:
+ """
+ Initializes a new file with the specified name and an optional destination name.
+
+ :param name: name of the file
+ :type name: str
+ :param dest: destination of the file, if different from the name; if not
+ specified, defaults to the name
+ :type dest: str, optional
+ :param site: name of the site hosting the file; if not specified, defaults to
+ the global default site
+ """
+ self.image: Image = None
+ self.name = name
+ self.dest = dest if dest else name
+ self.site = site if site else DEFAULT_SITE
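
Editor's note: the new File class is a small value object tying together the parsed filename, its destination on disk, and the source wiki. A quick sketch of the defaults (assumes the wikiget package and mwclient are importable):

from wikiget.file import File

f = File("Example.jpg")
# dest falls back to the name, site falls back to the package default
print(f.name, f.dest, f.site)  # Example.jpg Example.jpg commons.wikimedia.org
print(f.image)                 # None until query_api() attaches an mwclient Image

g = File("Example.jpg", dest="local.jpg", site="en.wikipedia.org")
print(g.dest, g.site)          # local.jpg en.wikipedia.org
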
diff --git a/src/wikiget/logging.py b/src/wikiget/logging.py
new file mode 100644
index 0000000..87b917c
--- /dev/null
+++ b/src/wikiget/logging.py
@@ -0,0 +1,58 @@
+# wikiget - CLI tool for downloading files from Wikimedia sites
+# Copyright (C) 2023 Cody Logan
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# Wikiget is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Wikiget is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Wikiget. If not, see <https://www.gnu.org/licenses/>.
+
+import logging
+from argparse import Namespace
+
+import wikiget
+
+
+class FileLogAdapter(logging.LoggerAdapter):
+ def process(self, msg, kwargs):
+ return f"[{self.extra['filename']}] {msg}", kwargs
+
+
+def configure_logging(args: Namespace) -> None:
+ loglevel = logging.WARNING
+ if args.verbose >= wikiget.VERY_VERBOSE:
+ # this includes API and library messages
+ loglevel = logging.DEBUG
+ elif args.verbose >= wikiget.STD_VERBOSE:
+ loglevel = logging.INFO
+ elif args.quiet:
+ loglevel = logging.ERROR
+
+ # configure logging:
+ # console log level is set via -v, -vv, and -q options;
+ # file log level is always debug (TODO: make this user configurable)
+ base_format = "%(message)s"
+ log_format = "[%(levelname)s] " + base_format
+ if args.logfile:
+ # log to console and file
+ logging.basicConfig(
+ level=logging.DEBUG,
+ format="%(asctime)s [%(levelname)-7s] " + base_format,
+ filename=args.logfile,
+ )
+
+ console = logging.StreamHandler()
+ console.setLevel(loglevel)
+ console.setFormatter(logging.Formatter(log_format))
+ logging.getLogger("").addHandler(console)
+ else:
+ # log only to console
+ logging.basicConfig(level=loglevel, format=log_format)
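
Editor's note: FileLogAdapter simply prefixes each record with the filename it was given, which keeps interleaved log lines readable when several downloads run in parallel. A minimal sketch of that behavior:

import logging

from wikiget.logging import FileLogAdapter

logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")

adapter = FileLogAdapter(logging.getLogger(""), {"filename": "Example.jpg"})
adapter.info("Hashes match!")
# prints: [INFO] [Example.jpg] Hashes match!
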
diff --git a/src/wikiget/parse.py b/src/wikiget/parse.py
new file mode 100644
index 0000000..fe3fe43
--- /dev/null
+++ b/src/wikiget/parse.py
@@ -0,0 +1,59 @@
+# wikiget - CLI tool for downloading files from Wikimedia sites
+# Copyright (C) 2023 Cody Logan
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# Wikiget is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Wikiget is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Wikiget. If not, see <https://www.gnu.org/licenses/>.
+
+import logging
+from argparse import Namespace
+from urllib.parse import unquote, urlparse
+
+import wikiget
+from wikiget.exceptions import ParseError
+from wikiget.file import File
+from wikiget.validations import valid_file
+
+
+def get_dest(dl: str, args: Namespace) -> File:
+ url = urlparse(dl)
+
+ if url.netloc:
+ filename = url.path
+ site_name = url.netloc
+ if args.site is not wikiget.DEFAULT_SITE:
+ # this will work even if the user specifies 'commons.wikimedia.org' since
+ # we're comparing objects instead of values (is not vs. !=)
+ logging.warning("Target is a URL, ignoring site specified with --site")
+ else:
+ filename = dl
+ site_name = args.site
+
+ file_match = valid_file(filename)
+
+ # check if this is a valid file
+ if file_match and file_match.group(1):
+ # has File:/Image: prefix and extension
+ filename = file_match.group(2)
+ else:
+ # no file extension and/or prefix, probably an article
+ msg = f"Could not parse input '{filename}' as a file"
+ raise ParseError(msg)
+
+ filename = unquote(filename) # remove URL encoding for special characters
+
+ dest = args.output or filename
+
+ file = File(filename, dest, site_name)
+
+ return file
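
Editor's note: get_dest turns a plain File: name or a description-page URL into a File object, raising ParseError for anything it cannot treat as a file. A hedged sketch (the Namespace below only fills in the attributes get_dest actually reads, which is an assumption drawn from the code above):

from argparse import Namespace

from wikiget import DEFAULT_SITE
from wikiget.exceptions import ParseError
from wikiget.parse import get_dest

args = Namespace(site=DEFAULT_SITE, output=None)

file = get_dest("File:Example.jpg", args)
print(file.name, file.dest, file.site)  # Example.jpg Example.jpg commons.wikimedia.org

try:
    get_dest("Some article title", args)
except ParseError as e:
    print(e)  # Could not parse input 'Some article title' as a file
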
diff --git a/src/wikiget/validations.py b/src/wikiget/validations.py
index dc70df4..c9e7bcf 100644
--- a/src/wikiget/validations.py
+++ b/src/wikiget/validations.py
@@ -1,5 +1,5 @@
# wikiget - CLI tool for downloading files from Wikimedia sites
-# Copyright (C) 2018, 2019, 2020 Cody Logan
+# Copyright (C) 2018-2023 Cody Logan
# SPDX-License-Identifier: GPL-3.0-or-later
#
# Wikiget is free software: you can redistribute it and/or modify
@@ -17,17 +17,21 @@
import hashlib
import re
+from typing import Optional
from wikiget import BLOCKSIZE
-def valid_file(search_string):
+def valid_file(search_string: str) -> Optional[re.Match]:
"""
- Determines if the given string contains a valid file name, defined as a
- string ending with a '.' and at least one character, beginning with 'File:'
- or 'Image:', the standard file prefixes in MediaWiki.
+ Determines if the given string contains a valid file name, defined as a string
+ ending with a '.' and at least one character, beginning with 'File:' or 'Image:',
+ the standard file prefixes in MediaWiki.
+
:param search_string: string to validate
+ :type search_string: str
:returns: a regex Match object if there's a match or None otherwise
+ :rtype: re.Match
"""
# second group could also restrict to file extensions with three or more
# letters with ([^/\r\n\t\f\v]+\.\w{3,})
@@ -35,25 +39,30 @@ def valid_file(search_string):
return file_regex.search(search_string)
-def valid_site(search_string):
+def valid_site(search_string: str) -> Optional[re.Match]:
"""
- Determines if the given string contains a valid site name, defined as a
- string ending with 'wikipedia.org' or 'wikimedia.org'. This covers all
- subdomains of those domains. Eventually, it should be possible to support
- any MediaWiki site, regardless of domain name.
+ Determines if the given string contains a valid site name, defined as a string
+ ending with 'wikipedia.org' or 'wikimedia.org'. This covers all subdomains of those
+ domains. Eventually, it should be possible to support any MediaWiki site, regardless
+ of domain name.
+
:param search_string: string to validate
+ :type search_string: str
:returns: a regex Match object if there's a match or None otherwise
+ :rtype: re.Match
"""
site_regex = re.compile(r"wiki[mp]edia\.org$", re.I)
return site_regex.search(search_string)
-def verify_hash(filename):
+def verify_hash(filename: str) -> str:
"""
- Calculates the SHA1 hash of the given file for comparison with a known
- value.
+ Calculates the SHA1 hash of the given file for comparison with a known value.
+
:param filename: name of the file to calculate a hash for
+ :type filename: str
:return: hash digest
+ :rtype: str
"""
hasher = hashlib.sha1() # noqa: S324
with open(filename, "rb") as dl:
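
Editor's note: the two validators annotated in these hunks are easy to exercise directly; the snippet below writes a throwaway file so verify_hash has something to digest (a sketch, assuming verify_hash returns the hex SHA1 digest as its docstring implies):

import hashlib
import tempfile

from wikiget.validations import valid_file, verify_hash

# valid_file returns a re.Match when the string looks like a MediaWiki file title
match = valid_file("File:Example.jpg")
print(match.group(2) if match else None)  # Example.jpg
print(valid_file("Just an article"))      # None

# verify_hash should agree with hashlib over the same bytes
with tempfile.NamedTemporaryFile(delete=False) as tmp:
    tmp.write(b"hello wiki")
    path = tmp.name
print(verify_hash(path) == hashlib.sha1(b"hello wiki").hexdigest())  # True
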
diff --git a/src/wikiget/version.py b/src/wikiget/version.py
index dd9b22c..34dabb7 100644
--- a/src/wikiget/version.py
+++ b/src/wikiget/version.py
@@ -1 +1,18 @@
+# wikiget - CLI tool for downloading files from Wikimedia sites
+# Copyright (C) 2018-2023 Cody Logan and contributors
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# Wikiget is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Wikiget is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Wikiget. If not, see <https://www.gnu.org/licenses/>.
+
__version__ = "0.5.1"
diff --git a/src/wikiget/wikiget.py b/src/wikiget/wikiget.py
index ba36766..e64d00e 100644
--- a/src/wikiget/wikiget.py
+++ b/src/wikiget/wikiget.py
@@ -1,5 +1,5 @@
# wikiget - CLI tool for downloading files from Wikimedia sites
-# Copyright (C) 2018-2021 Cody Logan and contributors
+# Copyright (C) 2018-2023 Cody Logan and contributors
# SPDX-License-Identifier: GPL-3.0-or-later
#
# Wikiget is free software: you can redistribute it and/or modify
@@ -19,38 +19,33 @@ import argparse
import logging
import sys
-import wikiget
-from wikiget.dl import download
+from mwclient import APIError, InvalidResponse, LoginError
+from requests import ConnectionError, HTTPError
+import wikiget
+from wikiget.dl import batch_download, download, prep_download
+from wikiget.exceptions import ParseError
+from wikiget.logging import configure_logging
-def main():
- """
- Main entry point for console script. Automatically compiled by setuptools
- when installed with `pip install` or `python setup.py install`.
- """
+def construct_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(
description="""
- A tool for downloading files from
- MediaWiki sites using the file name or
+ A tool for downloading files from MediaWiki sites using the file name or
description page URL
""",
epilog="""
- Copyright (C) 2018-2023 Cody Logan
- and contributors.
- License GPLv3+: GNU GPL version 3 or later
- <http://www.gnu.org/licenses/gpl.html>.
- This is free software; you are free to
- change and redistribute it under certain
- conditions. There is NO WARRANTY, to the
- extent permitted by law.
+ Copyright (C) 2018-2023 Cody Logan and contributors. License GPLv3+: GNU GPL
+ version 3 or later <http://www.gnu.org/licenses/gpl.html>. This is free
+ software; you are free to change and redistribute it under certain conditions.
+ There is NO WARRANTY, to the extent permitted by law.
""",
)
parser.add_argument(
"FILE",
help="""
- name of the file to download with the File:
- prefix, or the URL of its file description page
+ name of the file to download with the File: prefix, or the URL of its file
+ description page
""",
)
parser.add_argument(
@@ -80,52 +75,76 @@ def main():
help="MediaWiki site to download from (default: %(default)s)",
)
parser.add_argument(
- "-p",
+ "-P",
"--path",
default=wikiget.DEFAULT_PATH,
help="MediaWiki site path, where api.php is located (default: %(default)s)",
)
parser.add_argument(
- "--username", default="", help="MediaWiki site username, for private wikis"
+ "-u",
+ "--username",
+ default="",
+ help="MediaWiki site username, for private wikis",
)
parser.add_argument(
- "--password", default="", help="MediaWiki site password, for private wikis"
+ "-p",
+ "--password",
+ default="",
+ help="MediaWiki site password, for private wikis",
)
output_options = parser.add_mutually_exclusive_group()
output_options.add_argument("-o", "--output", help="write download to OUTPUT")
output_options.add_argument(
"-a",
"--batch",
- help="treat FILE as a textfile containing "
- "multiple files to download, one URL or "
- "filename per line",
+ help="treat FILE as a textfile containing multiple files to download, one URL "
+ "or filename per line",
action="store_true",
)
+ parser.add_argument(
+ "-l", "--logfile", default="", help="save log output to LOGFILE"
+ )
+ parser.add_argument(
+ "-j",
+ "--threads",
+ default=1,
+ help="number of parallel downloads to attempt in batch mode",
+ type=int,
+ )
+ return parser
+
+
+def main() -> None:
+ # setup our environment
+ parser = construct_parser()
args = parser.parse_args()
+ configure_logging(args)
- # print API and debug messages in verbose mode
- if args.verbose >= wikiget.VERY_VERBOSE:
- logging.basicConfig(level=logging.DEBUG)
- elif args.verbose >= wikiget.STD_VERBOSE:
- logging.basicConfig(level=logging.WARNING)
+ # log events are appended to the file if it already exists, so note the start of a
+ # new download session
+ logging.info(f"Starting download session using wikiget {wikiget.wikiget_version}")
+ logging.debug(f"User agent: {wikiget.USER_AGENT}")
if args.batch:
# batch download mode
- input_file = args.FILE
- if args.verbose >= wikiget.STD_VERBOSE:
- print(f"Info: using batch file '{input_file}'")
- try:
- fd = open(input_file)
- except OSError as e:
- print("File could not be read. The following error was encountered:")
- print(e)
+ errors = batch_download(args)
+ if errors:
+ # return non-zero exit code if any problems were encountered, even if some
+ # downloads completed successfully
+ logging.warning(
+ f"{errors} problem{'s'[:errors^1]} encountered during batch processing"
+ )
sys.exit(1)
- else:
- with fd:
- for _, line in enumerate(fd):
- download(line.strip(), args)
else:
# single download mode
- dl = args.FILE
- download(dl, args)
+ try:
+ file = prep_download(args.FILE, args)
+ except ParseError as e:
+ logging.error(e)
+ sys.exit(1)
+ except (ConnectionError, HTTPError, InvalidResponse, LoginError, APIError):
+ sys.exit(1)
+ errors = download(file, args)
+ if errors:
+ sys.exit(1)
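
Editor's note: the reworked argument handling can be poked at in isolation; a quick sketch of construct_parser picking up the new -j/--threads and -l/--logfile options (and the -p/-P swap):

from wikiget.wikiget import construct_parser

parser = construct_parser()
args = parser.parse_args(
    ["File:Example.jpg", "-o", "local.jpg", "-j", "4", "-l", "session.log"]
)
print(args.threads)  # 4 (an int, thanks to type=int)
print(args.logfile)  # session.log
print(args.path)     # /w/ (the default, now reached with -P/--path)
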