about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/wikiget/client.py 25
-rw-r--r-- src/wikiget/dl.py 50
-rw-r--r-- src/wikiget/exceptions.py 5
-rw-r--r-- src/wikiget/file.py 16
-rw-r--r-- src/wikiget/logging.py 9
-rw-r--r-- src/wikiget/parse.py 20
-rw-r--r-- src/wikiget/validations.py 6
7 files changed, 115 insertions, 16 deletions
diff --git a/src/wikiget/client.py b/src/wikiget/client.py
index f746386..2fc4a6c 100644
--- a/src/wikiget/client.py
+++ b/src/wikiget/client.py
@@ -28,10 +28,19 @@ logger = logging.getLogger(__name__)
def connect_to_site(site_name: str, args: Namespace) -> Site:
- # connect to site and identify ourselves
+ """Create and return a Site object using the given site name and CLI arguments.
+
+ :param site_name: hostname of the site to connect to
+ :type site_name: str
+ :param args: command-line arguments and their values
+ :type args: argparse.Namespace
+ :return: a new Site object
+ :rtype: mwclient.Site
+ """
logger.info("Connecting to %s", site_name)
try:
+ # connect to site and identify ourselves
site = Site(site_name, path=args.path, clients_useragent=wikiget.USER_AGENT)
if args.username and args.password:
logger.info("Attempting to authenticate with credentials")
@@ -60,8 +69,20 @@ def connect_to_site(site_name: str, args: Namespace) -> Site:
def query_api(filename: str, site: Site) -> Image:
- # get info about the target file
+ """Query the given Site for an Image object matching the given filename.
+
+ Even if there's no file by that name on the site, an Image will still be returned,
+ though with an empty imageinfo attribute.
+
+ :param filename: name of the file to retrieve
+ :type filename: str
+ :param site: the Site object to query
+ :type site: mwclient.Site
+ :return: an Image object representing the requested file
+ :rtype: mwclient.image.Image
+ """
try:
+ # get info about the target file
image = site.images[filename]
except APIError as e:
# an API error at this point likely means access is denied, which could happen
diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py
index 85f1ccf..abd0533 100644
--- a/src/wikiget/dl.py
+++ b/src/wikiget/dl.py
@@ -37,6 +37,20 @@ logger = logging.getLogger(__name__)
def prep_download(dl: str, args: Namespace) -> File:
+ """Prepare to download a file by parsing the filename or URL and CLI arguments.
+
+ First, the target is parsed for a valid name, destination, and site. If there are no
+ problems creating a File with this information, we connect to the site hosting it
+ and fetch the relevant Image object, which is added as an attribute to the File.
+
+ :param dl: a string representing the file or URL to download
+ :type dl: str
+ :param args: command-line arguments and their values
+ :type args: argparse.Namespace
+ :raises FileExistsError: the destination file already exists on disk
+ :return: a File object representing the file to download
+ :rtype: File
+ """
file = get_dest(dl, args)
# check if the destination file already exists; don't overwrite unless the user says
@@ -50,6 +64,21 @@ def prep_download(dl: str, args: Namespace) -> File:
def process_download(args: Namespace) -> int:
+ """Process the download target given in the CLI args as a single file or batch file.
+
+ If the target is a batch file, process it with batch_download, which returns the
+ number of errors encountered. If there were any errors, log the number and return
+ exit code 1; otherwise, return exit code 0.
+
+ If the target is a single file or URL, process with prep_download and log any
+ exceptions that it raises. If there aren't any, download the file and return the
+ exit code appropriately.
+
+ :param args: command-line arguments and their values
+ :type args: argparse.Namespace
+ :return: program exit code (1 if there were any problems or 0 otherwise)
+ :rtype: int
+ """
exit_code = 0
if args.batch:
@@ -84,6 +113,17 @@ def process_download(args: Namespace) -> int:
def batch_download(args: Namespace) -> int:
+ """Download files specified in a batch file.
+
+ The batch file is parsed into a dictionary, and the dictionary's items are checked
+ for validity before being downloaded using a ThreadPool for simultaneous downloads,
+ if threading was specified on the command line.
+
+ :param args: command-line arguments and their values
+ :type args: argparse.Namespace
+ :return: number of errors encountered during processing
+ :rtype: int
+ """
errors = 0
# parse batch file
@@ -126,6 +166,15 @@ def batch_download(args: Namespace) -> int:
def download(f: File, args: Namespace) -> int:
+ """Fetch file information and contents if the file exists and save it to disk.
+
+ :param f: a File object representing the file to be downloaded
+ :type f: File
+ :param args: command-line arguments and their values
+ :type args: argparse.Namespace
+ :return: number of errors encountered during processing
+ :rtype: int
+ """
file = f.image
filename = f.name
dest = f.dest
@@ -133,6 +182,7 @@ def download(f: File, args: Namespace) -> int:
errors = 0
+ # prepend the current filename to all log messages
adapter = FileLogAdapter(logger, {"filename": filename})
if file.exists:
diff --git a/src/wikiget/exceptions.py b/src/wikiget/exceptions.py
index 4afc97f..c72a589 100644
--- a/src/wikiget/exceptions.py
+++ b/src/wikiget/exceptions.py
@@ -17,7 +17,4 @@
class ParseError(Exception):
- """
- This exception is raised when the program's input is unable to be parsed as a valid
- download target.
- """
+ """Raised when some input is unable to be parsed as valid."""
diff --git a/src/wikiget/file.py b/src/wikiget/file.py
index 38d41c3..f2320b2 100644
--- a/src/wikiget/file.py
+++ b/src/wikiget/file.py
@@ -24,18 +24,21 @@ from wikiget import DEFAULT_SITE
class File:
- """
- This class represents a file with the properties name, destination, host site, and
+ """A file object.
+
+ Represents a file with the attributes name, destination, host site, and
mwclient.image.Image object as retrieved from the host site.
"""
def __init__(self, name: str, dest: str = "", site: str = "") -> None:
- """
- Initializes a new file with the specified name and an optional destination name.
+ """Initialize a new file.
+
+ The file name is required. If a destination and/or site are provided, those will
+ be used instead of the defaults.
:param name: name of the file
:type name: str
- :param dest: destination of the file, if different from the name; if not
+ :param dest: output destination of the file, if different from the name; if not
specified, defaults to the name
:type dest: str, optional
:param site: name of the site hosting the file; if not specified, defaults to
@@ -48,8 +51,7 @@ class File:
self.site = site if site else DEFAULT_SITE
def __eq__(self, other: object) -> bool:
- """
- Compares this File object with another for equality.
+ """Compare this File object with another for equality.
:param other: another File to compare
:type other: File
diff --git a/src/wikiget/logging.py b/src/wikiget/logging.py
index a762094..6673877 100644
--- a/src/wikiget/logging.py
+++ b/src/wikiget/logging.py
@@ -26,6 +26,15 @@ class FileLogAdapter(logging.LoggerAdapter):
def configure_logging(verbosity: int, logfile: str, *, quiet: bool) -> None:
+ """Set the program's log configuration according to the given settings.
+
+ :param verbosity: how verbose the log messages should be
+ :type verbosity: int
+ :param logfile: file to write log messages to, if any
+ :type logfile: str
+ :param quiet: True if log messages should be suppressed or False otherwise
+ :type quiet: bool
+ """
loglevel = logging.WARNING # default log level
if verbosity >= wikiget.VERY_VERBOSE:
# this includes API and library messages, not just from wikiget
diff --git a/src/wikiget/parse.py b/src/wikiget/parse.py
index 82e9172..92726f0 100644
--- a/src/wikiget/parse.py
+++ b/src/wikiget/parse.py
@@ -34,6 +34,16 @@ logger = logging.getLogger(__name__)
def get_dest(dl: str, args: Namespace) -> File:
+ """Parse the given download target for filename, destination, and site host.
+
+ :param dl: download target (filename or URL)
+ :type dl: str
+ :param args: command-line arguments and their values
+ :type args: argparse.Namespace
+ :raises ParseError: the target was unable to be parsed as a valid file
+ :return: a File object representing the target, destination, and site
+ :rtype: File
+ """
url = urlparse(dl)
if url.netloc:
@@ -64,6 +74,16 @@ def get_dest(dl: str, args: Namespace) -> File:
def read_batch_file(batch_file: str) -> dict[int, str]:
+ """Parse a batch file or stdin for valid input.
+
+ The contents are returned as a dictionary with line numbers for keys and line
+ contents for values. Any blank lines or lines starting with '#' are skipped.
+
+ :param batch_file: name of the file to parse or "-" for stdin
+ :type batch_file: str
+ :return: a dictionary representation of the input contents
+ :rtype: dict[int, str]
+ """
dl_dict = {}
if batch_file == "-":
diff --git a/src/wikiget/validations.py b/src/wikiget/validations.py
index 18c1f86..c7cc2dd 100644
--- a/src/wikiget/validations.py
+++ b/src/wikiget/validations.py
@@ -25,7 +25,7 @@ from wikiget import BLOCKSIZE
def valid_file(search_string: str) -> re.Match | None:
- """Determines if the given string contains a valid file name
+ """Determine if the given string contains a valid file name.
A valid file name is a string that begins with 'File:' or 'Image:' (the standard
file prefixes in MediaWiki), includes a period, and has at least one character
@@ -43,7 +43,7 @@ def valid_file(search_string: str) -> re.Match | None:
def valid_site(search_string: str) -> re.Match | None:
- """Determines if the given string contains a valid site name
+ """Determine if the given string contains a valid site name.
A valid site name is a string ending with 'wikipedia.org' or 'wikimedia.org'. This
covers all subdomains of those domains.
@@ -61,7 +61,7 @@ def valid_site(search_string: str) -> re.Match | None:
def verify_hash(filename: str) -> str:
- """Calculates the SHA1 hash of the given file for comparison with a known value.
+ """Calculate the SHA1 hash of the given file for comparison with a known value.
Despite being insecure, SHA1 is used since that's what the MediaWiki API returns for
the file hash.