diff options
| author | Cody Logan <cody@lokken.dev> | 2023-11-08 12:53:25 -0800 |
|---|---|---|
| committer | Cody Logan <cody@lokken.dev> | 2023-11-08 12:53:25 -0800 |
| commit | 96316c8be7bc21617ec5333f87864a0f002ebaa4 (patch) | |
| tree | bc42d19c3656203700dd322a73f0a73fe798c0b1 | |
| parent | 2df37a4157763fdad816b7641714ed0478351794 (diff) | |
| download | wikiget-96316c8be7bc21617ec5333f87864a0f002ebaa4.tar.gz wikiget-96316c8be7bc21617ec5333f87864a0f002ebaa4.zip | |
Add and refine docstrings in src folder
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/wikiget/client.py | 25 |
| -rw-r--r-- | src/wikiget/dl.py | 50 |
| -rw-r--r-- | src/wikiget/exceptions.py | 5 |
| -rw-r--r-- | src/wikiget/file.py | 16 |
| -rw-r--r-- | src/wikiget/logging.py | 9 |
| -rw-r--r-- | src/wikiget/parse.py | 20 |
| -rw-r--r-- | src/wikiget/validations.py | 6 |
7 files changed, 115 insertions, 16 deletions
diff --git a/src/wikiget/client.py b/src/wikiget/client.py index f746386..2fc4a6c 100644 --- a/src/wikiget/client.py +++ b/src/wikiget/client.py @@ -28,10 +28,19 @@ logger = logging.getLogger(__name__) def connect_to_site(site_name: str, args: Namespace) -> Site: - # connect to site and identify ourselves + """Create and return a Site object using the given site name and CLI arguments. + + :param site_name: hostname of the site to connect to + :type site_name: str + :param args: command-line arguments and their values + :type args: argparse.Namespace + :return: a new Site object + :rtype: mwclient.Site + """ logger.info("Connecting to %s", site_name) try: + # connect to site and identify ourselves site = Site(site_name, path=args.path, clients_useragent=wikiget.USER_AGENT) if args.username and args.password: logger.info("Attempting to authenticate with credentials") @@ -60,8 +69,20 @@ def connect_to_site(site_name: str, args: Namespace) -> Site: def query_api(filename: str, site: Site) -> Image: - # get info about the target file + """Query the given Site for an Image object matching the given filename. + + Even if there's no file by that name on the site, an Image will still be returned, + though with an empty imageinfo attribute. + + :param filename: name of the file to retrieve + :type filename: str + :param site: the Site object to query + :type site: mwclient.Site + :return: an Image object representing the requested file + :rtype: mwclient.image.Image + """ try: + # get info about the target file image = site.images[filename] except APIError as e: # an API error at this point likely means access is denied, which could happen diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py index 85f1ccf..abd0533 100644 --- a/src/wikiget/dl.py +++ b/src/wikiget/dl.py @@ -37,6 +37,20 @@ logger = logging.getLogger(__name__) def prep_download(dl: str, args: Namespace) -> File: + """Prepare to download a file by parsing the filename or URL and CLI arguments. 
+ + First, the target is parsed for a valid name, destination, and site. If there are no + problems creating a File with this information, we connect to the site hosting it + and fetch the relevant Image object, which is added as an attribute to the File. + + :param dl: a string representing the file or URL to download + :type dl: str + :param args: command-line arguments and their values + :type args: argparse.Namespace + :raises FileExistsError: the destination file already exists on disk + :return: a File object representing the file to download + :rtype: File + """ file = get_dest(dl, args) # check if the destination file already exists; don't overwrite unless the user says @@ -50,6 +64,21 @@ def prep_download(dl: str, args: Namespace) -> File: def process_download(args: Namespace) -> int: + """Process the download target given in the CLI args as a single file or batch file. + + If the target is a batch file, process with batch_download and return the number of + errors encountered, if any. If there were any errors, log the number and exit with + code 1. If no errors, exit with code 0. + + If the target is a single file or URL, process with prep_download and log any + exceptions that it raises. If there aren't any, download the file and return the + exit code appropriately. + + :param args: command-line arguments and their values + :type args: argparse.Namespace + :return: program exit code (1 if there were any problems or 0 otherwise) + :rtype: int + """ exit_code = 0 if args.batch: @@ -84,6 +113,17 @@ def process_download(args: Namespace) -> int: def batch_download(args: Namespace) -> int: + """Download files specified in a batch file. + + The batch file is parsed into a dictionary, and the dictionary's items are checked + for validity before being downloaded using a ThreadPool for simultaneous downloads, + if threading was specified on the command line. 
+ + :param args: command-line arguments and their values + :type args: argparse.Namespace + :return: number of errors encountered during processing + :rtype: int + """ errors = 0 # parse batch file @@ -126,6 +166,15 @@ def batch_download(args: Namespace) -> int: def download(f: File, args: Namespace) -> int: + """Fetch file information and contents if the file exists and save it to disk. + + :param f: a File object representing the file to be downloaded + :type f: File + :param args: command-line arguments and their values + :type args: argparse.Namespace + :return: number of errors encountered during processing + :rtype: int + """ file = f.image filename = f.name dest = f.dest @@ -133,6 +182,7 @@ def download(f: File, args: Namespace) -> int: errors = 0 + # prepend the current filename to all log messages adapter = FileLogAdapter(logger, {"filename": filename}) if file.exists: diff --git a/src/wikiget/exceptions.py b/src/wikiget/exceptions.py index 4afc97f..c72a589 100644 --- a/src/wikiget/exceptions.py +++ b/src/wikiget/exceptions.py @@ -17,7 +17,4 @@ class ParseError(Exception): - """ - This exception is raised when the program's input is unable to be parsed as a valid - download target. - """ + """Raised when some input is unable to be parsed as valid.""" diff --git a/src/wikiget/file.py b/src/wikiget/file.py index 38d41c3..f2320b2 100644 --- a/src/wikiget/file.py +++ b/src/wikiget/file.py @@ -24,18 +24,21 @@ from wikiget import DEFAULT_SITE class File: - """ - This class represents a file with the properties name, destination, host site, and + """A file object. + + Represents a file with the attributes name, destination, host site, and mwclient.image.Image object as retrieved from the host site. """ def __init__(self, name: str, dest: str = "", site: str = "") -> None: - """ - Initializes a new file with the specified name and an optional destination name. + """Initialize a new file. + + The file name is required. 
If a destination and/or site are provided, those will + be used instead of the defaults. :param name: name of the file :type name: str - :param dest: destination of the file, if different from the name; if not + :param dest: output destination of the file, if different from the name; if not specified, defaults to the name :type dest: str, optional :param site: name of the site hosting the file; if not specified, defaults to @@ -48,8 +51,7 @@ class File: self.site = site if site else DEFAULT_SITE def __eq__(self, other: object) -> bool: - """ - Compares this File object with another for equality. + """Compare this File object with another for equality. :param other: another File to compare :type other: File diff --git a/src/wikiget/logging.py b/src/wikiget/logging.py index a762094..6673877 100644 --- a/src/wikiget/logging.py +++ b/src/wikiget/logging.py @@ -26,6 +26,15 @@ class FileLogAdapter(logging.LoggerAdapter): def configure_logging(verbosity: int, logfile: str, *, quiet: bool) -> None: + """Set the program's log configuration according to the given settings. + + :param verbosity: how verbose the log messages should be + :type verbosity: int + :param logfile: file to write log messages to, if any + :type logfile: str + :param quiet: True if log messages should be suppressed or False otherwise + :type quiet: bool + """ loglevel = logging.WARNING # default log level if verbosity >= wikiget.VERY_VERBOSE: # this includes API and library messages, not just from wikiget diff --git a/src/wikiget/parse.py b/src/wikiget/parse.py index 82e9172..92726f0 100644 --- a/src/wikiget/parse.py +++ b/src/wikiget/parse.py @@ -34,6 +34,16 @@ logger = logging.getLogger(__name__) def get_dest(dl: str, args: Namespace) -> File: + """Parse the given download target for filename, destination, and site host. 
+ + :param dl: download target (filename or URL) + :type dl: str + :param args: command-line arguments and their values + :type args: argparse.Namespace + :raises ParseError: the target was unable to be parsed as a valid file + :return: a File object representing the target, destination, and site + :rtype: File + """ url = urlparse(dl) if url.netloc: @@ -64,6 +74,16 @@ def get_dest(dl: str, args: Namespace) -> File: def read_batch_file(batch_file: str) -> dict[int, str]: + """Parse a batch file or stdin for valid input. + + The contents are returned as a dictionary with line numbers for keys and line + contents for values. Any blank lines or lines starting with '#' are skipped. + + :param batch_file: name of the file to parse or "-" for stdin + :type batch_file: str + :return: a dictionary representation of the input contents + :rtype: dict[int, str] + """ dl_dict = {} if batch_file == "-": diff --git a/src/wikiget/validations.py b/src/wikiget/validations.py index 18c1f86..c7cc2dd 100644 --- a/src/wikiget/validations.py +++ b/src/wikiget/validations.py @@ -25,7 +25,7 @@ from wikiget import BLOCKSIZE def valid_file(search_string: str) -> re.Match | None: - """Determines if the given string contains a valid file name + """Determine if the given string contains a valid file name. A valid file name is a string that begins with 'File:' or 'Image:' (the standard file prefixes in MediaWiki), includes a period, and has at least one character @@ -43,7 +43,7 @@ def valid_file(search_string: str) -> re.Match | None: def valid_site(search_string: str) -> re.Match | None: - """Determines if the given string contains a valid site name + """Determine if the given string contains a valid site name. A valid site name is a string ending with 'wikipedia.org' or 'wikimedia.org'. This covers all subdomains of those domains. 
@@ -61,7 +61,7 @@ def valid_site(search_string: str) -> re.Match | None: def verify_hash(filename: str) -> str: - """Calculates the SHA1 hash of the given file for comparison with a known value. + """Calculate the SHA1 hash of the given file for comparison with a known value. Despite being insecure, SHA1 is used since that's what the MediaWiki API returns for the file hash. |
