Refactor code and improve docstrings

author: Cody Logan <cody@lokken.dev> 2023-10-13 10:11:20 -0700
committer: Cody Logan <cody@lokken.dev> 2023-10-13 10:11:20 -0700
commit: 8b70abecb543099528ecc8c3b1edfe0330d3d223 (patch)
tree: 939de9ab71d283489406838af4d14ef10ec1528d /src
parent: 5dc9b79bd68d2f7cf0dcf1adfaffd8e07b27c6ba (diff)
download: wikiget-8b70abecb543099528ecc8c3b1edfe0330d3d223.tar.gz
wikiget-8b70abecb543099528ecc8c3b1edfe0330d3d223.zip
3 files changed, 75 insertions, 54 deletions
diff --git a/src/wikiget/file.py b/src/wikiget/file.py
index 60a71e0..c1b9ae6 100644
--- a/src/wikiget/file.py
+++ b/src/wikiget/file.py
@@ -17,11 +17,17 @@
 
 
 class File:
-    def __init__(self, name, dest=None):
+    def __init__(self, name, dest=""):
+        """
+        Initializes a new file with the specified name and an optional destination name.
+
+        :param name: name of the file
+        :type name: str
+        :param dest: destination of the file, if different from the name; if not
+            specified, defaults to the name
+        :type dest: str, optional
+        """
         self.object = None
         self.site = None
         self.name = name
-        if dest is None:
-            self.dest = name
-        else:
-            self.dest = dest
+        self.dest = dest if dest else name
diff --git a/src/wikiget/validations.py b/src/wikiget/validations.py
index 8ebd996..1610417 100644
--- a/src/wikiget/validations.py
+++ b/src/wikiget/validations.py
@@ -23,11 +23,14 @@ from wikiget import BLOCKSIZE
 
 def valid_file(search_string):
     """
-    Determines if the given string contains a valid file name, defined as a
-    string ending with a '.' and at least one character, beginning with 'File:'
-    or 'Image:', the standard file prefixes in MediaWiki.
+    Determines if the given string contains a valid file name, defined as a string
+    ending with a '.' and at least one character, beginning with 'File:' or 'Image:',
+    the standard file prefixes in MediaWiki.
+
     :param search_string: string to validate
+    :type search_string: str
     :returns: a regex Match object if there's a match or None otherwise
+    :rtype: re.Match
     """
     # second group could also restrict to file extensions with three or more
     # letters with ([^/\r\n\t\f\v]+\.\w{3,})
@@ -37,12 +40,15 @@ def valid_file(search_string):
 
 def valid_site(search_string):
     """
-    Determines if the given string contains a valid site name, defined as a
-    string ending with 'wikipedia.org' or 'wikimedia.org'. This covers all
-    subdomains of those domains. Eventually, it should be possible to support
-    any MediaWiki site, regardless of domain name.
+    Determines if the given string contains a valid site name, defined as a string
+    ending with 'wikipedia.org' or 'wikimedia.org'. This covers all subdomains of those
+    domains. Eventually, it should be possible to support any MediaWiki site, regardless
+    of domain name.
+
     :param search_string: string to validate
+    :type search_string: str
     :returns: a regex Match object if there's a match or None otherwise
+    :rtype: re.Match
     """
     site_regex = re.compile(r"wiki[mp]edia\.org$", re.I)
     return site_regex.search(search_string)
@@ -50,10 +56,12 @@ def valid_site(search_string):
 
 def verify_hash(filename):
     """
-    Calculates the SHA1 hash of the given file for comparison with a known
-    value.
+    Calculates the SHA1 hash of the given file for comparison with a known value.
+
     :param filename: name of the file to calculate a hash for
+    :type filename: str
     :return: hash digest
+    :rtype: str
     """
     hasher = hashlib.sha1()  # noqa: S324
     with open(filename, "rb") as dl:
diff --git a/src/wikiget/wikiget.py b/src/wikiget/wikiget.py
index 8c067e0..c470b46 100644
--- a/src/wikiget/wikiget.py
+++ b/src/wikiget/wikiget.py
@@ -111,10 +111,7 @@ def construct_parser():
     return parser
 
 
-def main():
-    parser = construct_parser()
-    args = parser.parse_args()
-
+def configure_logging(args):
     loglevel = logging.WARNING
     if args.verbose >= wikiget.VERY_VERBOSE:
         # this includes API and library messages
@@ -147,6 +144,51 @@ def main():
         # log only to console
         logging.basicConfig(level=loglevel, format=log_format)
 
+
+def batch_download(args):
+    input_file = args.FILE
+    dl_list = []
+
+    logging.info(f"Using batch file '{input_file}'.")
+
+    try:
+        fd = open(input_file)
+    except OSError as e:
+        logging.error("File could not be read. The following error was encountered:")
+        logging.error(e)
+        sys.exit(1)
+    else:
+        with fd:
+            # store file contents in memory in case something happens to the file
+            # while we're downloading
+            for _, line in enumerate(fd):
+                dl_list.append(line)
+
+    # TODO: validate file contents before download process starts
+    with ThreadPoolExecutor(
+        max_workers=args.threads,
+        thread_name_prefix="download",
+    ) as executor:
+        futures = []
+        for line_num, line in enumerate(dl_list, start=1):
+            url = line.strip()
+            # keep track of batch file line numbers for debugging/logging purposes
+            logging.info(f"Downloading '{url}' at line {line_num}:")
+            file = prep_download(url, args)
+            future = executor.submit(download, file, args)
+            futures.append(future)
+        # wait for downloads to finish
+        for future in futures:
+            future.result()
+
+
+def main():
+    # setup
+    parser = construct_parser()
+    args = parser.parse_args()
+
+    configure_logging(args)
+
     # log events are appended to the file if it already exists, so note the start of a
     # new download session
     logging.info(f"Starting download session using wikiget {wikiget.wikiget_version}")
@@ -154,42 +196,7 @@ def main():
 
     if args.batch:
         # batch download mode
-        input_file = args.FILE
-        dl_list = []
-
-        logging.info(f"Using batch file '{input_file}'.")
-
-        try:
-            fd = open(input_file)
-        except OSError as e:
-            logging.error(
-                "File could not be read. The following error was encountered:"
-            )
-            logging.error(e)
-            sys.exit(1)
-        else:
-            with fd:
-                # store file contents in memory in case something happens to the file
-                # while we're downloading
-                for _, line in enumerate(fd):
-                    dl_list.append(line)
-
-        # TODO: validate file contents before download process starts
-        with ThreadPoolExecutor(
-            max_workers=args.threads,
-            thread_name_prefix="download",
-        ) as executor:
-            futures = []
-            for line_num, line in enumerate(dl_list, start=1):
-                url = line.strip()
-                # keep track of batch file line numbers for debugging/logging purposes
-                logging.info(f"Downloading '{url}' at line {line_num}:")
-                file = prep_download(url, args)
-                future = executor.submit(download, file, args)
-                futures.append(future)
-            # wait for downloads to finish
-            for future in futures:
-                future.result()
+        batch_download(args)
     else:
         # single download mode
         file = prep_download(args.FILE, args)
author	Cody Logan <cody@lokken.dev>	2023-10-13 10:11:20 -0700
committer	Cody Logan <cody@lokken.dev>	2023-10-13 10:11:20 -0700
commit	8b70abecb543099528ecc8c3b1edfe0330d3d223 (patch)
tree	939de9ab71d283489406838af4d14ef10ec1528d /src
parent	5dc9b79bd68d2f7cf0dcf1adfaffd8e07b27c6ba (diff)
download	wikiget-8b70abecb543099528ecc8c3b1edfe0330d3d223.tar.gz wikiget-8b70abecb543099528ecc8c3b1edfe0330d3d223.zip