author     Cody Logan <cody@lokken.dev>    2023-10-13 10:11:20 -0700
committer  Cody Logan <cody@lokken.dev>    2023-10-13 10:11:20 -0700
commit     8b70abecb543099528ecc8c3b1edfe0330d3d223 (patch)
tree       939de9ab71d283489406838af4d14ef10ec1528d /src
parent     5dc9b79bd68d2f7cf0dcf1adfaffd8e07b27c6ba (diff)
Refactor code and improve docstrings
Diffstat (limited to 'src')
-rw-r--r--  src/wikiget/file.py          16
-rw-r--r--  src/wikiget/validations.py   26
-rw-r--r--  src/wikiget/wikiget.py       87
3 files changed, 75 insertions, 54 deletions
diff --git a/src/wikiget/file.py b/src/wikiget/file.py
index 60a71e0..c1b9ae6 100644
--- a/src/wikiget/file.py
+++ b/src/wikiget/file.py
@@ -17,11 +17,17 @@
class File:
- def __init__(self, name, dest=None):
+ def __init__(self, name, dest=""):
+ """
+ Initializes a new file with the specified name and an optional destination name.
+
+ :param name: name of the file
+ :type name: str
+ :param dest: destination of the file, if different from the name; if not
+ specified, defaults to the name
+ :type dest: str, optional
+ """
self.object = None
self.site = None
self.name = name
- if dest is None:
- self.dest = name
- else:
- self.dest = dest
+ self.dest = dest if dest else name
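
A minimal sketch of the new default, assuming the trimmed-down File shown in this hunk: an empty or omitted dest now falls back to the file name, which is what the removed if/else did for None.

    class File:
        def __init__(self, name, dest=""):
            self.object = None
            self.site = None
            self.name = name
            # an empty dest (the new default) falls back to the file name
            self.dest = dest if dest else name

    f = File("Example.jpg")
    assert f.dest == "Example.jpg"             # no dest given: defaults to name
    f = File("Example.jpg", dest="local.jpg")
    assert f.dest == "local.jpg"               # explicit dest is kept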
diff --git a/src/wikiget/validations.py b/src/wikiget/validations.py
index 8ebd996..1610417 100644
--- a/src/wikiget/validations.py
+++ b/src/wikiget/validations.py
@@ -23,11 +23,14 @@ from wikiget import BLOCKSIZE
def valid_file(search_string):
"""
- Determines if the given string contains a valid file name, defined as a
- string ending with a '.' and at least one character, beginning with 'File:'
- or 'Image:', the standard file prefixes in MediaWiki.
+ Determines if the given string contains a valid file name, defined as a string
+ ending with a '.' and at least one character, beginning with 'File:' or 'Image:',
+ the standard file prefixes in MediaWiki.
+
:param search_string: string to validate
+ :type search_string: str
:returns: a regex Match object if there's a match or None otherwise
+ :rtype: re.Match
"""
# second group could also restrict to file extensions with three or more
# letters with ([^/\r\n\t\f\v]+\.\w{3,})
@@ -37,12 +40,15 @@ def valid_file(search_string):
def valid_site(search_string):
"""
- Determines if the given string contains a valid site name, defined as a
- string ending with 'wikipedia.org' or 'wikimedia.org'. This covers all
- subdomains of those domains. Eventually, it should be possible to support
- any MediaWiki site, regardless of domain name.
+ Determines if the given string contains a valid site name, defined as a string
+ ending with 'wikipedia.org' or 'wikimedia.org'. This covers all subdomains of those
+ domains. Eventually, it should be possible to support any MediaWiki site, regardless
+ of domain name.
+
:param search_string: string to validate
+ :type search_string: str
:returns: a regex Match object if there's a match or None otherwise
+ :rtype: re.Match
"""
site_regex = re.compile(r"wiki[mp]edia\.org$", re.I)
return site_regex.search(search_string)
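
A rough sketch of how the two validators behave. The site pattern is copied from the hunk above; the file pattern here is a hypothetical stand-in written only to match the docstring's description (a 'File:' or 'Image:' prefix and a name ending in '.' plus at least one character), not the project's actual regex.

    import re

    # pattern taken from the diff: any subdomain of wikipedia.org or wikimedia.org
    site_regex = re.compile(r"wiki[mp]edia\.org$", re.I)

    # hypothetical stand-in for wikiget's file regex, based on the docstring
    file_regex = re.compile(r"(File:|Image:)([^/\r\n\t\f\v]+\.\w+)")

    assert site_regex.search("en.wikipedia.org")
    assert site_regex.search("commons.wikimedia.org")
    assert site_regex.search("example.com") is None

    assert file_regex.search("File:Example.jpg")
    assert file_regex.search("Image:Example.svg")
    assert file_regex.search("Example.jpg") is None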
@@ -50,10 +56,12 @@ def valid_site(search_string):
def verify_hash(filename):
"""
- Calculates the SHA1 hash of the given file for comparison with a known
- value.
+ Calculates the SHA1 hash of the given file for comparison with a known value.
+
:param filename: name of the file to calculate a hash for
+ :type filename: str
:return: hash digest
+ :rtype: str
"""
hasher = hashlib.sha1() # noqa: S324
with open(filename, "rb") as dl:
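
The rest of verify_hash falls outside this hunk. A plausible completion of the chunked-hash pattern it starts, with a hard-coded BLOCKSIZE standing in for the constant wikiget imports at the top of the file, might look like:

    import hashlib

    BLOCKSIZE = 65536  # assumed value; wikiget imports its own BLOCKSIZE constant

    def verify_hash(filename):
        """Calculate the SHA1 hash of a file, reading it in fixed-size blocks."""
        hasher = hashlib.sha1()  # noqa: S324
        with open(filename, "rb") as dl:
            buf = dl.read(BLOCKSIZE)
            while buf:
                hasher.update(buf)
                buf = dl.read(BLOCKSIZE)
        return hasher.hexdigest()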
diff --git a/src/wikiget/wikiget.py b/src/wikiget/wikiget.py
index 8c067e0..c470b46 100644
--- a/src/wikiget/wikiget.py
+++ b/src/wikiget/wikiget.py
@@ -111,10 +111,7 @@ def construct_parser():
return parser
-def main():
- parser = construct_parser()
- args = parser.parse_args()
-
+def configure_logging(args):
loglevel = logging.WARNING
if args.verbose >= wikiget.VERY_VERBOSE:
# this includes API and library messages
@@ -147,6 +144,51 @@ def main():
# log only to console
logging.basicConfig(level=loglevel, format=log_format)
+
+def batch_download(args):
+ input_file = args.FILE
+ dl_list = []
+
+ logging.info(f"Using batch file '{input_file}'.")
+
+ try:
+ fd = open(input_file)
+ except OSError as e:
+ logging.error("File could not be read. The following error was encountered:")
+ logging.error(e)
+ sys.exit(1)
+ else:
+ with fd:
+ # store file contents in memory in case something happens to the file
+ # while we're downloading
+ for _, line in enumerate(fd):
+ dl_list.append(line)
+
+ # TODO: validate file contents before download process starts
+ with ThreadPoolExecutor(
+ max_workers=args.threads,
+ thread_name_prefix="download",
+ ) as executor:
+ futures = []
+ for line_num, line in enumerate(dl_list, start=1):
+ url = line.strip()
+ # keep track of batch file line numbers for debugging/logging purposes
+ logging.info(f"Downloading '{url}' at line {line_num}:")
+ file = prep_download(url, args)
+ future = executor.submit(download, file, args)
+ futures.append(future)
+ # wait for downloads to finish
+ for future in futures:
+ future.result()
+
+
+def main():
+ # setup
+ parser = construct_parser()
+ args = parser.parse_args()
+
+ configure_logging(args)
+
# log events are appended to the file if it already exists, so note the start of a
# new download session
logging.info(f"Starting download session using wikiget {wikiget.wikiget_version}")
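
The new batch_download reads the whole batch file into memory and then fans the downloads out over a thread pool. A self-contained sketch of the same pattern, with a stand-in function in place of wikiget's prep_download/download pair:

    from concurrent.futures import ThreadPoolExecutor

    def fake_download(url):
        # stand-in for wikiget's prep_download/download pair
        print(f"downloading {url}")

    dl_list = ["https://example.org/a.jpg\n", "https://example.org/b.jpg\n"]

    with ThreadPoolExecutor(max_workers=4, thread_name_prefix="download") as executor:
        futures = [executor.submit(fake_download, line.strip()) for line in dl_list]
        # wait for every download and surface any exception it raised
        for future in futures:
            future.result()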
@@ -154,42 +196,7 @@ def main():
if args.batch:
# batch download mode
- input_file = args.FILE
- dl_list = []
-
- logging.info(f"Using batch file '{input_file}'.")
-
- try:
- fd = open(input_file)
- except OSError as e:
- logging.error(
- "File could not be read. The following error was encountered:"
- )
- logging.error(e)
- sys.exit(1)
- else:
- with fd:
- # store file contents in memory in case something happens to the file
- # while we're downloading
- for _, line in enumerate(fd):
- dl_list.append(line)
-
- # TODO: validate file contents before download process starts
- with ThreadPoolExecutor(
- max_workers=args.threads,
- thread_name_prefix="download",
- ) as executor:
- futures = []
- for line_num, line in enumerate(dl_list, start=1):
- url = line.strip()
- # keep track of batch file line numbers for debugging/logging purposes
- logging.info(f"Downloading '{url}' at line {line_num}:")
- file = prep_download(url, args)
- future = executor.submit(download, file, args)
- futures.append(future)
- # wait for downloads to finish
- for future in futures:
- future.result()
+ batch_download(args)
else:
# single download mode
file = prep_download(args.FILE, args)
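
Pieced together from the hunks above, the refactored main() now reads roughly as follows; the diff cuts off after prep_download, so the final download call in the single-file branch is an assumption.

    def main():
        parser = construct_parser()
        args = parser.parse_args()

        configure_logging(args)

        logging.info(f"Starting download session using wikiget {wikiget.wikiget_version}")

        if args.batch:
            # batch download mode
            batch_download(args)
        else:
            # single download mode
            file = prep_download(args.FILE, args)
            download(file, args)  # assumed; the diff ends before this line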