author     Cody Logan <cody@lokken.dev>  2023-10-13 12:24:13 -0700
committer  Cody Logan <cody@lokken.dev>  2023-10-13 12:24:13 -0700
commit     630541499a58f98c55d5cc372d21e745c106d250 (patch)
tree       f8dc0fe50d0d4a329ce8e64f497b89d84a25f7a2 /src
parent     875748228e509e244c8f444114387f1a03cbb393 (diff)
download   wikiget-630541499a58f98c55d5cc372d21e745c106d250.tar.gz
           wikiget-630541499a58f98c55d5cc372d21e745c106d250.zip
Refactor parsing logic and revise exception handling
Diffstat (limited to 'src')
-rw-r--r--  src/wikiget/dl.py          58
-rw-r--r--  src/wikiget/exceptions.py  20
-rw-r--r--  src/wikiget/parse.py       54
-rw-r--r--  src/wikiget/wikiget.py     13
4 files changed, 103 insertions(+), 42 deletions(-)
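In short, this commit moves URL/filename parsing out of dl.py into a new parse.py module and replaces the hard sys.exit(1) on unparseable input with a ParseError exception, letting each caller decide how fatal a parse failure is. Reduced to its essentials (both fragments appear in the diff below):

    # before: a parse failure killed the whole process from inside get_dest
    logging.error(f"Could not parse input '{filename}' as a file.")
    sys.exit(1)

    # after: get_dest raises, and the callers in wikiget.py choose between
    # a warning (batch mode) and a fatal error (single-file mode)
    raise ParseError(msg)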
diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py
index 2b2befa..50b7460 100644
--- a/src/wikiget/dl.py
+++ b/src/wikiget/dl.py
@@ -18,46 +18,16 @@
import logging
import os
import sys
-from urllib.parse import unquote, urlparse
from mwclient import APIError, InvalidResponse, LoginError, Site
from requests import ConnectionError, HTTPError
from tqdm import tqdm
import wikiget
+from wikiget.exceptions import ParseError
from wikiget.file import File
-from wikiget.validations import valid_file, verify_hash
-
-
-def get_dest(dl, args):
- url = urlparse(dl)
-
- if url.netloc:
- filename = url.path
- site_name = url.netloc
- if args.site is not wikiget.DEFAULT_SITE:
- # this will work even if the user specifies 'commons.wikimedia.org'
- logging.warning("target is a URL, ignoring site specified with --site")
- else:
- filename = dl
- site_name = args.site
-
- file_match = valid_file(filename)
-
- # check if this is a valid file
- if file_match and file_match.group(1):
- # has File:/Image: prefix and extension
- filename = file_match.group(2)
- else:
- # no file extension and/or prefix, probably an article
- logging.error(f"Could not parse input '{filename}' as a file.")
- sys.exit(1)
-
- filename = unquote(filename) # remove URL encoding for special characters
-
- dest = args.output or filename
-
- return filename, dest, site_name
+from wikiget.parse import get_dest
+from wikiget.validations import verify_hash
def query_api(filename, site_name, args):
@@ -98,8 +68,7 @@ def query_api(filename, site_name, args):
# an API error at this point likely means access is denied, which could happen
# with a private wiki
logging.error(
- "Access denied. Try providing credentials with "
- "--username and --password."
+ "Access denied. Try providing credentials with --username and --password."
)
logging.debug("Full error message:")
for i in e.args:
@@ -110,7 +79,10 @@ def query_api(filename, site_name, args):
def prep_download(dl, args):
- filename, dest, site_name = get_dest(dl, args)
+ try:
+ filename, dest, site_name = get_dest(dl, args)
+ except ParseError:
+ raise
file = File(filename, dest)
file.object, file.site = query_api(file.name, site_name, args)
return file
@@ -136,7 +108,7 @@ def download(f, args):
if os.path.isfile(dest) and not args.force:
logging.warning(
- f"File '{dest}' already exists, skipping download (use -f to ignore)"
+ f"File '{dest}' already exists, skipping download (use -f to force)"
)
else:
try:
@@ -167,19 +139,25 @@ def download(f, args):
fd.write(chunk)
progress_bar.update(len(chunk))
- # verify file integrity and optionally print details
+ # verify file integrity and log details
dl_sha1 = verify_hash(dest)
- logging.info(f"Downloaded file SHA1 is {dl_sha1}")
- logging.info(f"Server file SHA1 is {file_sha1}")
+ logging.info(f"Remote file SHA1 is {file_sha1}")
+ logging.info(f"Local file SHA1 is {dl_sha1}")
if dl_sha1 == file_sha1:
logging.info("Hashes match!")
# at this point, we've successfully downloaded the file
+ success_log = f"'{filename}' downloaded"
+ if args.output:
+ success_log += f" to '{dest}'"
+ logging.info(success_log)
else:
logging.error("Hash mismatch! Downloaded file may be corrupt.")
+ # TODO: log but don't quit while in batch mode
sys.exit(1)
else:
# no file information returned
logging.error(f"Target '{filename}' does not appear to be a valid file.")
+ # TODO: log but don't quit while in batch mode
sys.exit(1)
diff --git a/src/wikiget/exceptions.py b/src/wikiget/exceptions.py
new file mode 100644
index 0000000..94ed6b2
--- /dev/null
+++ b/src/wikiget/exceptions.py
@@ -0,0 +1,20 @@
+# wikiget - CLI tool for downloading files from Wikimedia sites
+# Copyright (C) 2023 Cody Logan
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# Wikiget is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Wikiget is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Wikiget. If not, see <https://www.gnu.org/licenses/>.
+
+
+class ParseError(Exception):
+ pass
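ParseError subclasses Exception directly, so callers can catch parse failures specifically without swallowing unrelated errors. A minimal illustration; parse_title and its prefix check are hypothetical stand-ins for the real logic in wikiget.parse:

    from wikiget.exceptions import ParseError

    def parse_title(target):
        # hypothetical check, standing in for the valid_file regex
        if not target.lower().startswith(("file:", "image:")):
            raise ParseError(f"Could not parse input '{target}' as a file")
        return target.split(":", 1)[1]

    try:
        parse_title("Example.jpg")  # no File:/Image: prefix
    except ParseError as e:
        print(e)  # only parse failures land here; other exceptions propagate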
diff --git a/src/wikiget/parse.py b/src/wikiget/parse.py
new file mode 100644
index 0000000..09c0767
--- /dev/null
+++ b/src/wikiget/parse.py
@@ -0,0 +1,54 @@
+# wikiget - CLI tool for downloading files from Wikimedia sites
+# Copyright (C) 2023 Cody Logan
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# Wikiget is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Wikiget is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Wikiget. If not, see <https://www.gnu.org/licenses/>.
+
+import logging
+from urllib.parse import unquote, urlparse
+
+import wikiget
+from wikiget.exceptions import ParseError
+from wikiget.validations import valid_file
+
+
+def get_dest(dl, args):
+ url = urlparse(dl)
+
+ if url.netloc:
+ filename = url.path
+ site_name = url.netloc
+ if args.site is not wikiget.DEFAULT_SITE:
+ # this will work even if the user specifies 'commons.wikimedia.org'
+ logging.warning("target is a URL, ignoring site specified with --site")
+ else:
+ filename = dl
+ site_name = args.site
+
+ file_match = valid_file(filename)
+
+ # check if this is a valid file
+ if file_match and file_match.group(1):
+ # has File:/Image: prefix and extension
+ filename = file_match.group(2)
+ else:
+ # no file extension and/or prefix, probably an article
+ msg = f"Could not parse input '{filename}' as a file"
+ raise ParseError(msg)
+
+ filename = unquote(filename) # remove URL encoding for special characters
+
+ dest = args.output or filename
+
+ return filename, dest, site_name
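A usage sketch for the relocated get_dest, assuming an argparse-style namespace with the site and output attributes it reads; exact return values depend on the valid_file regex, which isn't shown in this diff:

    from argparse import Namespace

    from wikiget.exceptions import ParseError
    from wikiget.parse import get_dest

    args = Namespace(site="commons.wikimedia.org", output=None)

    # bare File: title: the prefix is stripped and the --site value is used
    name, dest, site = get_dest("File:Example.jpg", args)

    # full URL: the URL's host wins, with a warning if --site was also given
    name, dest, site = get_dest(
        "https://en.wikipedia.org/wiki/File:Example.jpg", args
    )

    # anything that doesn't look like a file now raises instead of exiting
    try:
        get_dest("Example article", args)
    except ParseError as e:
        print(e)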
diff --git a/src/wikiget/wikiget.py b/src/wikiget/wikiget.py
index fba9509..68e0233 100644
--- a/src/wikiget/wikiget.py
+++ b/src/wikiget/wikiget.py
@@ -22,6 +22,7 @@ from concurrent.futures import ThreadPoolExecutor
import wikiget
from wikiget.dl import download, prep_download
+from wikiget.exceptions import ParseError
def construct_parser():
@@ -173,7 +174,11 @@ def batch_download(args):
for line_num, line in dl_list.items():
# keep track of batch file line numbers for debugging/logging purposes
logging.info(f"Downloading '{line}' at line {line_num}")
- file = prep_download(line, args)
+ try:
+ file = prep_download(line, args)
+ except ParseError as e:
+            logging.warning(f"{e} (line {line_num})")
+            continue
future = executor.submit(download, file, args)
futures.append(future)
# wait for downloads to finish
@@ -197,5 +202,9 @@ def main():
batch_download(args)
else:
# single download mode
- file = prep_download(args.FILE, args)
+ try:
+ file = prep_download(args.FILE, args)
+ except ParseError as e:
+ logging.error(e)
+ sys.exit(1)
download(file, args)
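The two call sites give ParseError different severities: batch mode logs a warning and moves on to the next line, while single-file mode logs an error and exits. The same pattern reduced to a skeleton; handle_target is a hypothetical wrapper, not part of this commit:

    import logging
    import sys

    from wikiget.dl import prep_download
    from wikiget.exceptions import ParseError

    def handle_target(target, args, batch=False):
        try:
            return prep_download(target, args)  # may raise ParseError
        except ParseError as e:
            if batch:
                logging.warning(e)  # skip this entry, keep the batch going
                return None
            logging.error(e)
            sys.exit(1)  # single mode: a bad target is fatal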