From a1995912ed24b37a990f3fcd5e91dbf7b46669fb Mon Sep 17 00:00:00 2001
From: Cody Logan <clpo13@gmail.com>
Date: Tue, 26 Sep 2023 15:17:04 -0700
Subject: Reorganize file tree

---
 src/wikiget/dl.py | 159 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 159 insertions(+)
 create mode 100644 src/wikiget/dl.py

(limited to 'src/wikiget/dl.py')

diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py
new file mode 100644
index 0000000..8f32218
--- /dev/null
+++ b/src/wikiget/dl.py
@@ -0,0 +1,159 @@
+# wikiget - CLI tool for downloading files from Wikimedia sites
+# Copyright (C) 2018-2021 Cody Logan and contributors
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# Wikiget is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Wikiget is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Wikiget. If not, see <https://www.gnu.org/licenses/>.
+
+import logging
+import os
+import sys
+from urllib.parse import unquote, urlparse
+
+from mwclient import APIError, InvalidResponse, LoginError, Site
+from requests import ConnectionError, HTTPError
+from tqdm import tqdm
+
+from . import CHUNKSIZE, DEFAULT_SITE, USER_AGENT
+from .validations import valid_file, verify_hash
+
+
+def download(dl, args):
+    url = urlparse(dl)
+
+    if url.netloc:
+        filename = url.path
+        site_name = url.netloc
+        if args.site is not DEFAULT_SITE:
+            # this will work even if the user specifies 'commons.wikimedia.org'
+            logging.warning("target is a URL, "
+                            "ignoring site specified with --site")
+    else:
+        filename = dl
+        site_name = args.site
+
+    file_match = valid_file(filename)
+
+    # check if this is a valid file
+    if file_match and file_match.group(1):
+        # has File:/Image: prefix and extension
+        filename = file_match.group(2)
+    else:
+        # no file extension and/or prefix, probably an article
+        logging.error(f"Could not parse input '{filename}' as a file.")
+        sys.exit(1)
+
+    filename = unquote(filename)  # remove URL encoding for special characters
+
+    dest = args.output or filename
+
+    logging.debug(f"User agent: {USER_AGENT}")
+
+    # connect to site and identify ourselves
+    logging.info(f"Site name: {site_name}")
+    try:
+        site = Site(site_name, path=args.path, clients_useragent=USER_AGENT)
+        if args.username and args.password:
+            site.login(args.username, args.password)
+    except ConnectionError as e:
+        # usually this means there is no such site, or there's no network
+        # connection, though it could be a certificate problem
+        logging.error("Couldn't connect to specified site.")
+        logging.debug("Full error message:")
+        logging.debug(e)
+        sys.exit(1)
+    except HTTPError as e:
+        # most likely a 403 forbidden or 404 not found error for api.php
+        logging.error("Couldn't find the specified wiki's api.php. "
+                      "Check the value of --path.")
+        logging.debug("Full error message:")
+        logging.debug(e)
+        sys.exit(1)
+    except (InvalidResponse, LoginError) as e:
+        # InvalidResponse: site exists, but we couldn't communicate with the
+        # API endpoint for some reason other than an HTTP error.
+        # LoginError: missing or invalid credentials
+        logging.error(e)
+        sys.exit(1)
+
+    # get info about the target file
+    try:
+        file = site.images[filename]
+    except APIError as e:
+        # an API error at this point likely means access is denied,
+        # which could happen with a private wiki
+        logging.error("Access denied. Try providing credentials with "
+                      "--username and --password.")
+        logging.debug("Full error message:")
+        for i in e.args:
+            logging.debug(i)
+        sys.exit(1)
+
+    if file.imageinfo != {}:
+        # file exists either locally or at a common repository,
+        # like Wikimedia Commons
+        file_url = file.imageinfo["url"]
+        file_size = file.imageinfo["size"]
+        file_sha1 = file.imageinfo["sha1"]
+
+        filename_log = (f"Downloading '{filename}' ({file_size} bytes) "
+                        f"from {site.host}")
+        if args.output:
+            filename_log += f" to '{dest}'"
+        logging.info(filename_log)
+        logging.info(f"{file_url}")
+
+        if os.path.isfile(dest) and not args.force:
+            logging.warning(f"File '{dest}' already exists, skipping download "
+                            "(use -f to ignore)")
+        else:
+            try:
+                fd = open(dest, "wb")
+            except IOError as e:
+                logging.error("File could not be written. "
+                              "The following error was encountered:")
+                logging.error(e)
+                sys.exit(1)
+            else:
+                # download the file(s)
+                if args.verbose >= 1:
+                    leave_bars = True
+                else:
+                    leave_bars = False
+                with tqdm(leave=leave_bars, total=file_size,
+                          unit="B", unit_scale=True,
+                          unit_divisor=CHUNKSIZE) as progress_bar:
+                    with fd:
+                        res = site.connection.get(file_url, stream=True)
+                        progress_bar.set_postfix(file=dest, refresh=False)
+                        for chunk in res.iter_content(CHUNKSIZE):
+                            fd.write(chunk)
+                            progress_bar.update(len(chunk))
+
+            # verify file integrity and optionally print details
+            dl_sha1 = verify_hash(dest)
+
+            logging.info(f"Downloaded file SHA1 is {dl_sha1}")
+            logging.info(f"Server file SHA1 is {file_sha1}")
+            if dl_sha1 == file_sha1:
+                logging.info("Hashes match!")
+                # at this point, we've successfully downloaded the file
+            else:
+                logging.error("Hash mismatch! Downloaded file may be corrupt.")
+                sys.exit(1)
+
+    else:
+        # no file information returned
+        logging.error(f"Target '{filename}' does not appear to be "
+                      "a valid file.")
+        sys.exit(1)
-- 
cgit v1.2.3


From 75a79785d851efa319f4216e0d3471d30a02154a Mon Sep 17 00:00:00 2001
From: Cody Logan <clpo13@gmail.com>
Date: Tue, 26 Sep 2023 15:45:43 -0700
Subject: Style and format fixes

---
 src/wikiget/dl.py | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

(limited to 'src/wikiget/dl.py')

diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py
index 9850ce8..791db61 100644
--- a/src/wikiget/dl.py
+++ b/src/wikiget/dl.py
@@ -36,8 +36,7 @@ def download(dl, args):
         site_name = url.netloc
         if args.site is not wikiget.DEFAULT_SITE:
             # this will work even if the user specifies 'commons.wikimedia.org'
-            logging.warning("target is a URL, "
-                            "ignoring site specified with --site")
+            logging.warning("target is a URL, ignoring site specified with --site")
     else:
         filename = dl
         site_name = args.site
@@ -74,8 +73,9 @@ def download(dl, args):
         sys.exit(1)
     except HTTPError as e:
         # most likely a 403 forbidden or 404 not found error for api.php
-        logging.error("Couldn't find the specified wiki's api.php. "
-                      "Check the value of --path.")
+        logging.error(
+            "Couldn't find the specified wiki's api.php. Check the value of --path."
+        )
         logging.debug("Full error message:")
         logging.debug(e)
         sys.exit(1)
@@ -92,8 +92,10 @@ def download(dl, args):
     except APIError as e:
         # an API error at this point likely means access is denied,
         # which could happen with a private wiki
-        logging.error("Access denied. Try providing credentials with "
-                      "--username and --password.")
+        logging.error(
+            "Access denied. Try providing credentials with "
+            "--username and --password."
+        )
         logging.debug("Full error message:")
         for i in e.args:
             logging.debug(i)
@@ -106,22 +108,23 @@ def download(dl, args):
         file_size = file.imageinfo["size"]
         file_sha1 = file.imageinfo["sha1"]
 
-        filename_log = (f"Downloading '{filename}' ({file_size} bytes) "
-                        f"from {site.host}")
+        filename_log = f"Downloading '{filename}' ({file_size} bytes) from {site.host}"
         if args.output:
             filename_log += f" to '{dest}'"
         logging.info(filename_log)
         logging.info(f"{file_url}")
 
         if os.path.isfile(dest) and not args.force:
-            logging.warning(f"File '{dest}' already exists, skipping download "
-                            "(use -f to ignore)")
+            logging.warning(
+                f"File '{dest}' already exists, skipping download (use -f to ignore)"
+            )
         else:
             try:
                 fd = open(dest, "wb")
             except OSError as e:
-                logging.error("File could not be written. "
-                              "The following error was encountered:")
+                logging.error(
+                    "File could not be written. The following error was encountered:"
+                )
                 logging.error(e)
                 sys.exit(1)
             else:
@@ -158,6 +161,5 @@ def download(dl, args):
 
     else:
         # no file information returned
-        logging.error(f"Target '{filename}' does not appear to be "
-                      "a valid file.")
+        logging.error(f"Target '{filename}' does not appear to be a valid file.")
         sys.exit(1)
-- 
cgit v1.2.3


From 485df31f095a9b629a1dcc04af13956325856d8c Mon Sep 17 00:00:00 2001
From: Cody Logan <clpo13@gmail.com>
Date: Tue, 3 Oct 2023 09:51:58 -0700
Subject: Update README and do some code cleanup

---
 src/wikiget/dl.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'src/wikiget/dl.py')

diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py
index 791db61..d32736f 100644
--- a/src/wikiget/dl.py
+++ b/src/wikiget/dl.py
@@ -1,5 +1,5 @@
 # wikiget - CLI tool for downloading files from Wikimedia sites
-# Copyright (C) 2018-2021 Cody Logan and contributors
+# Copyright (C) 2018-2023 Cody Logan and contributors
 # SPDX-License-Identifier: GPL-3.0-or-later
 #
 # Wikiget is free software: you can redistribute it and/or modify
@@ -65,8 +65,8 @@ def download(dl, args):
         if args.username and args.password:
             site.login(args.username, args.password)
     except ConnectionError as e:
-        # usually this means there is no such site, or there's no network
-        # connection, though it could be a certificate problem
+        # usually this means there is no such site, or there's no network connection,
+        # though it could be a certificate problem
         logging.error("Couldn't connect to specified site.")
         logging.debug("Full error message:")
         logging.debug(e)
@@ -80,8 +80,8 @@ def download(dl, args):
         logging.debug(e)
         sys.exit(1)
     except (InvalidResponse, LoginError) as e:
-        # InvalidResponse: site exists, but we couldn't communicate with the
-        # API endpoint for some reason other than an HTTP error.
+        # InvalidResponse: site exists, but we couldn't communicate with the API
+        # endpoint for some reason other than an HTTP error.
         # LoginError: missing or invalid credentials
         logging.error(e)
         sys.exit(1)
@@ -90,8 +90,8 @@ def download(dl, args):
     try:
         file = site.images[filename]
     except APIError as e:
-        # an API error at this point likely means access is denied,
-        # which could happen with a private wiki
+        # an API error at this point likely means access is denied, which could happen
+        # with a private wiki
         logging.error(
             "Access denied. Try providing credentials with "
             "--username and --password."
@@ -102,8 +102,7 @@ def download(dl, args):
         sys.exit(1)
 
     if file.imageinfo != {}:
-        # file exists either locally or at a common repository,
-        # like Wikimedia Commons
+        # file exists either locally or at a common repository, like Wikimedia Commons
         file_url = file.imageinfo["url"]
         file_size = file.imageinfo["size"]
         file_sha1 = file.imageinfo["sha1"]
-- 
cgit v1.2.3


From 865088207b39427b6b932de4f312d82bd5e05a53 Mon Sep 17 00:00:00 2001
From: Cody Logan <clpo13@gmail.com>
Date: Tue, 3 Oct 2023 13:26:09 -0700
Subject: Refactor for better code organization

---
 src/wikiget/dl.py | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'src/wikiget/dl.py')

diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py
index d32736f..2b2befa 100644
--- a/src/wikiget/dl.py
+++ b/src/wikiget/dl.py
@@ -25,10 +25,11 @@ from requests import ConnectionError, HTTPError
 from tqdm import tqdm
 
 import wikiget
+from wikiget.file import File
 from wikiget.validations import valid_file, verify_hash
 
 
-def download(dl, args):
+def get_dest(dl, args):
     url = urlparse(dl)
 
     if url.netloc:
@@ -56,6 +57,10 @@ def download(dl, args):
 
     dest = args.output or filename
 
+    return filename, dest, site_name
+
+
+def query_api(filename, site_name, args):
     logging.debug(f"User agent: {wikiget.USER_AGENT}")
 
     # connect to site and identify ourselves
@@ -101,6 +106,22 @@ def download(dl, args):
             logging.debug(i)
         sys.exit(1)
 
+    return file, site
+
+
+def prep_download(dl, args):
+    filename, dest, site_name = get_dest(dl, args)
+    file = File(filename, dest)
+    file.object, file.site = query_api(file.name, site_name, args)
+    return file
+
+
+def download(f, args):
+    file = f.object
+    filename = f.name
+    site = f.site
+    dest = f.dest
+
     if file.imageinfo != {}:
         # file exists either locally or at a common repository, like Wikimedia Commons
         file_url = file.imageinfo["url"]
-- 
cgit v1.2.3


From 630541499a58f98c55d5cc372d21e745c106d250 Mon Sep 17 00:00:00 2001
From: Cody Logan <cody@lokken.dev>
Date: Fri, 13 Oct 2023 12:24:13 -0700
Subject: Refactor parsing logic and revise exception handling

---
 src/wikiget/dl.py | 58 +++++++++++++++++--------------------------------------
 1 file changed, 18 insertions(+), 40 deletions(-)

(limited to 'src/wikiget/dl.py')

diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py
index 2b2befa..50b7460 100644
--- a/src/wikiget/dl.py
+++ b/src/wikiget/dl.py
@@ -18,46 +18,16 @@
 import logging
 import os
 import sys
-from urllib.parse import unquote, urlparse
 
 from mwclient import APIError, InvalidResponse, LoginError, Site
 from requests import ConnectionError, HTTPError
 from tqdm import tqdm
 
 import wikiget
+from wikiget.exceptions import ParseError
 from wikiget.file import File
-from wikiget.validations import valid_file, verify_hash
-
-
-def get_dest(dl, args):
-    url = urlparse(dl)
-
-    if url.netloc:
-        filename = url.path
-        site_name = url.netloc
-        if args.site is not wikiget.DEFAULT_SITE:
-            # this will work even if the user specifies 'commons.wikimedia.org'
-            logging.warning("target is a URL, ignoring site specified with --site")
-    else:
-        filename = dl
-        site_name = args.site
-
-    file_match = valid_file(filename)
-
-    # check if this is a valid file
-    if file_match and file_match.group(1):
-        # has File:/Image: prefix and extension
-        filename = file_match.group(2)
-    else:
-        # no file extension and/or prefix, probably an article
-        logging.error(f"Could not parse input '{filename}' as a file.")
-        sys.exit(1)
-
-    filename = unquote(filename)  # remove URL encoding for special characters
-
-    dest = args.output or filename
-
-    return filename, dest, site_name
+from wikiget.parse import get_dest
+from wikiget.validations import verify_hash
 
 
 def query_api(filename, site_name, args):
@@ -98,8 +68,7 @@ def query_api(filename, site_name, args):
         # an API error at this point likely means access is denied, which could happen
         # with a private wiki
         logging.error(
-            "Access denied. Try providing credentials with "
-            "--username and --password."
+            "Access denied. Try providing credentials with --username and --password."
         )
         logging.debug("Full error message:")
         for i in e.args:
@@ -110,7 +79,10 @@ def query_api(filename, site_name, args):
 
 
 def prep_download(dl, args):
-    filename, dest, site_name = get_dest(dl, args)
+    try:
+        filename, dest, site_name = get_dest(dl, args)
+    except ParseError:
+        raise
     file = File(filename, dest)
     file.object, file.site = query_api(file.name, site_name, args)
     return file
@@ -136,7 +108,7 @@ def download(f, args):
 
         if os.path.isfile(dest) and not args.force:
             logging.warning(
-                f"File '{dest}' already exists, skipping download (use -f to ignore)"
+                f"File '{dest}' already exists, skipping download (use -f to force)"
             )
         else:
             try:
@@ -167,19 +139,25 @@ def download(f, args):
                             fd.write(chunk)
                             progress_bar.update(len(chunk))
 
-            # verify file integrity and optionally print details
+            # verify file integrity and log details
             dl_sha1 = verify_hash(dest)
 
-            logging.info(f"Downloaded file SHA1 is {dl_sha1}")
-            logging.info(f"Server file SHA1 is {file_sha1}")
+            logging.info(f"Remote file SHA1 is {file_sha1}")
+            logging.info(f"Local file SHA1 is {dl_sha1}")
             if dl_sha1 == file_sha1:
                 logging.info("Hashes match!")
                 # at this point, we've successfully downloaded the file
+                success_log = f"'{filename}' downloaded"
+                if args.output:
+                    success_log += f" to '{dest}'"
+                logging.info(success_log)
             else:
                 logging.error("Hash mismatch! Downloaded file may be corrupt.")
+                # TODO: log but don't quit while in batch mode
                 sys.exit(1)
 
     else:
         # no file information returned
         logging.error(f"Target '{filename}' does not appear to be a valid file.")
+        # TODO: log but don't quit while in batch mode
         sys.exit(1)
-- 
cgit v1.2.3


From 06335ba0176cabd84f5b548995f465ac1c09bc8e Mon Sep 17 00:00:00 2001
From: Cody Logan <cody@lokken.dev>
Date: Tue, 17 Oct 2023 14:00:14 -0700
Subject: Clean up exception handling and error messages

---
 src/wikiget/dl.py | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

(limited to 'src/wikiget/dl.py')

diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py
index 50b7460..4521b72 100644
--- a/src/wikiget/dl.py
+++ b/src/wikiget/dl.py
@@ -24,7 +24,6 @@ from requests import ConnectionError, HTTPError
 from tqdm import tqdm
 
 import wikiget
-from wikiget.exceptions import ParseError
 from wikiget.file import File
 from wikiget.parse import get_dest
 from wikiget.validations import verify_hash
@@ -42,24 +41,22 @@ def query_api(filename, site_name, args):
     except ConnectionError as e:
         # usually this means there is no such site, or there's no network connection,
         # though it could be a certificate problem
-        logging.error("Couldn't connect to specified site.")
-        logging.debug("Full error message:")
+        logging.error("Could not connect to specified site")
         logging.debug(e)
-        sys.exit(1)
+        raise
     except HTTPError as e:
         # most likely a 403 forbidden or 404 not found error for api.php
         logging.error(
-            "Couldn't find the specified wiki's api.php. Check the value of --path."
+            "Could not find the specified wiki's api.php. Check the value of --path."
         )
-        logging.debug("Full error message:")
         logging.debug(e)
-        sys.exit(1)
+        raise
     except (InvalidResponse, LoginError) as e:
         # InvalidResponse: site exists, but we couldn't communicate with the API
         # endpoint for some reason other than an HTTP error.
         # LoginError: missing or invalid credentials
         logging.error(e)
-        sys.exit(1)
+        raise
 
     # get info about the target file
     try:
@@ -70,19 +67,15 @@ def query_api(filename, site_name, args):
         logging.error(
             "Access denied. Try providing credentials with --username and --password."
         )
-        logging.debug("Full error message:")
         for i in e.args:
             logging.debug(i)
-        sys.exit(1)
+        raise
 
     return file, site
 
 
 def prep_download(dl, args):
-    try:
-        filename, dest, site_name = get_dest(dl, args)
-    except ParseError:
-        raise
+    filename, dest, site_name = get_dest(dl, args)
     file = File(filename, dest)
     file.object, file.site = query_api(file.name, site_name, args)
     return file
@@ -158,6 +151,6 @@ def download(f, args):
 
     else:
         # no file information returned
-        logging.error(f"Target '{filename}' does not appear to be a valid file.")
+        logging.error(f"Target '{filename}' does not appear to be a valid file")
         # TODO: log but don't quit while in batch mode
         sys.exit(1)
-- 
cgit v1.2.3


From 05457af0d73ff3a820c0b465e6607fc5832a6e74 Mon Sep 17 00:00:00 2001
From: Cody Logan <cody@lokken.dev>
Date: Fri, 20 Oct 2023 16:23:28 -0700
Subject: Reorganize File class

---
 src/wikiget/dl.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

(limited to 'src/wikiget/dl.py')

diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py
index 4521b72..171b017 100644
--- a/src/wikiget/dl.py
+++ b/src/wikiget/dl.py
@@ -30,10 +30,8 @@ from wikiget.validations import verify_hash
 
 
 def query_api(filename, site_name, args):
-    logging.debug(f"User agent: {wikiget.USER_AGENT}")
-
     # connect to site and identify ourselves
-    logging.info(f"Site name: {site_name}")
+    logging.info(f"Connecting to {site_name}")
     try:
         site = Site(site_name, path=args.path, clients_useragent=wikiget.USER_AGENT)
         if args.username and args.password:
@@ -60,7 +58,7 @@ def query_api(filename, site_name, args):
 
     # get info about the target file
     try:
-        file = site.images[filename]
+        image = site.images[filename]
     except APIError as e:
         # an API error at this point likely means access is denied, which could happen
         # with a private wiki
@@ -71,23 +69,22 @@ def query_api(filename, site_name, args):
             logging.debug(i)
         raise
 
-    return file, site
+    return image
 
 
 def prep_download(dl, args):
-    filename, dest, site_name = get_dest(dl, args)
-    file = File(filename, dest)
-    file.object, file.site = query_api(file.name, site_name, args)
+    file = get_dest(dl, args)
+    file.image = query_api(file.name, file.site, args)
     return file
 
 
 def download(f, args):
-    file = f.object
+    file = f.image
     filename = f.name
-    site = f.site
     dest = f.dest
+    site = file.site
 
-    if file.imageinfo != {}:
+    if file.exists:
         # file exists either locally or at a common repository, like Wikimedia Commons
         file_url = file.imageinfo["url"]
         file_size = file.imageinfo["size"]
-- 
cgit v1.2.3


From b136af078208882ae696b21c0d8aac009e7468d4 Mon Sep 17 00:00:00 2001
From: Cody Logan <cody@lokken.dev>
Date: Fri, 20 Oct 2023 16:28:23 -0700
Subject: Move batch_download function to proper file

---
 src/wikiget/dl.py | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 58 insertions(+), 5 deletions(-)

(limited to 'src/wikiget/dl.py')

diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py
index 171b017..83aef9f 100644
--- a/src/wikiget/dl.py
+++ b/src/wikiget/dl.py
@@ -18,12 +18,14 @@
 import logging
 import os
 import sys
+from concurrent.futures import ThreadPoolExecutor
 
 from mwclient import APIError, InvalidResponse, LoginError, Site
 from requests import ConnectionError, HTTPError
 from tqdm import tqdm
 
 import wikiget
+from wikiget.exceptions import ParseError
 from wikiget.file import File
 from wikiget.parse import get_dest
 from wikiget.validations import verify_hash
@@ -78,12 +80,62 @@ def prep_download(dl, args):
     return file
 
 
+def batch_download(args):
+    input_file = args.FILE
+    dl_list = {}
+    errors = 0
+
+    logging.info(f"Using batch file '{input_file}'.")
+
+    try:
+        fd = open(input_file)
+    except OSError as e:
+        logging.error("File could not be read. The following error was encountered:")
+        logging.error(e)
+        sys.exit(1)
+    else:
+        with fd:
+            # read the file into memory and process each line as we go
+            for line_num, line in enumerate(fd, start=1):
+                line_s = line.strip()
+                # ignore blank lines and lines starting with "#" (for comments)
+                if line_s and not line_s.startswith("#"):
+                    dl_list[line_num] = line_s
+
+    # TODO: validate file contents before download process starts
+    with ThreadPoolExecutor(max_workers=args.threads) as executor:
+        futures = []
+        for line_num, line in dl_list.items():
+            # keep track of batch file line numbers for debugging/logging purposes
+            logging.info(f"Processing '{line}' at line {line_num}")
+            try:
+                file = prep_download(line, args)
+            except ParseError as e:
+                logging.warning(f"{e} (line {line_num})")
+                errors += 1
+                continue
+            except (ConnectionError, HTTPError, InvalidResponse, LoginError, APIError):
+                logging.warning(
+                    f"Unable to download '{line}' (line {line_num}) due to an error"
+                )
+                errors += 1
+                continue
+            future = executor.submit(download, file, args)
+            futures.append(future)
+        # wait for downloads to finish
+        for future in futures:
+            errors += future.result()
+    return errors
+
+
 def download(f, args):
     file = f.image
     filename = f.name
     dest = f.dest
     site = file.site
 
+    errors = 0
+
     if file.exists:
         # file exists either locally or at a common repository, like Wikimedia Commons
         file_url = file.imageinfo["url"]
@@ -100,6 +152,7 @@ def download(f, args):
             logging.warning(
                 f"File '{dest}' already exists, skipping download (use -f to force)"
             )
+            errors += 1
         else:
             try:
                 fd = open(dest, "wb")
@@ -108,7 +161,7 @@ def download(f, args):
                     "File could not be written. The following error was encountered:"
                 )
                 logging.error(e)
-                sys.exit(1)
+                errors += 1
             else:
                 # download the file(s)
                 if args.verbose >= wikiget.STD_VERBOSE:
@@ -143,11 +196,11 @@ def download(f, args):
                 logging.info(success_log)
             else:
                 logging.error("Hash mismatch! Downloaded file may be corrupt.")
-                # TODO: log but don't quit while in batch mode
-                sys.exit(1)
+                errors += 1
 
     else:
         # no file information returned
         logging.error(f"Target '{filename}' does not appear to be a valid file")
-        # TODO: log but don't quit while in batch mode
-        sys.exit(1)
+        errors += 1
+
+    return errors
-- 
cgit v1.2.3


From 3d37cf6f86eb6c48a3a0a094c42ade6d7aed1daf Mon Sep 17 00:00:00 2001
From: Cody Logan <cody@lokken.dev>
Date: Fri, 20 Oct 2023 16:31:56 -0700
Subject: Move logging configuration to new file

Also, use a LoggerAdapter to add contextual info (such as filenames)
to log messages when downloading, especially useful with threaded
batch processing.
---
 src/wikiget/dl.py | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

(limited to 'src/wikiget/dl.py')

diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py
index 83aef9f..5491378 100644
--- a/src/wikiget/dl.py
+++ b/src/wikiget/dl.py
@@ -27,6 +27,7 @@ from tqdm import tqdm
 import wikiget
 from wikiget.exceptions import ParseError
 from wikiget.file import File
+from wikiget.logging import FileLogAdapter
 from wikiget.parse import get_dest
 from wikiget.validations import verify_hash
 
@@ -136,6 +137,9 @@ def download(f, args):
 
     errors = 0
 
+    logger = logging.getLogger("")
+    adapter = FileLogAdapter(logger, {"filename": filename})
+
     if file.exists:
         # file exists either locally or at a common repository, like Wikimedia Commons
         file_url = file.imageinfo["url"]
@@ -145,22 +149,17 @@ def download(f, args):
         filename_log = f"Downloading '{filename}' ({file_size} bytes) from {site.host}"
         if args.output:
             filename_log += f" to '{dest}'"
-        logging.info(filename_log)
-        logging.info(f"{file_url}")
+        adapter.info(filename_log)
+        adapter.info(f"{file_url}")
 
         if os.path.isfile(dest) and not args.force:
-            logging.warning(
-                f"File '{dest}' already exists, skipping download (use -f to force)"
-            )
+            adapter.warning("File already exists, skipping download (use -f to force)")
             errors += 1
         else:
             try:
                 fd = open(dest, "wb")
             except OSError as e:
-                logging.error(
-                    "File could not be written. The following error was encountered:"
-                )
-                logging.error(e)
+                adapter.error(f"File could not be written. {e}")
                 errors += 1
             else:
                 # download the file(s)
@@ -185,22 +184,22 @@ def download(f, args):
             # verify file integrity and log details
             dl_sha1 = verify_hash(dest)
 
-            logging.info(f"Remote file SHA1 is {file_sha1}")
-            logging.info(f"Local file SHA1 is {dl_sha1}")
+            adapter.info(f"Remote file SHA1 is {file_sha1}")
+            adapter.info(f"Local file SHA1 is {dl_sha1}")
             if dl_sha1 == file_sha1:
-                logging.info("Hashes match!")
+                adapter.info("Hashes match!")
                 # at this point, we've successfully downloaded the file
                 success_log = f"'{filename}' downloaded"
                 if args.output:
                     success_log += f" to '{dest}'"
-                logging.info(success_log)
+                adapter.info(success_log)
             else:
-                logging.error("Hash mismatch! Downloaded file may be corrupt.")
+                adapter.error("Hash mismatch! Downloaded file may be corrupt.")
                 errors += 1
 
     else:
         # no file information returned
-        logging.error(f"Target '{filename}' does not appear to be a valid file")
+        adapter.warning("Target does not appear to be a valid file")
         errors += 1
 
     return errors
-- 
cgit v1.2.3


From c1820026f97eaf671c29ab30f02879de0ac4df89 Mon Sep 17 00:00:00 2001
From: Cody Logan <cody@lokken.dev>
Date: Fri, 20 Oct 2023 16:36:14 -0700
Subject: Add type annotations to source files

---
 src/wikiget/dl.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'src/wikiget/dl.py')

diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py
index 5491378..5b5b43b 100644
--- a/src/wikiget/dl.py
+++ b/src/wikiget/dl.py
@@ -18,9 +18,11 @@
 import logging
 import os
 import sys
+from argparse import Namespace
 from concurrent.futures import ThreadPoolExecutor
 
 from mwclient import APIError, InvalidResponse, LoginError, Site
+from mwclient.image import Image
 from requests import ConnectionError, HTTPError
 from tqdm import tqdm
 
@@ -32,7 +34,7 @@ from wikiget.parse import get_dest
 from wikiget.validations import verify_hash
 
 
-def query_api(filename, site_name, args):
+def query_api(filename: str, site_name: str, args: Namespace) -> Image:
     # connect to site and identify ourselves
     logging.info(f"Connecting to {site_name}")
     try:
@@ -75,13 +77,13 @@ def query_api(filename, site_name, args):
     return image
 
 
-def prep_download(dl, args):
+def prep_download(dl: str, args: Namespace) -> File:
     file = get_dest(dl, args)
     file.image = query_api(file.name, file.site, args)
     return file
 
 
-def batch_download(args):
+def batch_download(args: Namespace) -> int:
     input_file = args.FILE
     dl_list = {}
     errors = 0
@@ -129,7 +131,7 @@ def batch_download(args):
     return errors
 
 
-def download(f, args):
+def download(f: File, args: Namespace) -> int:
     file = f.image
     filename = f.name
     dest = f.dest
-- 
cgit v1.2.3