diff options
| -rw-r--r-- | src/wikiget/dl.py | 23 | ||||
| -rw-r--r-- | src/wikiget/file.py | 27 | ||||
| -rw-r--r-- | src/wikiget/wikiget.py | 38 | ||||
| -rw-r--r-- | tests/test_file_class.py | 31 |
4 files changed, 103 insertions, 16 deletions
diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py index d32736f..2b2befa 100644 --- a/src/wikiget/dl.py +++ b/src/wikiget/dl.py @@ -25,10 +25,11 @@ from requests import ConnectionError, HTTPError from tqdm import tqdm import wikiget +from wikiget.file import File from wikiget.validations import valid_file, verify_hash -def download(dl, args): +def get_dest(dl, args): url = urlparse(dl) if url.netloc: @@ -56,6 +57,10 @@ def download(dl, args): dest = args.output or filename + return filename, dest, site_name + + +def query_api(filename, site_name, args): logging.debug(f"User agent: {wikiget.USER_AGENT}") # connect to site and identify ourselves @@ -101,6 +106,22 @@ def download(dl, args): logging.debug(i) sys.exit(1) + return file, site + + +def prep_download(dl, args): + filename, dest, site_name = get_dest(dl, args) + file = File(filename, dest) + file.object, file.site = query_api(file.name, site_name, args) + return file + + +def download(f, args): + file = f.object + filename = f.name + site = f.site + dest = f.dest + if file.imageinfo != {}: # file exists either locally or at a common repository, like Wikimedia Commons file_url = file.imageinfo["url"] diff --git a/src/wikiget/file.py b/src/wikiget/file.py new file mode 100644 index 0000000..60a71e0 --- /dev/null +++ b/src/wikiget/file.py @@ -0,0 +1,27 @@ +# wikiget - CLI tool for downloading files from Wikimedia sites +# Copyright (C) 2023 Cody Logan +# SPDX-License-Identifier: GPL-3.0-or-later +# +# Wikiget is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Wikiget is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Wikiget. If not, see <https://www.gnu.org/licenses/>. + + +class File: + def __init__(self, name, dest=None): + self.object = None + self.site = None + self.name = name + if dest is None: + self.dest = name + else: + self.dest = dest diff --git a/src/wikiget/wikiget.py b/src/wikiget/wikiget.py index f482280..80d5057 100644 --- a/src/wikiget/wikiget.py +++ b/src/wikiget/wikiget.py @@ -20,15 +20,10 @@ import logging import sys import wikiget -from wikiget.dl import download +from wikiget.dl import download, prep_download -def main(): - """ - Main entry point for console script. Automatically compiled by setuptools when - installed with `pip install` or `python setup.py install`. - """ - +def construct_parser(): parser = argparse.ArgumentParser( description=""" A tool for downloading files from MediaWiki sites using the file name or @@ -84,13 +79,13 @@ def main(): "-u", "--username", default="", - help="MediaWiki site username, for private wikis" + help="MediaWiki site username, for private wikis", ) parser.add_argument( "-p", "--password", default="", - help="MediaWiki site password, for private wikis" + help="MediaWiki site password, for private wikis", ) output_options = parser.add_mutually_exclusive_group() output_options.add_argument("-o", "--output", help="write download to OUTPUT") @@ -104,7 +99,19 @@ def main(): parser.add_argument( "-l", "--logfile", default="", help="save log output to LOGFILE" ) + parser.add_argument( + "-j", + "--threads", + default=1, + help="Number of parallel downloads to attempt in batch mode", + type=int, + ) + return parser + + +def main(): + parser = construct_parser() args = parser.parse_args() loglevel = logging.WARNING @@ -165,12 +172,13 @@ def main(): dl_list.append(line) # TODO: validate file contents before download process starts - for line_num, url in enumerate(dl_list, start=1): - s_url = url.strip() + for line_num, line in enumerate(dl_list, start=1): + url = line.strip() # keep track of batch file line numbers for debugging/logging purposes - logging.info(f"Downloading '{s_url}' at line {line_num}:") - download(s_url, args) + logging.info(f"Downloading '{url}' at line {line_num}:") + file = prep_download(url, args) + download(file, args) else: # single download mode - dl = args.FILE - download(dl, args) + file = prep_download(args.FILE, args) + download(file, args) diff --git a/tests/test_file_class.py b/tests/test_file_class.py new file mode 100644 index 0000000..7ad0b87 --- /dev/null +++ b/tests/test_file_class.py @@ -0,0 +1,31 @@ +# wikiget - CLI tool for downloading files from Wikimedia sites +# Copyright (C) 2023 Cody Logan +# SPDX-License-Identifier: GPL-3.0-or-later +# +# Wikiget is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Wikiget is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Wikiget. If not, see <https://www.gnu.org/licenses/>. + +from wikiget.file import File + + +def test_file_with_name_only(): + file = File("foobar.jpg") + assert file.name == "foobar.jpg" + assert file.dest == file.name + + +def test_file_with_name_and_dest(): + file = File("foobar.jpg", "bazqux.jpg") + assert file.name == "foobar.jpg" + assert file.dest == "bazqux.jpg" + assert file.dest != file.name |
