Diffstat (limited to 'src')
-rw-r--r--  src/wikiget/dl.py       23
-rw-r--r--  src/wikiget/file.py     27
-rw-r--r--  src/wikiget/wikiget.py  38
3 files changed, 72 insertions, 16 deletions
diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py
index d32736f..2b2befa 100644
--- a/src/wikiget/dl.py
+++ b/src/wikiget/dl.py
@@ -25,10 +25,11 @@ from requests import ConnectionError, HTTPError
from tqdm import tqdm
import wikiget
+from wikiget.file import File
from wikiget.validations import valid_file, verify_hash
-def download(dl, args):
+def get_dest(dl, args):
url = urlparse(dl)
if url.netloc:
@@ -56,6 +57,10 @@ def download(dl, args):
dest = args.output or filename
+ return filename, dest, site_name
+
+
+def query_api(filename, site_name, args):
logging.debug(f"User agent: {wikiget.USER_AGENT}")
# connect to site and identify ourselves
@@ -101,6 +106,22 @@ def download(dl, args):
logging.debug(i)
sys.exit(1)
+ return file, site
+
+
+def prep_download(dl, args):
+ filename, dest, site_name = get_dest(dl, args)
+ file = File(filename, dest)
+ file.object, file.site = query_api(file.name, site_name, args)
+ return file
+
+
+def download(f, args):
+ file = f.object
+ filename = f.name
+ site = f.site
+ dest = f.dest
+
if file.imageinfo != {}:
# file exists either locally or at a common repository, like Wikimedia Commons
file_url = file.imageinfo["url"]
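The dl.py hunks above split the old download() into smaller steps: get_dest() resolves the filename and destination, query_api() talks to the wiki, and prep_download() ties them together into a File before download() fetches the bytes. A rough sketch of the resulting call sequence, not taken from this commit (the FILE positional is only implied here by args.FILE, and "File:Example.jpg" is a placeholder):

    from wikiget.dl import download, prep_download
    from wikiget.wikiget import construct_parser

    parser = construct_parser()
    args = parser.parse_args(["File:Example.jpg"])  # placeholder file name

    # prep_download() resolves name/dest, queries the API, and returns a
    # File whose .object and .site are filled in (it exits on API errors)
    file = prep_download(args.FILE, args)

    # download() then reads file.object, file.name, file.site and file.dest
    download(file, args)
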
diff --git a/src/wikiget/file.py b/src/wikiget/file.py
new file mode 100644
index 0000000..60a71e0
--- /dev/null
+++ b/src/wikiget/file.py
@@ -0,0 +1,27 @@
+# wikiget - CLI tool for downloading files from Wikimedia sites
+# Copyright (C) 2023 Cody Logan
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# Wikiget is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Wikiget is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Wikiget. If not, see <https://www.gnu.org/licenses/>.
+
+
+class File:
+ def __init__(self, name, dest=None):
+ self.object = None
+ self.site = None
+ self.name = name
+ if dest is None:
+ self.dest = name
+ else:
+ self.dest = dest
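The new File class is a plain container: name is required, dest defaults to the name, and object/site stay None until prep_download() fills them from the API query. A small usage sketch under those assumptions:

    from wikiget.file import File

    # dest falls back to the name when not given
    f = File("Example.jpg")
    assert f.dest == "Example.jpg"

    # an explicit dest overrides it; object and site start as None
    g = File("Example.jpg", dest="local-copy.jpg")
    assert g.dest == "local-copy.jpg"
    assert g.object is None and g.site is None
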
diff --git a/src/wikiget/wikiget.py b/src/wikiget/wikiget.py
index f482280..80d5057 100644
--- a/src/wikiget/wikiget.py
+++ b/src/wikiget/wikiget.py
@@ -20,15 +20,10 @@ import logging
import sys
import wikiget
-from wikiget.dl import download
+from wikiget.dl import download, prep_download
-def main():
- """
- Main entry point for console script. Automatically compiled by setuptools when
- installed with `pip install` or `python setup.py install`.
- """
-
+def construct_parser():
parser = argparse.ArgumentParser(
description="""
A tool for downloading files from MediaWiki sites using the file name or
@@ -84,13 +79,13 @@ def main():
"-u",
"--username",
default="",
- help="MediaWiki site username, for private wikis"
+ help="MediaWiki site username, for private wikis",
)
parser.add_argument(
"-p",
"--password",
default="",
- help="MediaWiki site password, for private wikis"
+ help="MediaWiki site password, for private wikis",
)
output_options = parser.add_mutually_exclusive_group()
output_options.add_argument("-o", "--output", help="write download to OUTPUT")
@@ -104,7 +99,19 @@ def main():
parser.add_argument(
"-l", "--logfile", default="", help="save log output to LOGFILE"
)
+ parser.add_argument(
+ "-j",
+ "--threads",
+ default=1,
+ help="Number of parallel downloads to attempt in batch mode",
+ type=int,
+ )
+ return parser
+
+
+def main():
+ parser = construct_parser()
args = parser.parse_args()
loglevel = logging.WARNING
@@ -165,12 +172,13 @@ def main():
dl_list.append(line)
# TODO: validate file contents before download process starts
- for line_num, url in enumerate(dl_list, start=1):
- s_url = url.strip()
+ for line_num, line in enumerate(dl_list, start=1):
+ url = line.strip()
# keep track of batch file line numbers for debugging/logging purposes
- logging.info(f"Downloading '{s_url}' at line {line_num}:")
- download(s_url, args)
+ logging.info(f"Downloading '{url}' at line {line_num}:")
+ file = prep_download(url, args)
+ download(file, args)
else:
# single download mode
- dl = args.FILE
- download(dl, args)
+ file = prep_download(args.FILE, args)
+ download(file, args)
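
The wikiget.py hunks add a -j/--threads option and switch both batch and single mode to the prep_download()/download() pair, but the batch loop itself still runs sequentially in this commit. A hypothetical sketch (not part of this change) of how the threads value could bound a parallel batch run:

    from concurrent.futures import ThreadPoolExecutor

    from wikiget.dl import download, prep_download

    def batch_download(dl_list, args):
        """Hypothetical parallel batch mode bounded by args.threads."""
        # prepare each file first, then download up to args.threads at once
        files = [prep_download(line.strip(), args) for line in dl_list]
        with ThreadPoolExecutor(max_workers=args.threads) as pool:
            futures = [pool.submit(download, f, args) for f in files]
            for future in futures:
                future.result()  # re-raise any download error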