about | summary | refs | log | tree | commit | diff
path: root/wikiget
diff options
context:
space:
mode:
author: Cody Logan <clpo13@gmail.com> 2019-09-26 16:03:44 -0700
committer: Cody Logan <clpo13@gmail.com> 2019-09-26 16:03:44 -0700
commit: cbf64a55ecbbc38304bc2def8c9b96d62332ba62 (patch)
tree: 56d7e6ecf2d578eed8fb7a733ba676100e774cda /wikiget
parent: f34995d4547357bc90157d81e8445f72f6dada7f (diff)
download: wikiget-0.2.0.tar.gz
wikiget-0.2.0.zip
Add batch file functionality for downloading multiple files (tag: v0.2.0)
Diffstat (limited to 'wikiget')
-rw-r--r-- wikiget/version.py 2
-rw-r--r-- wikiget/wikiget.py 60
2 files changed, 45 insertions, 17 deletions
diff --git a/wikiget/version.py b/wikiget/version.py
index 262f3cb..5b1bb55 100644
--- a/wikiget/version.py
+++ b/wikiget/version.py
@@ -4,4 +4,4 @@
"""Sets the program version in setup.py and on the command line."""
-__version__ = "0.1.6"
+__version__ = "0.2.0"
diff --git a/wikiget/wikiget.py b/wikiget/wikiget.py
index 9aaacf4..04d6f3e 100644
--- a/wikiget/wikiget.py
+++ b/wikiget/wikiget.py
@@ -27,6 +27,9 @@ from tqdm import tqdm
from wikiget.version import __version__
BLOCKSIZE = 65536
+DEFAULT_SITE = "en.wikipedia.org"
+USER_AGENT = "wikiget/{} (https://github.com/clpo13/python-wikiget) " \
+ "mwclient/{}".format(__version__, mwclient_version)
def main():
@@ -34,9 +37,6 @@ def main():
Main entry point for console script. Automatically compiled by setuptools
when installed with `pip install` or `python setup.py install`.
"""
- default_site = "en.wikipedia.org"
- user_agent = "wikiget/{} (https://github.com/clpo13/python-wikiget) " \
- "mwclient/{}".format(__version__, mwclient_version)
parser = argparse.ArgumentParser(description="""
A tool for downloading files from MediaWiki sites
@@ -63,9 +63,12 @@ def main():
action="count", default=0)
parser.add_argument("-f", "--force", help="force overwriting existing files",
action="store_true")
- parser.add_argument("-s", "--site", default=default_site,
+ parser.add_argument("-s", "--site", default=DEFAULT_SITE,
help="MediaWiki site to download from (default: %(default)s)")
parser.add_argument("-o", "--output", help="write download to OUTPUT")
+ parser.add_argument("-a", "--batch", help="treat FILE as a textfile containing multiple files to download, one URL or filename per line",
+ action="store_true")
+
args = parser.parse_args()
# print API and debug messages in verbose mode
@@ -74,16 +77,39 @@ def main():
elif args.verbose >= 1:
logging.basicConfig(level=logging.WARNING)
- url = urlparse(args.FILE)
+ if args.batch:
+ # batch download mode
+ input_file = args.FILE
+ if args.verbose >= 1:
+ print("Info: using batch file '{}'".format(input_file))
+ try:
+ fd = open(input_file, "r")
+ except IOError as e:
+ print("File could not be read. The following error was encountered:")
+ print(e)
+ sys.exit(1)
+ else:
+ with fd:
+ for _, line in enumerate(fd):
+ line = line.strip()
+ download(line, args)
+ else:
+ # single download mode
+ dl = args.FILE
+ download(dl, args)
+
+
+def download(dl, args):
+ url = urlparse(dl)
if url.netloc:
filename = url.path
site_name = url.netloc
- if args.site is not default_site and not args.quiet:
+ if args.site is not DEFAULT_SITE and not args.quiet:
# this will work even if the user specifies 'en.wikipedia.org'
print("Warning: target is a URL, ignoring site specified with --site")
else:
- filename = args.FILE
+ filename = dl
site_name = args.site
file_match = valid_file(filename)
@@ -114,11 +140,11 @@ def main():
dest = args.output or filename
if args.verbose >= 2:
- print("User agent: {}".format(user_agent))
+ print("User agent: {}".format(USER_AGENT))
# connect to site and identify ourselves
try:
- site = Site(site_name, clients_useragent=user_agent)
+ site = Site(site_name, clients_useragent=USER_AGENT)
except ConnectionError:
# usually this means there is no such site, or there's no network connection
print("Error: couldn't connect to specified site.")
@@ -150,19 +176,21 @@ def main():
print("File '{}' already exists, skipping download (use -f to ignore)".format(dest))
else:
try:
+ fd = open(dest, "wb")
+ except IOError as e:
+ print("File could not be written. The following error was encountered:")
+ print(e)
+ sys.exit(1)
+ else:
# download the file
with tqdm(total=file_size, unit="B",
unit_scale=True, unit_divisor=1024) as progress_bar:
- with open(dest, "wb") as fd:
+ with fd:
res = site.connection.get(file_url, stream=True)
progress_bar.set_postfix(file=dest, refresh=False)
for chunk in res.iter_content(1024):
fd.write(chunk)
progress_bar.update(len(chunk))
- except IOError as e:
- print("File could not be written. The following error was encountered:")
- print(e)
- sys.exit(1)
# verify file integrity and optionally print details
dl_sha1 = verify_hash(dest)
@@ -173,14 +201,14 @@ def main():
if dl_sha1 == file_sha1:
if args.verbose >= 1:
print("Info: hashes match!")
- sys.exit(0)
+ # at this point, we've successfully downloaded the file
else:
print("Error: hash mismatch! Downloaded file may be corrupt.")
sys.exit(1)
else:
# no file information returned
- print("Target does not appear to be a valid file.")
+ print("Target '{}' does not appear to be a valid file.".format(filename))
sys.exit(1)