diff options
| author | Cody Logan <clpo13@gmail.com> | 2019-06-14 11:46:35 -0700 |
|---|---|---|
| committer | Cody Logan <clpo13@gmail.com> | 2019-06-14 11:46:35 -0700 |
| commit | 9b997deb8f7ab3b41011d3e4caf80329a78ca0d4 (patch) | |
| tree | c9723e6392417bcc1c47880101c2a39cf71c2fe0 /wikiget | |
| parent | 5671aac1a1fae8473a5a8eb207d4b7c673736531 (diff) | |
| download | wikiget-9b997deb8f7ab3b41011d3e4caf80329a78ca0d4.tar.gz wikiget-9b997deb8f7ab3b41011d3e4caf80329a78ca0d4.zip | |
Initial file integrity checking
Diffstat (limited to 'wikiget')
| -rw-r--r-- | wikiget/wikiget.py | 34 |
1 files changed, 34 insertions, 0 deletions
diff --git a/wikiget/wikiget.py b/wikiget/wikiget.py index 30a7d17..895d4b3 100644 --- a/wikiget/wikiget.py +++ b/wikiget/wikiget.py @@ -13,6 +13,7 @@ from future import standard_library standard_library.install_aliases() import argparse +import hashlib import logging import os import re @@ -25,6 +26,8 @@ from tqdm import tqdm from wikiget.version import __version__ +BLOCKSIZE = 65536 + def main(): """ @@ -131,6 +134,7 @@ def main(): # file exists either locally or at Wikimedia Commons file_url = file.imageinfo["url"] file_size = file.imageinfo["size"] + file_sha1 = file.imageinfo["sha1"] if args.verbose >= 1: print("Info: downloading '{}' " @@ -158,6 +162,21 @@ def main(): print("File could not be written. The following error was encountered:") print(e) sys.exit(1) + + # verify file integrity + dl_sha1 = verify_hash(dest) + + if args.verbose >= 1: + print("Info: downloaded file SHA1 is {}".format(dl_sha1)) + print("Info: server file SHA1 is {}".format(file_sha1)) + if dl_sha1 == file_sha1: + if args.verbose >= 1: + print("Info: hashes match!") + sys.exit(0) + else: + print("Error: hash mismatch! Downloaded file may be corrupt.") + sys.exit(1) + else: # no file information returned print("Target does not appear to be a valid file.") @@ -189,3 +208,18 @@ def valid_site(search_string): """ site_regex = re.compile(r"wiki[mp]edia\.org$", re.I) return site_regex.search(search_string) + + +def verify_hash(filename): + """ + Calculates the SHA1 hash of the given file for comparison with a known value. + :param filename: name of the file to calculate a hash for + :return: hash digest + """ + hasher = hashlib.sha1() + with open(filename, "rb") as dl: + buf = dl.read(BLOCKSIZE) + while len(buf) > 0: + hasher.update(buf) + buf = dl.read(BLOCKSIZE) + return hasher.hexdigest() |
