about | summary | refs | log | tree | commit | diff
path: root/wikiget
diff options
context:
space:
mode:
author: Cody Logan <clpo13@gmail.com> 2019-06-14 11:46:35 -0700
committer: Cody Logan <clpo13@gmail.com> 2019-06-14 11:46:35 -0700
commit: 9b997deb8f7ab3b41011d3e4caf80329a78ca0d4 (patch)
tree: c9723e6392417bcc1c47880101c2a39cf71c2fe0 /wikiget
parent: 5671aac1a1fae8473a5a8eb207d4b7c673736531 (diff)
download: wikiget-9b997deb8f7ab3b41011d3e4caf80329a78ca0d4.tar.gz
          wikiget-9b997deb8f7ab3b41011d3e4caf80329a78ca0d4.zip
Initial file integrity checking
Diffstat (limited to 'wikiget')
-rw-r--r-- wikiget/wikiget.py 34
1 files changed, 34 insertions, 0 deletions
diff --git a/wikiget/wikiget.py b/wikiget/wikiget.py
index 30a7d17..895d4b3 100644
--- a/wikiget/wikiget.py
+++ b/wikiget/wikiget.py
@@ -13,6 +13,7 @@ from future import standard_library
standard_library.install_aliases()
import argparse
+import hashlib
import logging
import os
import re
@@ -25,6 +26,8 @@ from tqdm import tqdm
from wikiget.version import __version__
+BLOCKSIZE = 65536
+
def main():
"""
@@ -131,6 +134,7 @@ def main():
# file exists either locally or at Wikimedia Commons
file_url = file.imageinfo["url"]
file_size = file.imageinfo["size"]
+ file_sha1 = file.imageinfo["sha1"]
if args.verbose >= 1:
print("Info: downloading '{}' "
@@ -158,6 +162,21 @@ def main():
print("File could not be written. The following error was encountered:")
print(e)
sys.exit(1)
+
+ # verify file integrity
+ dl_sha1 = verify_hash(dest)
+
+ if args.verbose >= 1:
+ print("Info: downloaded file SHA1 is {}".format(dl_sha1))
+ print("Info: server file SHA1 is {}".format(file_sha1))
+ if dl_sha1 == file_sha1:
+ if args.verbose >= 1:
+ print("Info: hashes match!")
+ sys.exit(0)
+ else:
+ print("Error: hash mismatch! Downloaded file may be corrupt.")
+ sys.exit(1)
+
else:
# no file information returned
print("Target does not appear to be a valid file.")
@@ -189,3 +208,18 @@ def valid_site(search_string):
"""
site_regex = re.compile(r"wiki[mp]edia\.org$", re.I)
return site_regex.search(search_string)
+
+
+def verify_hash(filename):
+ """
+ Calculates the SHA1 hash of the given file for comparison with a known value.
+ :param filename: name of the file to calculate a hash for
+ :return: hash digest
+ """
+ hasher = hashlib.sha1()
+ with open(filename, "rb") as dl:
+ buf = dl.read(BLOCKSIZE)
+ while len(buf) > 0:
+ hasher.update(buf)
+ buf = dl.read(BLOCKSIZE)
+ return hasher.hexdigest()