diff options
| author | Cody Logan <clpo13@gmail.com> | 2019-12-06 14:47:44 -0800 |
|---|---|---|
| committer | Cody Logan <clpo13@gmail.com> | 2019-12-06 14:47:44 -0800 |
| commit | 5f35b45b0b15e0f66608b9c774b76f39e7aa93ee (patch) | |
| tree | ae5e812ae60fe287fd35d56b1884a637ca64acf0 /wikiget/wikiget.py | |
| parent | 8273f4cdc3a4ee67d936c2b0b06f3d5ee92c31bf (diff) | |
| download | wikiget-5f35b45b0b15e0f66608b9c774b76f39e7aa93ee.tar.gz wikiget-5f35b45b0b15e0f66608b9c774b76f39e7aa93ee.zip | |
Switch to Wikimedia Commons as default site
Commons is intended as a repository for freely-usable media, so it makes more
sense for it to be the default, and most images on Wikimedia sites (like English
Wikipedia) are also available there.
The functionality for specifying alternate sites is retained in case users want to
download fair-use media, which is not available on Commons, as well as for possible
future integration with non-Wikimedia MediaWiki sites (like Fandom/Wikia).
Diffstat (limited to 'wikiget/wikiget.py')
| -rw-r--r-- | wikiget/wikiget.py | 84 |
1 file changed, 42 insertions, 42 deletions
diff --git a/wikiget/wikiget.py b/wikiget/wikiget.py index 8bcd3fd..a78056c 100644 --- a/wikiget/wikiget.py +++ b/wikiget/wikiget.py @@ -40,9 +40,9 @@ from tqdm import tqdm from wikiget.version import __version__ BLOCKSIZE = 65536 -DEFAULT_SITE = "en.wikipedia.org" -USER_AGENT = "wikiget/{} (https://github.com/clpo13/wikiget) " \ - "mwclient/{}".format(__version__, mwclient_version) +DEFAULT_SITE = 'commons.wikimedia.org' +USER_AGENT = 'wikiget/{} (https://github.com/clpo13/wikiget) ' \ + 'mwclient/{}'.format(__version__, mwclient_version) def main(): @@ -62,25 +62,25 @@ def main(): it under certain conditions. There is NO WARRANTY, to the extent permitted by law. """) - parser.add_argument("FILE", help=""" + parser.add_argument('FILE', help=""" name of the file to download with the File: or Image: prefix, or the URL of its file description page """) - parser.add_argument("-V", "--version", action="version", - version="%(prog)s {}".format(__version__)) + parser.add_argument('-V', '--version', action='version', + version='%(prog)s {}'.format(__version__)) output_options = parser.add_mutually_exclusive_group() - output_options.add_argument("-q", "--quiet", help="suppress warning messages", - action="store_true") - output_options.add_argument("-v", "--verbose", - help="print detailed information; use -vv for even more detail", - action="count", default=0) - parser.add_argument("-f", "--force", help="force overwriting existing files", - action="store_true") - parser.add_argument("-s", "--site", default=DEFAULT_SITE, - help="MediaWiki site to download from (default: %(default)s)") - parser.add_argument("-o", "--output", help="write download to OUTPUT") - parser.add_argument("-a", "--batch", help="treat FILE as a textfile containing multiple files to download, one URL or filename per line", - action="store_true") + output_options.add_argument('-q', '--quiet', help='suppress warning messages', + action='store_true') + output_options.add_argument('-v', '--verbose', + 
help='print detailed information; use -vv for even more detail', + action='count', default=0) + parser.add_argument('-f', '--force', help='force overwriting existing files', + action='store_true') + parser.add_argument('-s', '--site', default=DEFAULT_SITE, + help='MediaWiki site to download from (default: %(default)s)') + parser.add_argument('-o', '--output', help='write download to OUTPUT') + parser.add_argument('-a', '--batch', help='treat FILE as a textfile containing multiple files to download, one URL or filename per line', + action='store_true') args = parser.parse_args() @@ -96,9 +96,9 @@ def main(): if args.verbose >= 1: print("Info: using batch file '{}'".format(input_file)) try: - fd = open(input_file, "r") + fd = open(input_file, 'r') except IOError as e: - print("File could not be read. The following error was encountered:") + print('File could not be read. The following error was encountered:') print(e) sys.exit(1) else: @@ -119,8 +119,8 @@ def download(dl, args): filename = url.path site_name = url.netloc if args.site is not DEFAULT_SITE and not args.quiet: - # this will work even if the user specifies 'en.wikipedia.org' - print("Warning: target is a URL, ignoring site specified with --site") + # this will work even if the user specifies 'commons.wikimedia.org' + print('Warning: target is a URL, ignoring site specified with --site') else: filename = dl site_name = args.site @@ -130,7 +130,7 @@ def download(dl, args): # check for valid site parameter if not site_match: - print("Only Wikimedia sites (wikipedia.org and wikimedia.org) are currently supported.") + print('Only Wikimedia sites (wikipedia.org and wikimedia.org) are currently supported.') sys.exit(1) # check if this is a valid file @@ -139,13 +139,13 @@ def download(dl, args): filename = file_match.group(2) else: # no file extension and/or prefix, probably an article - print("Downloading Wikipedia articles is not currently supported.", end="") + print('Downloading Wikipedia articles is not 
currently supported.', end='') if file_match and not file_match.group(1): # file extension detected, but no prefix # TODO: no longer possible to get to this point since file_match is None with no prefix print(" If this is a file, please add the 'File:' prefix.") else: - print("\n", end="") + print('\n', end='') sys.exit(1) filename = unquote(filename) # remove URL encoding for special characters @@ -153,7 +153,7 @@ def download(dl, args): dest = args.output or filename if args.verbose >= 2: - print("User agent: {}".format(USER_AGENT)) + print('User agent: {}'.format(USER_AGENT)) # connect to site and identify ourselves try: @@ -172,31 +172,31 @@ def download(dl, args): if file.imageinfo != {}: # file exists either locally or at Wikimedia Commons - file_url = file.imageinfo["url"] - file_size = file.imageinfo["size"] - file_sha1 = file.imageinfo["sha1"] + file_url = file.imageinfo['url'] + file_size = file.imageinfo['size'] + file_sha1 = file.imageinfo['sha1'] if args.verbose >= 1: print("Info: downloading '{}' " - "({} bytes) from {}".format(filename, file_size, site.host), end="") + "({} bytes) from {}".format(filename, file_size, site.host), end='') if args.output: print(" to '{}'".format(dest)) else: - print("\n", end="") - print("Info: {}".format(file_url)) + print('\n', end='') + print('Info: {}'.format(file_url)) if os.path.isfile(dest) and not args.force: print("File '{}' already exists, skipping download (use -f to ignore)".format(dest)) else: try: - fd = open(dest, "wb") + fd = open(dest, 'wb') except IOError as e: - print("File could not be written. The following error was encountered:") + print('File could not be written. 
The following error was encountered:') print(e) sys.exit(1) else: # download the file - with tqdm(total=file_size, unit="B", + with tqdm(total=file_size, unit='B', unit_scale=True, unit_divisor=1024) as progress_bar: with fd: res = site.connection.get(file_url, stream=True) @@ -209,14 +209,14 @@ def download(dl, args): dl_sha1 = verify_hash(dest) if args.verbose >= 1: - print("Info: downloaded file SHA1 is {}".format(dl_sha1)) - print("Info: server file SHA1 is {}".format(file_sha1)) + print('Info: downloaded file SHA1 is {}'.format(dl_sha1)) + print('Info: server file SHA1 is {}'.format(file_sha1)) if dl_sha1 == file_sha1: if args.verbose >= 1: - print("Info: hashes match!") + print('Info: hashes match!') # at this point, we've successfully downloaded the file else: - print("Error: hash mismatch! Downloaded file may be corrupt.") + print('Error: hash mismatch! Downloaded file may be corrupt.') sys.exit(1) else: @@ -235,7 +235,7 @@ def valid_file(search_string): """ # second group could also restrict to file extensions with three or more # letters with ([^/\r\n\t\f\v]+\.\w{3,}) - file_regex = re.compile(r"(File:|Image:)([^/\r\n\t\f\v]+\.\w+)$", re.I) + file_regex = re.compile(r'(File:|Image:)([^/\r\n\t\f\v]+\.\w+)$', re.I) return file_regex.search(search_string) @@ -248,7 +248,7 @@ def valid_site(search_string): :param search_string: string to validate :returns: a regex Match object if there's a match or None otherwise """ - site_regex = re.compile(r"wiki[mp]edia\.org$", re.I) + site_regex = re.compile(r'wiki[mp]edia\.org$', re.I) return site_regex.search(search_string) @@ -259,7 +259,7 @@ def verify_hash(filename): :return: hash digest """ hasher = hashlib.sha1() - with open(filename, "rb") as dl: + with open(filename, 'rb') as dl: buf = dl.read(BLOCKSIZE) while len(buf) > 0: hasher.update(buf) |
