diff options
| author | Cody Logan <clpo13@gmail.com> | 2019-12-06 14:47:44 -0800 |
|---|---|---|
| committer | Cody Logan <clpo13@gmail.com> | 2019-12-06 14:47:44 -0800 |
| commit | 5f35b45b0b15e0f66608b9c774b76f39e7aa93ee (patch) | |
| tree | ae5e812ae60fe287fd35d56b1884a637ca64acf0 | |
| parent | 8273f4cdc3a4ee67d936c2b0b06f3d5ee92c31bf (diff) | |
| download | wikiget-5f35b45b0b15e0f66608b9c774b76f39e7aa93ee.tar.gz wikiget-5f35b45b0b15e0f66608b9c774b76f39e7aa93ee.zip | |
Switch to Wikimedia Commons as default site
Commons is intended as a repository for freely usable media, so it makes more
sense for it to be the default, and most images on Wikimedia sites (like English
Wikipedia) are also available there.
The functionality for specifying alternate sites is retained in case users want to
download fair-use media, which is not available on Commons, as well as for
possible future integration with non-Wikimedia MediaWiki sites (like Fandom/Wikia).
| -rw-r--r-- | README.md | 8 | ||||
| -rw-r--r-- | setup.py | 50 | ||||
| -rw-r--r-- | test/test_wikiget.py | 36 | ||||
| -rw-r--r-- | wikiget/version.py | 2 | ||||
| -rw-r--r-- | wikiget/wikiget.py | 84 |
5 files changed, 90 insertions, 90 deletions
@@ -15,9 +15,9 @@ Requires Python 2.7 or 3.5+. Install with `pip install --user wikiget` or, if yo `wikiget [-h] [-V] [-q | -v] [-f] [-a] [--site SITE] [-o OUTPUT] FILE` If `FILE` is in the form `File:Example.jpg` or `Example.jpg`, it will be fetched -from the default site, which is "en.wikipedia.org". If it's the fully-qualified -URL of a file description page, like `https://commons.wikimedia.org/wiki/File:Example.jpg`, -the file is fetched from the specified site, in this case "commons.wikimedia.org". +from the default site, which is "commons.wikimedia.org". If it's the fully-qualified +URL of a file description page, like `https://en.wikipedia.org/wiki/File:Example.jpg`, +the file is fetched from the specified site, in this case "en.wikipedia.org". Full URLs may contain characters your shell interprets differently, so you can either escape those characters with a backslash `\` or surround the entire URL with single `'` or double `"` quotes. @@ -42,7 +42,7 @@ offending filename is printed. 
```bash wikiget File:Example.jpg -wikiget --site commons.wikimedia.org File:Example.jpg +wikiget --site en.wikipedia.org File:Example.jpg wikiget https://en.wikipedia.org/wiki/File:Example.jpg -o test.jpg ``` @@ -23,45 +23,45 @@ from os import path from setuptools import setup, find_packages here = path.abspath(path.dirname(__file__)) -with open(path.join(here, "README.md"), "r") as fr: +with open(path.join(here, 'README.md'), 'r') as fr: long_description = fr.read() version = {} -with open(path.join(here, "wikiget", "version.py"), "r") as fv: +with open(path.join(here, 'wikiget', 'version.py'), 'r') as fv: exec(fv.read(), version) setup( - name="wikiget", - version=version["__version__"], - author="Cody Logan", - author_email="clpo13@gmail.com", - description="CLI tool for downloading files from MediaWiki sites", + name='wikiget', + version=version['__version__'], + author='Cody Logan', + author_email='clpo13@gmail.com', + description='CLI tool for downloading files from MediaWiki sites', long_description=long_description, - long_description_content_type="text/markdown", - url="https://github.com/clpo13/wikiget", - keywords="download mediawiki wikimedia wikipedia", + long_description_content_type='text/markdown', + url='https://github.com/clpo13/wikiget', + keywords='download mediawiki wikimedia wikipedia', packages=find_packages(), classifiers=[ - "Development Status :: 4 - Beta", - "Environment :: Console", - "Intended Audience :: End Users/Desktop", - "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", - "Operating System :: OS Independent", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Topic :: Utilities", + 'Development Status :: 4 - Beta', + 'Environment :: Console', + 'Intended Audience :: End Users/Desktop', + 'License :: OSI Approved :: GNU General Public 
License v3 or later (GPLv3+)', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Topic :: Utilities', ], python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*', - install_requires=["future", "mwclient>=0.10.0", "pytest-runner", "requests", "tqdm"], - tests_require=["pytest"], + install_requires=['future', 'mwclient>=0.10.0', 'pytest-runner', 'requests', 'tqdm'], + tests_require=['pytest'], project_urls={ - "Bug Reports": "https://github.com/clpo13/wikiget/issues", + 'Bug Reports': 'https://github.com/clpo13/wikiget/issues', }, entry_points={ - "console_scripts": [ + 'console_scripts': [ 'wikiget=wikiget.wikiget:main', ], }, diff --git a/test/test_wikiget.py b/test/test_wikiget.py index 8aaaec8..6bd1c9d 100644 --- a/test/test_wikiget.py +++ b/test/test_wikiget.py @@ -29,8 +29,8 @@ def test_invalid_site_input(): """ Invalid site strings should not return regex match objects. """ - invalid_input = ["example.com", "vim.wikia.com", - "en.wikipedia.com", "en.wikimpedia.org"] + invalid_input = ['example.com', 'vim.wikia.com', + 'en.wikipedia.com', 'en.wikimpedia.org'] for i in invalid_input: site_match = wikiget.valid_site(i) assert site_match is None @@ -40,8 +40,8 @@ def test_valid_site_input(): """ Valid site strings should return regex match objects. """ - valid_input = ["en.wikipedia.org", "commons.wikimedia.org", - "de.wikipedia.org", "meta.wikimedia.org"] + valid_input = ['en.wikipedia.org', 'commons.wikimedia.org', + 'de.wikipedia.org', 'meta.wikimedia.org'] for i in valid_input: site_match = wikiget.valid_site(i) assert site_match is not None @@ -53,20 +53,20 @@ def test_file_regex(): to the file prefix and name. 
:return: """ - i = "File:Example.jpg" + i = 'File:Example.jpg' file_match = wikiget.valid_file(i) assert file_match is not None - assert file_match.group(0) == "File:Example.jpg" # entire match - assert file_match.group(1) == "File:" # first group - assert file_match.group(2) == "Example.jpg" # second group + assert file_match.group(0) == 'File:Example.jpg' # entire match + assert file_match.group(1) == 'File:' # first group + assert file_match.group(2) == 'Example.jpg' # second group def test_invalid_file_input(): """ Invalid file strings should not return regex match objects. """ - invalid_input = ["file:example", "example.jpg", "Foo Bar.gif", - "Fil:Example.jpg"] + invalid_input = ['file:example', 'example.jpg', 'Foo Bar.gif', + 'Fil:Example.jpg'] for i in invalid_input: file_match = wikiget.valid_file(i) assert file_match is None @@ -76,9 +76,9 @@ def test_valid_file_input(): """ Valid file strings should return regex match objects. """ - valid_input = ["Image:example.jpg", "file:example.jpg", - "File:example.file-01.jpg", "FILE:FOO.BMP", - "File:ß handwritten sample.gif", "File:A (1).jpeg"] + valid_input = ['Image:example.jpg', 'file:example.jpg', + 'File:example.file-01.jpg', 'FILE:FOO.BMP', + 'File:ß handwritten sample.gif', 'File:A (1).jpeg'] for i in valid_input: file_match = wikiget.valid_file(i) assert file_match is not None @@ -89,14 +89,14 @@ def test_verify_hash(): Confirm that verify_hash returns the proper SHA1 hash. """ # TODO: do we need to actually create a file? 
- file_name = "testfile" - file_contents = "foobar" - file_sha1 = "8843d7f92416211de9ebb963ff4ce28125932878" + file_name = 'testfile' + file_contents = 'foobar' + file_sha1 = '8843d7f92416211de9ebb963ff4ce28125932878' try: - dl = open(file_name, "w") + dl = open(file_name, 'w') except PermissionError: - pytest.skip("need write access to create test file") + pytest.skip('need write access to create test file') else: with dl: dl.write(file_contents) diff --git a/wikiget/version.py b/wikiget/version.py index 0958641..72b7d2f 100644 --- a/wikiget/version.py +++ b/wikiget/version.py @@ -1,3 +1,3 @@ """Sets the program version in setup.py and on the command line.""" -__version__ = "0.2.1" +__version__ = '0.2.1' diff --git a/wikiget/wikiget.py b/wikiget/wikiget.py index 8bcd3fd..a78056c 100644 --- a/wikiget/wikiget.py +++ b/wikiget/wikiget.py @@ -40,9 +40,9 @@ from tqdm import tqdm from wikiget.version import __version__ BLOCKSIZE = 65536 -DEFAULT_SITE = "en.wikipedia.org" -USER_AGENT = "wikiget/{} (https://github.com/clpo13/wikiget) " \ - "mwclient/{}".format(__version__, mwclient_version) +DEFAULT_SITE = 'commons.wikimedia.org' +USER_AGENT = 'wikiget/{} (https://github.com/clpo13/wikiget) ' \ + 'mwclient/{}'.format(__version__, mwclient_version) def main(): @@ -62,25 +62,25 @@ def main(): it under certain conditions. There is NO WARRANTY, to the extent permitted by law. 
""") - parser.add_argument("FILE", help=""" + parser.add_argument('FILE', help=""" name of the file to download with the File: or Image: prefix, or the URL of its file description page """) - parser.add_argument("-V", "--version", action="version", - version="%(prog)s {}".format(__version__)) + parser.add_argument('-V', '--version', action='version', + version='%(prog)s {}'.format(__version__)) output_options = parser.add_mutually_exclusive_group() - output_options.add_argument("-q", "--quiet", help="suppress warning messages", - action="store_true") - output_options.add_argument("-v", "--verbose", - help="print detailed information; use -vv for even more detail", - action="count", default=0) - parser.add_argument("-f", "--force", help="force overwriting existing files", - action="store_true") - parser.add_argument("-s", "--site", default=DEFAULT_SITE, - help="MediaWiki site to download from (default: %(default)s)") - parser.add_argument("-o", "--output", help="write download to OUTPUT") - parser.add_argument("-a", "--batch", help="treat FILE as a textfile containing multiple files to download, one URL or filename per line", - action="store_true") + output_options.add_argument('-q', '--quiet', help='suppress warning messages', + action='store_true') + output_options.add_argument('-v', '--verbose', + help='print detailed information; use -vv for even more detail', + action='count', default=0) + parser.add_argument('-f', '--force', help='force overwriting existing files', + action='store_true') + parser.add_argument('-s', '--site', default=DEFAULT_SITE, + help='MediaWiki site to download from (default: %(default)s)') + parser.add_argument('-o', '--output', help='write download to OUTPUT') + parser.add_argument('-a', '--batch', help='treat FILE as a textfile containing multiple files to download, one URL or filename per line', + action='store_true') args = parser.parse_args() @@ -96,9 +96,9 @@ def main(): if args.verbose >= 1: print("Info: using batch file 
'{}'".format(input_file)) try: - fd = open(input_file, "r") + fd = open(input_file, 'r') except IOError as e: - print("File could not be read. The following error was encountered:") + print('File could not be read. The following error was encountered:') print(e) sys.exit(1) else: @@ -119,8 +119,8 @@ def download(dl, args): filename = url.path site_name = url.netloc if args.site is not DEFAULT_SITE and not args.quiet: - # this will work even if the user specifies 'en.wikipedia.org' - print("Warning: target is a URL, ignoring site specified with --site") + # this will work even if the user specifies 'commons.wikimedia.org' + print('Warning: target is a URL, ignoring site specified with --site') else: filename = dl site_name = args.site @@ -130,7 +130,7 @@ def download(dl, args): # check for valid site parameter if not site_match: - print("Only Wikimedia sites (wikipedia.org and wikimedia.org) are currently supported.") + print('Only Wikimedia sites (wikipedia.org and wikimedia.org) are currently supported.') sys.exit(1) # check if this is a valid file @@ -139,13 +139,13 @@ def download(dl, args): filename = file_match.group(2) else: # no file extension and/or prefix, probably an article - print("Downloading Wikipedia articles is not currently supported.", end="") + print('Downloading Wikipedia articles is not currently supported.', end='') if file_match and not file_match.group(1): # file extension detected, but no prefix # TODO: no longer possible to get to this point since file_match is None with no prefix print(" If this is a file, please add the 'File:' prefix.") else: - print("\n", end="") + print('\n', end='') sys.exit(1) filename = unquote(filename) # remove URL encoding for special characters @@ -153,7 +153,7 @@ def download(dl, args): dest = args.output or filename if args.verbose >= 2: - print("User agent: {}".format(USER_AGENT)) + print('User agent: {}'.format(USER_AGENT)) # connect to site and identify ourselves try: @@ -172,31 +172,31 @@ def download(dl, 
args): if file.imageinfo != {}: # file exists either locally or at Wikimedia Commons - file_url = file.imageinfo["url"] - file_size = file.imageinfo["size"] - file_sha1 = file.imageinfo["sha1"] + file_url = file.imageinfo['url'] + file_size = file.imageinfo['size'] + file_sha1 = file.imageinfo['sha1'] if args.verbose >= 1: print("Info: downloading '{}' " - "({} bytes) from {}".format(filename, file_size, site.host), end="") + "({} bytes) from {}".format(filename, file_size, site.host), end='') if args.output: print(" to '{}'".format(dest)) else: - print("\n", end="") - print("Info: {}".format(file_url)) + print('\n', end='') + print('Info: {}'.format(file_url)) if os.path.isfile(dest) and not args.force: print("File '{}' already exists, skipping download (use -f to ignore)".format(dest)) else: try: - fd = open(dest, "wb") + fd = open(dest, 'wb') except IOError as e: - print("File could not be written. The following error was encountered:") + print('File could not be written. The following error was encountered:') print(e) sys.exit(1) else: # download the file - with tqdm(total=file_size, unit="B", + with tqdm(total=file_size, unit='B', unit_scale=True, unit_divisor=1024) as progress_bar: with fd: res = site.connection.get(file_url, stream=True) @@ -209,14 +209,14 @@ def download(dl, args): dl_sha1 = verify_hash(dest) if args.verbose >= 1: - print("Info: downloaded file SHA1 is {}".format(dl_sha1)) - print("Info: server file SHA1 is {}".format(file_sha1)) + print('Info: downloaded file SHA1 is {}'.format(dl_sha1)) + print('Info: server file SHA1 is {}'.format(file_sha1)) if dl_sha1 == file_sha1: if args.verbose >= 1: - print("Info: hashes match!") + print('Info: hashes match!') # at this point, we've successfully downloaded the file else: - print("Error: hash mismatch! Downloaded file may be corrupt.") + print('Error: hash mismatch! 
Downloaded file may be corrupt.') sys.exit(1) else: @@ -235,7 +235,7 @@ def valid_file(search_string): """ # second group could also restrict to file extensions with three or more # letters with ([^/\r\n\t\f\v]+\.\w{3,}) - file_regex = re.compile(r"(File:|Image:)([^/\r\n\t\f\v]+\.\w+)$", re.I) + file_regex = re.compile(r'(File:|Image:)([^/\r\n\t\f\v]+\.\w+)$', re.I) return file_regex.search(search_string) @@ -248,7 +248,7 @@ def valid_site(search_string): :param search_string: string to validate :returns: a regex Match object if there's a match or None otherwise """ - site_regex = re.compile(r"wiki[mp]edia\.org$", re.I) + site_regex = re.compile(r'wiki[mp]edia\.org$', re.I) return site_regex.search(search_string) @@ -259,7 +259,7 @@ def verify_hash(filename): :return: hash digest """ hasher = hashlib.sha1() - with open(filename, "rb") as dl: + with open(filename, 'rb') as dl: buf = dl.read(BLOCKSIZE) while len(buf) > 0: hasher.update(buf) |
