about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Cody Logan <clpo13@gmail.com> 2019-12-06 14:47:44 -0800
committer: Cody Logan <clpo13@gmail.com> 2019-12-06 14:47:44 -0800
commit: 5f35b45b0b15e0f66608b9c774b76f39e7aa93ee (patch)
tree: ae5e812ae60fe287fd35d56b1884a637ca64acf0
parent: 8273f4cdc3a4ee67d936c2b0b06f3d5ee92c31bf (diff)
download: wikiget-5f35b45b0b15e0f66608b9c774b76f39e7aa93ee.tar.gz
          wikiget-5f35b45b0b15e0f66608b9c774b76f39e7aa93ee.zip
Switch to Wikimedia Commons as default site
Commons is intended as a repository for freely usable media, so it makes more sense for it to be the default, and most images on Wikimedia sites (like English Wikipedia) are also available there. The functionality for specifying alternate sites is left in place in case users want to download fair-use media, which is not available on Commons, as well as for possible future integration with non-Wikimedia MediaWiki sites (like Fandom/Wikia).
-rw-r--r--  README.md             8
-rw-r--r--  setup.py              50
-rw-r--r--  test/test_wikiget.py  36
-rw-r--r--  wikiget/version.py    2
-rw-r--r--  wikiget/wikiget.py    84
5 files changed, 90 insertions, 90 deletions
diff --git a/README.md b/README.md
index a6500ad..6522494 100644
--- a/README.md
+++ b/README.md
@@ -15,9 +15,9 @@ Requires Python 2.7 or 3.5+. Install with `pip install --user wikiget` or, if yo
`wikiget [-h] [-V] [-q | -v] [-f] [-a] [--site SITE] [-o OUTPUT] FILE`
If `FILE` is in the form `File:Example.jpg` or `Example.jpg`, it will be fetched
-from the default site, which is "en.wikipedia.org". If it's the fully-qualified
-URL of a file description page, like `https://commons.wikimedia.org/wiki/File:Example.jpg`,
-the file is fetched from the specified site, in this case "commons.wikimedia.org".
+from the default site, which is "commons.wikimedia.org". If it's the fully-qualified
+URL of a file description page, like `https://en.wikipedia.org/wiki/File:Example.jpg`,
+the file is fetched from the specified site, in this case "en.wikipedia.org".
Full URLs may contain characters your shell interprets differently, so you can
either escape those characters with a backslash `\` or surround the entire URL
with single `'` or double `"` quotes.
@@ -42,7 +42,7 @@ offending filename is printed.
```bash
wikiget File:Example.jpg
-wikiget --site commons.wikimedia.org File:Example.jpg
+wikiget --site en.wikipedia.org File:Example.jpg
wikiget https://en.wikipedia.org/wiki/File:Example.jpg -o test.jpg
```
diff --git a/setup.py b/setup.py
index af030b3..759e4e4 100644
--- a/setup.py
+++ b/setup.py
@@ -23,45 +23,45 @@ from os import path
from setuptools import setup, find_packages
here = path.abspath(path.dirname(__file__))
-with open(path.join(here, "README.md"), "r") as fr:
+with open(path.join(here, 'README.md'), 'r') as fr:
long_description = fr.read()
version = {}
-with open(path.join(here, "wikiget", "version.py"), "r") as fv:
+with open(path.join(here, 'wikiget', 'version.py'), 'r') as fv:
exec(fv.read(), version)
setup(
- name="wikiget",
- version=version["__version__"],
- author="Cody Logan",
- author_email="clpo13@gmail.com",
- description="CLI tool for downloading files from MediaWiki sites",
+ name='wikiget',
+ version=version['__version__'],
+ author='Cody Logan',
+ author_email='clpo13@gmail.com',
+ description='CLI tool for downloading files from MediaWiki sites',
long_description=long_description,
- long_description_content_type="text/markdown",
- url="https://github.com/clpo13/wikiget",
- keywords="download mediawiki wikimedia wikipedia",
+ long_description_content_type='text/markdown',
+ url='https://github.com/clpo13/wikiget',
+ keywords='download mediawiki wikimedia wikipedia',
packages=find_packages(),
classifiers=[
- "Development Status :: 4 - Beta",
- "Environment :: Console",
- "Intended Audience :: End Users/Desktop",
- "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
- "Operating System :: OS Independent",
- "Programming Language :: Python :: 2.7",
- "Programming Language :: Python :: 3.5",
- "Programming Language :: Python :: 3.6",
- "Programming Language :: Python :: 3.7",
- "Programming Language :: Python :: 3.8",
- "Topic :: Utilities",
+ 'Development Status :: 4 - Beta',
+ 'Environment :: Console',
+ 'Intended Audience :: End Users/Desktop',
+ 'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)',
+ 'Operating System :: OS Independent',
+ 'Programming Language :: Python :: 2.7',
+ 'Programming Language :: Python :: 3.5',
+ 'Programming Language :: Python :: 3.6',
+ 'Programming Language :: Python :: 3.7',
+ 'Programming Language :: Python :: 3.8',
+ 'Topic :: Utilities',
],
python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*',
- install_requires=["future", "mwclient>=0.10.0", "pytest-runner", "requests", "tqdm"],
- tests_require=["pytest"],
+ install_requires=['future', 'mwclient>=0.10.0', 'pytest-runner', 'requests', 'tqdm'],
+ tests_require=['pytest'],
project_urls={
- "Bug Reports": "https://github.com/clpo13/wikiget/issues",
+ 'Bug Reports': 'https://github.com/clpo13/wikiget/issues',
},
entry_points={
- "console_scripts": [
+ 'console_scripts': [
'wikiget=wikiget.wikiget:main',
],
},
diff --git a/test/test_wikiget.py b/test/test_wikiget.py
index 8aaaec8..6bd1c9d 100644
--- a/test/test_wikiget.py
+++ b/test/test_wikiget.py
@@ -29,8 +29,8 @@ def test_invalid_site_input():
"""
Invalid site strings should not return regex match objects.
"""
- invalid_input = ["example.com", "vim.wikia.com",
- "en.wikipedia.com", "en.wikimpedia.org"]
+ invalid_input = ['example.com', 'vim.wikia.com',
+ 'en.wikipedia.com', 'en.wikimpedia.org']
for i in invalid_input:
site_match = wikiget.valid_site(i)
assert site_match is None
@@ -40,8 +40,8 @@ def test_valid_site_input():
"""
Valid site strings should return regex match objects.
"""
- valid_input = ["en.wikipedia.org", "commons.wikimedia.org",
- "de.wikipedia.org", "meta.wikimedia.org"]
+ valid_input = ['en.wikipedia.org', 'commons.wikimedia.org',
+ 'de.wikipedia.org', 'meta.wikimedia.org']
for i in valid_input:
site_match = wikiget.valid_site(i)
assert site_match is not None
@@ -53,20 +53,20 @@ def test_file_regex():
to the file prefix and name.
:return:
"""
- i = "File:Example.jpg"
+ i = 'File:Example.jpg'
file_match = wikiget.valid_file(i)
assert file_match is not None
- assert file_match.group(0) == "File:Example.jpg" # entire match
- assert file_match.group(1) == "File:" # first group
- assert file_match.group(2) == "Example.jpg" # second group
+ assert file_match.group(0) == 'File:Example.jpg' # entire match
+ assert file_match.group(1) == 'File:' # first group
+ assert file_match.group(2) == 'Example.jpg' # second group
def test_invalid_file_input():
"""
Invalid file strings should not return regex match objects.
"""
- invalid_input = ["file:example", "example.jpg", "Foo Bar.gif",
- "Fil:Example.jpg"]
+ invalid_input = ['file:example', 'example.jpg', 'Foo Bar.gif',
+ 'Fil:Example.jpg']
for i in invalid_input:
file_match = wikiget.valid_file(i)
assert file_match is None
@@ -76,9 +76,9 @@ def test_valid_file_input():
"""
Valid file strings should return regex match objects.
"""
- valid_input = ["Image:example.jpg", "file:example.jpg",
- "File:example.file-01.jpg", "FILE:FOO.BMP",
- "File:ß handwritten sample.gif", "File:A (1).jpeg"]
+ valid_input = ['Image:example.jpg', 'file:example.jpg',
+ 'File:example.file-01.jpg', 'FILE:FOO.BMP',
+ 'File:ß handwritten sample.gif', 'File:A (1).jpeg']
for i in valid_input:
file_match = wikiget.valid_file(i)
assert file_match is not None
@@ -89,14 +89,14 @@ def test_verify_hash():
Confirm that verify_hash returns the proper SHA1 hash.
"""
# TODO: do we need to actually create a file?
- file_name = "testfile"
- file_contents = "foobar"
- file_sha1 = "8843d7f92416211de9ebb963ff4ce28125932878"
+ file_name = 'testfile'
+ file_contents = 'foobar'
+ file_sha1 = '8843d7f92416211de9ebb963ff4ce28125932878'
try:
- dl = open(file_name, "w")
+ dl = open(file_name, 'w')
except PermissionError:
- pytest.skip("need write access to create test file")
+ pytest.skip('need write access to create test file')
else:
with dl:
dl.write(file_contents)
diff --git a/wikiget/version.py b/wikiget/version.py
index 0958641..72b7d2f 100644
--- a/wikiget/version.py
+++ b/wikiget/version.py
@@ -1,3 +1,3 @@
"""Sets the program version in setup.py and on the command line."""
-__version__ = "0.2.1"
+__version__ = '0.2.1'
diff --git a/wikiget/wikiget.py b/wikiget/wikiget.py
index 8bcd3fd..a78056c 100644
--- a/wikiget/wikiget.py
+++ b/wikiget/wikiget.py
@@ -40,9 +40,9 @@ from tqdm import tqdm
from wikiget.version import __version__
BLOCKSIZE = 65536
-DEFAULT_SITE = "en.wikipedia.org"
-USER_AGENT = "wikiget/{} (https://github.com/clpo13/wikiget) " \
- "mwclient/{}".format(__version__, mwclient_version)
+DEFAULT_SITE = 'commons.wikimedia.org'
+USER_AGENT = 'wikiget/{} (https://github.com/clpo13/wikiget) ' \
+ 'mwclient/{}'.format(__version__, mwclient_version)
def main():
@@ -62,25 +62,25 @@ def main():
it under certain conditions. There is NO WARRANTY, to the
extent permitted by law.
""")
- parser.add_argument("FILE", help="""
+ parser.add_argument('FILE', help="""
name of the file to download with the File: or Image: prefix,
or the URL of its file description page
""")
- parser.add_argument("-V", "--version", action="version",
- version="%(prog)s {}".format(__version__))
+ parser.add_argument('-V', '--version', action='version',
+ version='%(prog)s {}'.format(__version__))
output_options = parser.add_mutually_exclusive_group()
- output_options.add_argument("-q", "--quiet", help="suppress warning messages",
- action="store_true")
- output_options.add_argument("-v", "--verbose",
- help="print detailed information; use -vv for even more detail",
- action="count", default=0)
- parser.add_argument("-f", "--force", help="force overwriting existing files",
- action="store_true")
- parser.add_argument("-s", "--site", default=DEFAULT_SITE,
- help="MediaWiki site to download from (default: %(default)s)")
- parser.add_argument("-o", "--output", help="write download to OUTPUT")
- parser.add_argument("-a", "--batch", help="treat FILE as a textfile containing multiple files to download, one URL or filename per line",
- action="store_true")
+ output_options.add_argument('-q', '--quiet', help='suppress warning messages',
+ action='store_true')
+ output_options.add_argument('-v', '--verbose',
+ help='print detailed information; use -vv for even more detail',
+ action='count', default=0)
+ parser.add_argument('-f', '--force', help='force overwriting existing files',
+ action='store_true')
+ parser.add_argument('-s', '--site', default=DEFAULT_SITE,
+ help='MediaWiki site to download from (default: %(default)s)')
+ parser.add_argument('-o', '--output', help='write download to OUTPUT')
+ parser.add_argument('-a', '--batch', help='treat FILE as a textfile containing multiple files to download, one URL or filename per line',
+ action='store_true')
args = parser.parse_args()
@@ -96,9 +96,9 @@ def main():
if args.verbose >= 1:
print("Info: using batch file '{}'".format(input_file))
try:
- fd = open(input_file, "r")
+ fd = open(input_file, 'r')
except IOError as e:
- print("File could not be read. The following error was encountered:")
+ print('File could not be read. The following error was encountered:')
print(e)
sys.exit(1)
else:
@@ -119,8 +119,8 @@ def download(dl, args):
filename = url.path
site_name = url.netloc
if args.site is not DEFAULT_SITE and not args.quiet:
- # this will work even if the user specifies 'en.wikipedia.org'
- print("Warning: target is a URL, ignoring site specified with --site")
+ # this will work even if the user specifies 'commons.wikimedia.org'
+ print('Warning: target is a URL, ignoring site specified with --site')
else:
filename = dl
site_name = args.site
@@ -130,7 +130,7 @@ def download(dl, args):
# check for valid site parameter
if not site_match:
- print("Only Wikimedia sites (wikipedia.org and wikimedia.org) are currently supported.")
+ print('Only Wikimedia sites (wikipedia.org and wikimedia.org) are currently supported.')
sys.exit(1)
# check if this is a valid file
@@ -139,13 +139,13 @@ def download(dl, args):
filename = file_match.group(2)
else:
# no file extension and/or prefix, probably an article
- print("Downloading Wikipedia articles is not currently supported.", end="")
+ print('Downloading Wikipedia articles is not currently supported.', end='')
if file_match and not file_match.group(1):
# file extension detected, but no prefix
# TODO: no longer possible to get to this point since file_match is None with no prefix
print(" If this is a file, please add the 'File:' prefix.")
else:
- print("\n", end="")
+ print('\n', end='')
sys.exit(1)
filename = unquote(filename) # remove URL encoding for special characters
@@ -153,7 +153,7 @@ def download(dl, args):
dest = args.output or filename
if args.verbose >= 2:
- print("User agent: {}".format(USER_AGENT))
+ print('User agent: {}'.format(USER_AGENT))
# connect to site and identify ourselves
try:
@@ -172,31 +172,31 @@ def download(dl, args):
if file.imageinfo != {}:
# file exists either locally or at Wikimedia Commons
- file_url = file.imageinfo["url"]
- file_size = file.imageinfo["size"]
- file_sha1 = file.imageinfo["sha1"]
+ file_url = file.imageinfo['url']
+ file_size = file.imageinfo['size']
+ file_sha1 = file.imageinfo['sha1']
if args.verbose >= 1:
print("Info: downloading '{}' "
- "({} bytes) from {}".format(filename, file_size, site.host), end="")
+ "({} bytes) from {}".format(filename, file_size, site.host), end='')
if args.output:
print(" to '{}'".format(dest))
else:
- print("\n", end="")
- print("Info: {}".format(file_url))
+ print('\n', end='')
+ print('Info: {}'.format(file_url))
if os.path.isfile(dest) and not args.force:
print("File '{}' already exists, skipping download (use -f to ignore)".format(dest))
else:
try:
- fd = open(dest, "wb")
+ fd = open(dest, 'wb')
except IOError as e:
- print("File could not be written. The following error was encountered:")
+ print('File could not be written. The following error was encountered:')
print(e)
sys.exit(1)
else:
# download the file
- with tqdm(total=file_size, unit="B",
+ with tqdm(total=file_size, unit='B',
unit_scale=True, unit_divisor=1024) as progress_bar:
with fd:
res = site.connection.get(file_url, stream=True)
@@ -209,14 +209,14 @@ def download(dl, args):
dl_sha1 = verify_hash(dest)
if args.verbose >= 1:
- print("Info: downloaded file SHA1 is {}".format(dl_sha1))
- print("Info: server file SHA1 is {}".format(file_sha1))
+ print('Info: downloaded file SHA1 is {}'.format(dl_sha1))
+ print('Info: server file SHA1 is {}'.format(file_sha1))
if dl_sha1 == file_sha1:
if args.verbose >= 1:
- print("Info: hashes match!")
+ print('Info: hashes match!')
# at this point, we've successfully downloaded the file
else:
- print("Error: hash mismatch! Downloaded file may be corrupt.")
+ print('Error: hash mismatch! Downloaded file may be corrupt.')
sys.exit(1)
else:
@@ -235,7 +235,7 @@ def valid_file(search_string):
"""
# second group could also restrict to file extensions with three or more
# letters with ([^/\r\n\t\f\v]+\.\w{3,})
- file_regex = re.compile(r"(File:|Image:)([^/\r\n\t\f\v]+\.\w+)$", re.I)
+ file_regex = re.compile(r'(File:|Image:)([^/\r\n\t\f\v]+\.\w+)$', re.I)
return file_regex.search(search_string)
@@ -248,7 +248,7 @@ def valid_site(search_string):
:param search_string: string to validate
:returns: a regex Match object if there's a match or None otherwise
"""
- site_regex = re.compile(r"wiki[mp]edia\.org$", re.I)
+ site_regex = re.compile(r'wiki[mp]edia\.org$', re.I)
return site_regex.search(search_string)
@@ -259,7 +259,7 @@ def verify_hash(filename):
:return: hash digest
"""
hasher = hashlib.sha1()
- with open(filename, "rb") as dl:
+ with open(filename, 'rb') as dl:
buf = dl.read(BLOCKSIZE)
while len(buf) > 0:
hasher.update(buf)