diff options
| author | Cody Logan <clpo13@gmail.com> | 2019-12-06 14:47:44 -0800 |
|---|---|---|
| committer | Cody Logan <clpo13@gmail.com> | 2019-12-06 14:47:44 -0800 |
| commit | 5f35b45b0b15e0f66608b9c774b76f39e7aa93ee (patch) | |
| tree | ae5e812ae60fe287fd35d56b1884a637ca64acf0 /wikiget/wikiget.py | |
| parent | 8273f4cdc3a4ee67d936c2b0b06f3d5ee92c31bf (diff) | |
| download | wikiget-5f35b45b0b15e0f66608b9c774b76f39e7aa93ee.tar.gz wikiget-5f35b45b0b15e0f66608b9c774b76f39e7aa93ee.zip | |
Switch to Wikimedia Commons as default site
Commons is intended as a repository for freely-usable media, so it makes more
sense for it to be the default, and most images on Wikimedia sites (like English
Wikipedia) are also available there.
The functionality for specifying alternate sites is retained in case users want to
download fair-use media, which is not available on Commons, as well as for possible
future integration with non-Wikimedia MediaWiki sites (like Fandom/Wikia).
Diffstat (limited to 'wikiget/wikiget.py')
| -rw-r--r-- | wikiget/wikiget.py | 84 |
1 file changed, 42 insertions, 42 deletions
diff --git a/wikiget/wikiget.py b/wikiget/wikiget.py index 8bcd3fd..a78056c 100644 --- a/wikiget/wikiget.py +++ b/wikiget/wikiget.py @@ -40,9 +40,9 @@ from tqdm import tqdm from wikiget.version import __version__ BLOCKSIZE = 65536 -DEFAULT_SITE = "en.wikipedia.org" -USER_AGENT = "wikiget/{} (https://github.com/clpo13/wikiget) " \ - "mwclient/{}".format(__version__, mwclient_version) +DEFAULT_SITE = 'commons.wikimedia.org' +USER_AGENT = 'wikiget/{} (https://github.com/clpo13/wikiget) ' \ + 'mwclient/{}'.format(__version__, mwclient_version) def main(): @@ -62,25 +62,25 @@ def main(): it under certain conditions. There is NO WARRANTY, to the extent permitted by law. """) - parser.add_argument("FILE", help=""" + parser.add_argument('FILE', help=""" name of the file to download with the File: or Image: prefix, or the URL of its file description page """) - parser.add_argument("-V", "--version", action="version", - version="%(prog)s {}".format(__version__)) + parser.add_argument('-V', '--version', action='version', + version='%(prog)s {}'.format(__version__)) output_options = parser.add_mutually_exclusive_group() - output_options.add_argument("-q", "--quiet", help="suppress warning messages", - action="store_true") - output_options.add_argument("-v", "--verbose", - help="print detailed information; use -vv for even more detail", - action="count", default=0) - parser.add_argument("-f", "--force", help="force overwriting existing files", - action="store_true") - parser.add_argument("-s", "--site", default=DEFAULT_SITE, - help="MediaWiki site to download from (default: %(default)s)") - parser.add_argument("-o", "--output", help="write download to OUTPUT") - parser.add_argument("-a", "--batch", help="treat FILE as a textfile containing multiple files to download, one URL or filename per line", - action="store_true") + output_options.add_argument('-q', '--quiet', help='suppress warning messages', + action='store_true') + output_options.add_argument('-v', '--verbose', + 
help='print detailed information; use -vv for even more detail', + action='count', default=0) + parser.add_argument('-f', '--force', help='force overwriting existing files', + action='store_true') + parser.add_argument('-s', '--site', default=DEFAULT_SITE, + help='MediaWiki site to download from (default: %(default)s)') + parser.add_argument('-o', '--output', help='write download to OUTPUT') + parser.add_argument('-a', '--batch', help='treat FILE as a textfile containing multiple files to download, one URL or filename per line', + action='store_true') args = parser.parse_args() @@ -96,9 +96,9 @@ def main(): if args.verbose >= 1: print("Info: using batch file '{}'".format(input_file)) try: - fd = open(input_file, "r") + fd = open(input_file, 'r') except IOError as e: - print("File could not be read. The following error was encountered:") + print('File could not be read. The following error was encountered:') print(e) sys.exit(1) else: @@ -119,8 +119,8 @@ def download(dl, args): filename = url.path site_name = url.netloc if args.site is not DEFAULT_SITE and not args.quiet: - # this will work even if the user specifies 'en.wikipedia.org' - print("Warning: target is a URL, ignoring site specified with --site") + # this will work even if the user specifies 'commons.wikimedia.org' + print('Warning: target is a URL, ignoring site specified with --site') else: filename = dl site_name = args.site @@ -130,7 +130,7 @@ def download(dl, args): # check for valid site parameter if not site_match: - print("Only Wikimedia sites (wikipedia.org and wikimedia.org) are currently supported.") + print('Only Wikimedia sites (wikipedia.org and wikimedia.org) are currently supported.') sys.exit(1) # check if this is a valid file @@ -139,13 +139,13 @@ def download(dl, args): filename = file_match.group(2) else: # no file extension and/or prefix, probably an article - print("Downloading Wikipedia articles is not currently supported.", end="") + print('Downloading Wikipedia articles is not 
currently supported.', end='') if file_match and not file_match.group(1): # file extension detected, but no prefix # TODO: no longer possible to get to this point since file_match is None with no prefix print(" If this is a file, please add the 'File:' prefix.") else: - print("\n", end="") + print('\n', end='') sys.exit(1) filename = unquote(filename) # remove URL encoding for special characters @@ -153,7 +153,7 @@ def download(dl, args): dest = args.output or filename if args.verbose >= 2: - print("User agent: {}".format(USER_AGENT)) + print('User agent: {}'.format(USER_AGENT)) # connect to site and identify ourselves try: @@ -172,31 +172,31 @@ def download(dl, args): if file.imageinfo != {}: # file exists either locally or at Wikimedia Commons - file_url = file.imageinfo["url"] - file_size = file.imageinfo["size"] - file_sha1 = file.imageinfo["sha1"] + file_url = file.imageinfo['url'] + file_size = file.imageinfo['size'] + file_sha1 = file.imageinfo['sha1'] if args.verbose >= 1: print("Info: downloading '{}' " - "({} bytes) from {}".format(filename, file_size, site.host), end="") + "({} bytes) from {}".format(filename, file_size, site.host), end='') if args.output: print(" to '{}'".format(dest)) else: - print("\n", end="") - print("Info: {}".format(file_url)) + print('\n', end='') + print('Info: {}'.format(file_url)) if os.path.isfile(dest) and not args.force: print("File '{}' already exists, skipping download (use -f to ignore)".format(dest)) else: try: - fd = open(dest, "wb") + fd = open(dest, 'wb') except IOError as e: - print("File could not be written. The following error was encountered:") + print('File could not be written. 
The following error was encountered:') print(e) sys.exit(1) else: # download the file - with tqdm(total=file_size, unit="B", + with tqdm(total=file_size, unit='B', unit_scale=True, unit_divisor=1024) as progress_bar: with fd: res = site.connection.get(file_url, stream=True) @@ -209,14 +209,14 @@ def download(dl, args): dl_sha1 = verify_hash(dest) if args.verbose >= 1: - print("Info: downloaded file SHA1 is {}".format(dl_sha1)) - print("Info: server file SHA1 is {}".format(file_sha1)) + print('Info: downloaded file SHA1 is {}'.format(dl_sha1)) + print('Info: server file SHA1 is {}'.format(file_sha1)) if dl_sha1 == file_sha1: if args.verbose >= 1: - print("Info: hashes match!") + print('Info: hashes match!') # at this point, we've successfully downloaded the file else: - print("Error: hash mismatch! Downloaded file may be corrupt.") + print('Error: hash mismatch! Downloaded file may be corrupt.') sys.exit(1) else: @@ -235,7 +235,7 @@ def valid_file(search_string): """ # second group could also restrict to file extensions with three or more # letters with ([^/\r\n\t\f\v]+\.\w{3,}) - file_regex = re.compile(r"(File:|Image:)([^/\r\n\t\f\v]+\.\w+)$", re.I) + file_regex = re.compile(r'(File:|Image:)([^/\r\n\t\f\v]+\.\w+)$', re.I) return file_regex.search(search_string) @@ -248,7 +248,7 @@ def valid_site(search_string): :param search_string: string to validate :returns: a regex Match object if there's a match or None otherwise """ - site_regex = re.compile(r"wiki[mp]edia\.org$", re.I) + site_regex = re.compile(r'wiki[mp]edia\.org$', re.I) return site_regex.search(search_string) @@ -259,7 +259,7 @@ def verify_hash(filename): :return: hash digest """ hasher = hashlib.sha1() - with open(filename, "rb") as dl: + with open(filename, 'rb') as dl: buf = dl.read(BLOCKSIZE) while len(buf) > 0: hasher.update(buf) |
