about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- README.md 20
-rw-r--r-- setup.py 5
-rw-r--r-- test/test_wikiget.py 27
-rw-r--r-- wikiget/wikiget.py 10
4 files changed, 42 insertions, 20 deletions
diff --git a/README.md b/README.md
index dd84ae8..9658e24 100644
--- a/README.md
+++ b/README.md
@@ -31,13 +31,14 @@ be downloaded to a different name with `-o`.
## Future plans
+- batch download categories and user uploads
- download from any MediaWiki-powered site, not just Wikimedia projects
- download Wikipedia articles, in plain text, wikitext, or other formats
## Contributing
-It's recommended that you use a virtual environment manager (like virtualenv) to
-install dependencies:
+It's recommended that you use a virtual environment manager (like
+[virtualenv](https://virtualenv.pypa.io/en/latest/)) to install dependencies:
```bash
pip install --user -U virtualenv
@@ -49,13 +50,14 @@ virtualenv venv
To activate the virtual environment, use one of the following commands:
```bash
-source venv/bin/activate # Linux and macOS (bash, zsh)
-.\venv\Scripts\activate.bat # Windows command prompt
-.\venv\Scripts\Activate.ps1 # Windows PowerShell
+source venv/bin/activate # Linux and macOS
+.\venv\Scripts\activate # Windows
```
-Then run `pip install -e .` to invoke an editable install, meaning any changes
-made to the source will be reflected immediately.
+Then run `pip install -e .` to invoke an
+["editable" install](https://pip.pypa.io/en/stable/reference/pip_install/#editable-installs),
+meaning any changes made to the source will be reflected immediately in the
+executable script. Unit tests can be run with `python setup.py test`.
## License
@@ -68,8 +70,8 @@ the Free Software Foundation, either version 3 of the License, or
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with this program. If not, see <https://www.gnu.org/licenses/>.
+along with this program. If not, see <https://www.gnu.org/licenses/>.
diff --git a/setup.py b/setup.py
index 607411d..fb35851 100644
--- a/setup.py
+++ b/setup.py
@@ -4,9 +4,10 @@ Copyright (C) 2018 Cody Logan; licensed GPLv3+
SPDX-License-Identifier: GPL-3.0-or-later
"""
-from setuptools import setup, find_packages
-from os import path
from io import open
+from os import path
+
+from setuptools import setup, find_packages
here = path.abspath(path.dirname(__file__))
with open(path.join(here, "README.md"), "r") as fr:
diff --git a/test/test_wikiget.py b/test/test_wikiget.py
index 2f1c8f5..8a30a37 100644
--- a/test/test_wikiget.py
+++ b/test/test_wikiget.py
@@ -9,6 +9,9 @@ from wikiget import wikiget
def test_invalid_site_input():
+ """
+ Invalid site strings should not return regex match objects.
+ """
invalid_input = ["example.com", "vim.wikia.com",
"en.wikipedia.com", "en.wikimpedia.org"]
for i in invalid_input:
@@ -17,6 +20,9 @@ def test_invalid_site_input():
def test_valid_site_input():
+ """
+ Valid site strings should return regex match objects.
+ """
valid_input = ["en.wikipedia.org", "commons.wikimedia.org",
"de.wikipedia.org", "meta.wikimedia.org"]
for i in valid_input:
@@ -25,14 +31,23 @@ def test_valid_site_input():
def test_file_regex():
+ """
+ File regex should return a match object with match groups corresponding
+ to the file prefix and name.
+ :return:
+ """
i = "File:Example.jpg"
file_match = wikiget.valid_file(i)
- assert file_match.group(0)
- assert file_match.group(1) == "File:"
- assert file_match.group(2) == "Example.jpg"
+ assert file_match is not None
+ assert file_match.group(0) == "File:Example.jpg" # entire match
+ assert file_match.group(1) == "File:" # first group
+ assert file_match.group(2) == "Example.jpg" # second group
def test_invalid_file_input():
+ """
+ Invalid file strings should not return regex match objects.
+ """
invalid_input = ["file:example", "example.jpg", "Foo Bar.gif",
"Fil:Example.jpg"]
for i in invalid_input:
@@ -41,7 +56,11 @@ def test_invalid_file_input():
def test_valid_file_input():
- valid_input = ["Image:example.jpg", "file:example.jpg", "File:example.file-01.jpg",
+ """
+ Valid file strings should return regex match objects.
+ """
+ valid_input = ["Image:example.jpg", "file:example.jpg",
+ "File:example.file-01.jpg",
"File:ß handwritten sample.gif"]
for i in valid_input:
file_match = wikiget.valid_file(i)
diff --git a/wikiget/wikiget.py b/wikiget/wikiget.py
index bbea06e..52f0f66 100644
--- a/wikiget/wikiget.py
+++ b/wikiget/wikiget.py
@@ -28,7 +28,8 @@ from wikiget.version import __version__
def main():
"""
- Main entry point for console script. Automatically compiled by setuptools.
+ Main entry point for console script. Automatically compiled by setuptools
+ when installed with `pip install` or `python setup.py install`.
"""
default_site = "en.wikipedia.org"
user_agent = "wikiget/{} (https://github.com/clpo13/wikiget) " \
@@ -54,7 +55,7 @@ def main():
output_options.add_argument("-q", "--quiet", help="suppress warning messages",
action="store_true")
output_options.add_argument("-v", "--verbose",
- help="print detailed information, use -vv for even more detail",
+ help="print detailed information; use -vv for even more detail",
action="count", default=0)
parser.add_argument("-f", "--force", help="force overwriting existing files",
action="store_true")
@@ -98,14 +99,13 @@ def main():
print("Downloading Wikipedia articles is not currently supported.", end="")
if file_match and not file_match.group(1):
# file extension detected, but no prefix
- # TODO: no longer possible to get to this point
+ # TODO: no longer possible to get to this point since file_match is None with no prefix
print(" If this is a file, please add the 'File:' prefix.")
else:
print("\n", end="")
sys.exit(1)
- # remove URL encoding
- filename = unquote(filename)
+ filename = unquote(filename) # remove URL encoding for special characters
dest = args.output or filename