From 630541499a58f98c55d5cc372d21e745c106d250 Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Fri, 13 Oct 2023 12:24:13 -0700 Subject: Refactor parsing logic and revise exception handling --- src/wikiget/parse.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 src/wikiget/parse.py (limited to 'src/wikiget/parse.py') diff --git a/src/wikiget/parse.py b/src/wikiget/parse.py new file mode 100644 index 0000000..09c0767 --- /dev/null +++ b/src/wikiget/parse.py @@ -0,0 +1,54 @@ +# wikiget - CLI tool for downloading files from Wikimedia sites +# Copyright (C) 2023 Cody Logan +# SPDX-License-Identifier: GPL-3.0-or-later +# +# Wikiget is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Wikiget is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Wikiget. If not, see <https://www.gnu.org/licenses/>. 
+ +import logging +from urllib.parse import unquote, urlparse + +import wikiget +from wikiget.exceptions import ParseError +from wikiget.validations import valid_file + + +def get_dest(dl, args): + url = urlparse(dl) + + if url.netloc: + filename = url.path + site_name = url.netloc + if args.site is not wikiget.DEFAULT_SITE: + # this will work even if the user specifies 'commons.wikimedia.org' + logging.warning("target is a URL, ignoring site specified with --site") + else: + filename = dl + site_name = args.site + + file_match = valid_file(filename) + + # check if this is a valid file + if file_match and file_match.group(1): + # has File:/Image: prefix and extension + filename = file_match.group(2) + else: + # no file extension and/or prefix, probably an article + msg = f"Could not parse input '{filename}' as a file" + raise ParseError(msg) + + filename = unquote(filename) # remove URL encoding for special characters + + dest = args.output or filename + + return filename, dest, site_name -- cgit v1.2.3 From 06335ba0176cabd84f5b548995f465ac1c09bc8e Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Tue, 17 Oct 2023 14:00:14 -0700 Subject: Clean up exception handling and error messages --- src/wikiget/parse.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/wikiget/parse.py') diff --git a/src/wikiget/parse.py b/src/wikiget/parse.py index 09c0767..f5c221d 100644 --- a/src/wikiget/parse.py +++ b/src/wikiget/parse.py @@ -30,8 +30,9 @@ def get_dest(dl, args): filename = url.path site_name = url.netloc if args.site is not wikiget.DEFAULT_SITE: - # this will work even if the user specifies 'commons.wikimedia.org' - logging.warning("target is a URL, ignoring site specified with --site") + # this will work even if the user specifies 'commons.wikimedia.org' since + # we're comparing objects instead of values (is not vs. 
!=) + logging.warning("Target is a URL, ignoring site specified with --site") else: filename = dl site_name = args.site -- cgit v1.2.3 From 05457af0d73ff3a820c0b465e6607fc5832a6e74 Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Fri, 20 Oct 2023 16:23:28 -0700 Subject: Reorganize File class --- src/wikiget/parse.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/wikiget/parse.py') diff --git a/src/wikiget/parse.py b/src/wikiget/parse.py index f5c221d..4e9b195 100644 --- a/src/wikiget/parse.py +++ b/src/wikiget/parse.py @@ -52,4 +52,6 @@ def get_dest(dl, args): dest = args.output or filename - return filename, dest, site_name + file = File(filename, dest, site_name) + + return file -- cgit v1.2.3 From c1820026f97eaf671c29ab30f02879de0ac4df89 Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Fri, 20 Oct 2023 16:36:14 -0700 Subject: Add type annotations to source files --- src/wikiget/parse.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/wikiget/parse.py') diff --git a/src/wikiget/parse.py b/src/wikiget/parse.py index 4e9b195..fe3fe43 100644 --- a/src/wikiget/parse.py +++ b/src/wikiget/parse.py @@ -16,14 +16,16 @@ # along with Wikiget. If not, see <https://www.gnu.org/licenses/>. import logging +from argparse import Namespace from urllib.parse import unquote, urlparse import wikiget from wikiget.exceptions import ParseError +from wikiget.file import File from wikiget.validations import valid_file -def get_dest(dl, args): +def get_dest(dl: str, args: Namespace) -> File: url = urlparse(dl) if url.netloc: -- cgit v1.2.3