diff options
| author | clpo13 <cody@lokken.dev> | 2023-10-20 16:57:32 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-10-20 16:57:32 -0700 |
| commit | 8583862e2d16144f184db2e31dbc37dbe3464fed (patch) | |
| tree | 4a0d9edb5301b26d9dbd22ceb307a7e3b1db4820 /src/wikiget/parse.py | |
| parent | e274ccea56219c7d07c0e677d44c8122a699dcaf (diff) | |
| parent | c1820026f97eaf671c29ab30f02879de0ac4df89 (diff) | |
| download | wikiget-8583862e2d16144f184db2e31dbc37dbe3464fed.tar.gz wikiget-8583862e2d16144f184db2e31dbc37dbe3464fed.zip | |
Merge pull request #8 from clpo13/dev
Merge dev branch changes into master
Diffstat (limited to 'src/wikiget/parse.py')
| -rw-r--r-- | src/wikiget/parse.py | 59 |
1 file changed, 59 insertions, 0 deletions
```diff
diff --git a/src/wikiget/parse.py b/src/wikiget/parse.py
new file mode 100644
index 0000000..fe3fe43
--- /dev/null
+++ b/src/wikiget/parse.py
@@ -0,0 +1,59 @@
+# wikiget - CLI tool for downloading files from Wikimedia sites
+# Copyright (C) 2023 Cody Logan
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# Wikiget is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Wikiget is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Wikiget. If not, see <https://www.gnu.org/licenses/>.
+
+import logging
+from argparse import Namespace
+from urllib.parse import unquote, urlparse
+
+import wikiget
+from wikiget.exceptions import ParseError
+from wikiget.file import File
+from wikiget.validations import valid_file
+
+
+def get_dest(dl: str, args: Namespace) -> File:
+    url = urlparse(dl)
+
+    if url.netloc:
+        filename = url.path
+        site_name = url.netloc
+        if args.site is not wikiget.DEFAULT_SITE:
+            # this will work even if the user specifies 'commons.wikimedia.org' since
+            # we're comparing objects instead of values (is not vs. !=)
+            logging.warning("Target is a URL, ignoring site specified with --site")
+    else:
+        filename = dl
+        site_name = args.site
+
+    file_match = valid_file(filename)
+
+    # check if this is a valid file
+    if file_match and file_match.group(1):
+        # has File:/Image: prefix and extension
+        filename = file_match.group(2)
+    else:
+        # no file extension and/or prefix, probably an article
+        msg = f"Could not parse input '{filename}' as a file"
+        raise ParseError(msg)
+
+    filename = unquote(filename)  # remove URL encoding for special characters
+
+    dest = args.output or filename
+
+    file = File(filename, dest, site_name)
+
+    return file
```
