1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
|
# wikiget - CLI tool for downloading files from Wikimedia sites
# Copyright (C) 2023 Cody Logan
# SPDX-License-Identifier: GPL-3.0-or-later
#
# Wikiget is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Wikiget is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Wikiget. If not, see <https://www.gnu.org/licenses/>.
import fileinput
import logging
from argparse import Namespace
from typing import Dict
from urllib.parse import unquote, urlparse
import wikiget
from wikiget.exceptions import ParseError
from wikiget.file import File
from wikiget.validations import valid_file
# Module-level logger named after this module, per the stdlib logging convention.
logger = logging.getLogger(__name__)
def get_dest(dl: str, args: Namespace) -> File:
    """Resolve a download target into a File object.

    Parses *dl* either as a full URL (filename taken from the path, site from
    the host) or as a bare filename (site taken from ``args.site``), validates
    that it names an actual file page (File:/Image: prefix plus an extension),
    strips URL percent-encoding, and pairs the result with the output
    destination from ``args.output``.

    :param dl: download target, either a URL or a filename
    :param args: parsed command-line arguments; reads ``site`` and ``output``
    :returns: a File describing the name, destination, and site
    :raises ParseError: if *dl* cannot be parsed as a file page
    """
    url = urlparse(dl)

    if url.netloc:
        # dl is a URL: filename comes from the path, site from the host
        filename = url.path
        site_name = url.netloc
        if args.site is not wikiget.DEFAULT_SITE:
            # this will work even if the user specifies 'commons.wikimedia.org'
            # since we're comparing objects instead of values (is not vs. !=)
            logger.warning("Target is a URL, ignoring site specified with --site")
    else:
        # dl is a bare filename; the site comes from --site (or its default)
        filename = dl
        site_name = args.site

    file_match = valid_file(filename)

    # check if this is a valid file
    if file_match and file_match.group(1):
        # has File:/Image: prefix and extension
        filename = file_match.group(2)
    else:
        # no file extension and/or prefix, probably an article
        # BUG FIX: the message previously contained the literal text
        # '(unknown)' (an f-string with no placeholder); report the actual
        # input that failed to parse
        msg = f"Could not parse input '{dl}' as a file"
        raise ParseError(msg)

    filename = unquote(filename)  # remove URL encoding for special characters
    dest = args.output or filename

    return File(filename, dest, site_name)
def read_batch_file(batch_file: str) -> Dict[int, str]:
    """Read a batch file and collect its download targets.

    Accepts a path or ``"-"`` for stdin. Blank lines and lines beginning
    with ``#`` are skipped; every other line is recorded, stripped of
    surrounding whitespace, keyed by its 1-based line number.

    :param batch_file: path to the batch file, or "-" for stdin
    :returns: mapping of line number to download target
    """
    if batch_file == "-":
        logger.info("Using stdin for batch download")
    else:
        logger.info(f"Using file '{batch_file}' for batch download")

    downloads: Dict[int, str] = {}
    with fileinput.input(batch_file) as fd:
        # process each line as it is read, remembering where it came from
        for number, raw_line in enumerate(fd, start=1):
            entry = raw_line.strip()
            # skip blanks and comment lines ("#"-prefixed)
            if not entry or entry.startswith("#"):
                continue
            downloads[number] = entry
    return downloads
|