1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
|
# wikiget - CLI tool for downloading files from Wikimedia sites
# Copyright (C) 2018-2023 Cody Logan and contributors
# SPDX-License-Identifier: GPL-3.0-or-later
#
# Wikiget is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Wikiget is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Wikiget. If not, see <https://www.gnu.org/licenses/>.
import argparse
import logging
import sys
import wikiget
from wikiget.dl import download, prep_download
def construct_parser():
    """Build the command-line argument parser for wikiget.

    The parser takes one positional argument, FILE, plus options for version
    output, verbosity, overwriting, site selection, credentials, output
    destination, batch mode, log file and thread count. ``--quiet`` and
    ``--verbose`` are mutually exclusive, as are ``--output`` and ``--batch``.

    Returns:
        argparse.ArgumentParser: the configured parser; nothing is parsed here.
    """
    # Long user-facing texts are kept together here so the option definitions
    # below stay compact.
    description_text = (
        "\n"
        "A tool for downloading files from MediaWiki sites using the file name or\n"
        "description page URL\n"
    )
    epilog_text = (
        "\n"
        "Copyright (C) 2018-2023 Cody Logan and contributors. License GPLv3+: GNU GPL\n"
        "version 3 or later <http://www.gnu.org/licenses/gpl.html>. This is free\n"
        "software; you are free to change and redistribute it under certain conditions.\n"
        "There is NO WARRANTY, to the extent permitted by law.\n"
    )
    file_help = (
        "\n"
        "name of the file to download with the File: prefix, or the URL of its file\n"
        "description page\n"
    )
    batch_help = (
        "treat FILE as a textfile containing multiple files to download, one URL "
        "or filename per line"
    )

    cli = argparse.ArgumentParser(description=description_text, epilog=epilog_text)

    # positional argument and version flag
    cli.add_argument("FILE", help=file_help)
    cli.add_argument(
        "-V",
        "--version",
        action="version",
        version=f"%(prog)s {wikiget.wikiget_version}",
    )

    # -q and -v cannot be combined
    verbosity_group = cli.add_mutually_exclusive_group()
    verbosity_group.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help="suppress warning messages",
    )
    verbosity_group.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="print detailed information; use -vv for even more detail",
    )

    cli.add_argument(
        "-f",
        "--force",
        action="store_true",
        help="force overwriting existing files",
    )

    # which wiki to talk to, and with which credentials
    cli.add_argument(
        "-s",
        "--site",
        default=wikiget.DEFAULT_SITE,
        help="MediaWiki site to download from (default: %(default)s)",
    )
    cli.add_argument(
        "-P",
        "--path",
        default=wikiget.DEFAULT_PATH,
        help="MediaWiki site path, where api.php is located (default: %(default)s)",
    )
    cli.add_argument(
        "-u",
        "--username",
        default="",
        help="MediaWiki site username, for private wikis",
    )
    cli.add_argument(
        "-p",
        "--password",
        default="",
        help="MediaWiki site password, for private wikis",
    )

    # -o and -a cannot be combined
    destination_group = cli.add_mutually_exclusive_group()
    destination_group.add_argument(
        "-o",
        "--output",
        help="write download to OUTPUT",
    )
    destination_group.add_argument(
        "-a",
        "--batch",
        action="store_true",
        help=batch_help,
    )

    cli.add_argument(
        "-l",
        "--logfile",
        default="",
        help="save log output to LOGFILE",
    )
    cli.add_argument(
        "-j",
        "--threads",
        type=int,
        default=1,
        help="Number of parallel downloads to attempt in batch mode",
    )

    return cli
def _configure_logging(args):
    """Configure the root logger from the parsed command-line options.

    The console level is WARNING by default, raised to INFO or DEBUG by the
    verbosity count (compared against ``wikiget.STD_VERBOSE`` and
    ``wikiget.VERY_VERBOSE``) or lowered to ERROR by ``--quiet``. When
    ``args.logfile`` is set, records are additionally written to that file at
    INFO level.

    Args:
        args: parsed options; reads ``verbose``, ``quiet`` and ``logfile``.
    """
    console_level = logging.WARNING
    if args.verbose >= wikiget.VERY_VERBOSE:
        # this includes API and library messages
        console_level = logging.DEBUG
    elif args.verbose >= wikiget.STD_VERBOSE:
        console_level = logging.INFO
    elif args.quiet:
        console_level = logging.ERROR

    console_format = "[%(levelname)s] %(message)s"

    if not args.logfile:
        # log only to console
        logging.basicConfig(level=console_level, format=console_format)
        return

    # log to console and file.
    # The root logger filters records before any handler sees them, so its
    # level must be as verbose as the most verbose handler; otherwise DEBUG
    # records requested for the console would be dropped at the root.
    logging.basicConfig(
        level=min(console_level, logging.INFO),
        format="%(asctime)s [%(levelname)-7s] %(message)s",
        filename=args.logfile,
    )
    root_logger = logging.getLogger("")

    # file log level is always info (TODO: add debug option); pin it on the
    # handler itself now that the root level may be lower than INFO
    for handler in root_logger.handlers:
        if isinstance(handler, logging.FileHandler):
            handler.setLevel(logging.INFO)

    console = logging.StreamHandler()
    console.setLevel(console_level)
    console.setFormatter(logging.Formatter(console_format))
    root_logger.addHandler(console)


def _read_batch_file(path):
    """Read a batch file into memory as ``(line_number, entry)`` pairs.

    Entries are stripped of surrounding whitespace. Blank lines are skipped,
    but line numbers still refer to the position in the file.

    Args:
        path: location of the batch file.

    Returns:
        list of ``(int, str)`` tuples, one per non-blank line.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    entries = []
    with open(path) as fd:
        for line_num, line in enumerate(fd, start=1):
            entry = line.strip()
            if entry:
                entries.append((line_num, entry))
    return entries


def main():
    """Command-line entry point: parse arguments, set up logging, download.

    In batch mode (``--batch``) FILE is read as a list of names or URLs, one
    per line, and each non-blank line is downloaded in turn. Otherwise FILE
    itself is downloaded. Exits with status 1 if the batch file cannot be
    read.
    """
    args = construct_parser().parse_args()

    # console log level is set via -v, -vv, and -q options
    _configure_logging(args)

    # log events are appended to the file if it already exists, so note the
    # start of a new download session
    logging.info(f"Starting download session using wikiget {wikiget.wikiget_version}")

    if not args.batch:
        # single download mode
        file = prep_download(args.FILE, args)
        download(file, args)
        return

    # batch download mode
    input_file = args.FILE
    logging.info(f"Using batch file '{input_file}'.")
    try:
        # store file contents in memory in case something happens to the file
        # while we're downloading
        dl_list = _read_batch_file(input_file)
    except OSError as e:
        logging.error("File could not be read. The following error was encountered:")
        logging.error(e)
        sys.exit(1)

    # TODO: validate file contents before download process starts
    for line_num, url in dl_list:
        # keep track of batch file line numbers for debugging/logging purposes
        logging.info(f"Downloading '{url}' at line {line_num}:")
        file = prep_download(url, args)
        download(file, args)
|