wikiget/wikiget.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132

# wikiget - CLI tool for downloading files from Wikimedia sites
# Copyright (C) 2018-2021 Cody Logan and contributors
# SPDX-License-Identifier: GPL-3.0-or-later
#
# Wikiget is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Wikiget is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Wikiget. If not, see <https://www.gnu.org/licenses/>.

import argparse
import logging
import sys

from . import DEFAULT_SITE, DEFAULT_PATH, wikiget_version
from .dl import download


def _build_parser():
    """
    Build the argument parser describing every option the console script
    accepts.

    :return: the configured parser; call ``parse_args()`` on it
    """
    parser = argparse.ArgumentParser(description="""
                                     A tool for downloading files from
                                     MediaWiki sites using the file name or
                                     description page URL
                                     """,
                                     epilog="""
                                     Copyright (C) 2018-2021 Cody Logan
                                     and contributors.
                                     License GPLv3+: GNU GPL version 3 or later
                                     <http://www.gnu.org/licenses/gpl.html>.
                                     This is free software; you are free to
                                     change and redistribute it under certain
                                     conditions. There is NO WARRANTY, to the
                                     extent permitted by law.
                                     """)
    parser.add_argument('FILE', help="""
                        name of the file to download with the File:
                        prefix, or the URL of its file description page
                        """)
    parser.add_argument('-V', '--version', action='version',
                        version=f'%(prog)s {wikiget_version}')

    # -q and -v contradict each other, so argparse rejects using both
    message_options = parser.add_mutually_exclusive_group()
    message_options.add_argument('-q', '--quiet',
                                 help='suppress warning messages',
                                 action='store_true')
    message_options.add_argument('-v', '--verbose',
                                 help='print detailed information; '
                                 'use -vv for even more detail',
                                 action='count', default=0)

    parser.add_argument('-f', '--force',
                        help='force overwriting existing files',
                        action='store_true')
    parser.add_argument('-s', '--site', default=DEFAULT_SITE,
                        help='MediaWiki site to download from '
                        '(default: %(default)s)')
    parser.add_argument('-p', '--path', default=DEFAULT_PATH,
                        help='MediaWiki site path, where api.php is located '
                        '(default: %(default)s)')
    parser.add_argument('--username', default='',
                        help='MediaWiki site username, for private wikis')
    parser.add_argument('--password', default='',
                        help='MediaWiki site password, for private wikis')

    # an explicit output name only makes sense for a single download
    output_options = parser.add_mutually_exclusive_group()
    output_options.add_argument('-o', '--output',
                                help='write download to OUTPUT')
    output_options.add_argument('-a', '--batch',
                                help='treat FILE as a textfile containing '
                                'multiple files to download, one URL or '
                                'filename per line', action='store_true')
    return parser


def _select_log_level(verbose, quiet):
    """
    Translate the -v/-q command-line options into a logging level.

    :param verbose: number of times -v was given
    :param quiet: True if -q was given
    :return: one of the ``logging`` level constants
    """
    if verbose >= 2:
        # this includes API and library messages
        return logging.DEBUG
    if verbose >= 1:
        return logging.INFO
    if quiet:
        return logging.ERROR
    return logging.WARNING


def _read_batch_file(input_file):
    """
    Read every line of the batch file into memory.

    Logs the error and exits with status 1 if the file cannot be opened or
    read.

    :param input_file: path of the batch file
    :return: list of the file's lines, line endings included
    """
    try:
        # store file contents in memory in case something
        # happens to the file while we're downloading
        with open(input_file, 'r') as fd:
            return fd.readlines()
    except IOError as e:
        logging.error("File could not be read. "
                      "The following error was encountered:")
        logging.error(e)
        sys.exit(1)


def main():
    """
    Main entry point for console script. Automatically compiled by setuptools
    when installed with `pip install` or `python setup.py install`.

    Parses the command line, configures logging, and then downloads either
    the single FILE argument or, in batch mode, every entry listed in the
    text file named by FILE. Blank lines in a batch file are skipped.
    """
    args = _build_parser().parse_args()

    # set up logger
    # TODO: optionally save to log file
    logging.basicConfig(
        level=_select_log_level(args.verbose, args.quiet),
        format="[%(levelname)s] %(message)s"
    )

    if not args.batch:
        # single download mode
        download(args.FILE, args)
        return

    # batch download mode
    input_file = args.FILE
    logging.info(f"Using batch file '{input_file}'.")

    # TODO: validate file contents before download process starts
    # enumerate over *all* lines so the numbers reported below match the
    # line numbers in the batch file even when blank lines are skipped
    for line_num, line in enumerate(_read_batch_file(input_file), start=1):
        url = line.strip()
        if not url:
            # nothing to download on an empty line (e.g. a trailing newline)
            continue
        logging.info(f"Downloading file {line_num} ({url}):")
        download(url, args)