From 78937820a82931baffb02c145b37c95af71b44bc Mon Sep 17 00:00:00 2001
From: Cody Logan
Date: Fri, 27 Oct 2023 11:44:26 -0700
Subject: Move batch file processing to parse module

---
 src/wikiget/dl.py    | 17 +++--------------
 src/wikiget/parse.py | 16 ++++++++++++++++
 2 files changed, 19 insertions(+), 14 deletions(-)

(limited to 'src')

diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py
index 5bc24e9..0290cb7 100644
--- a/src/wikiget/dl.py
+++ b/src/wikiget/dl.py
@@ -30,7 +30,7 @@ from wikiget.client import connect_to_site, query_api
 from wikiget.exceptions import ParseError
 from wikiget.file import File
 from wikiget.logging import FileLogAdapter
-from wikiget.parse import get_dest
+from wikiget.parse import get_dest, read_batch_file
 from wikiget.validations import verify_hash
 
 logger = logging.getLogger(__name__)
@@ -44,26 +44,15 @@ def prep_download(dl: str, args: Namespace) -> File:
 
 
 def batch_download(args: Namespace) -> int:
-    input_file = args.FILE
-    dl_list = {}
     errors = 0
 
-    logger.info(f"Using batch file '{input_file}'.")
-
+    # parse batch file
     try:
-        fd = open(input_file)
+        dl_list = read_batch_file(args.FILE)
     except OSError as e:
         logger.error("File could not be read. The following error was encountered:")
         logger.error(e)
         sys.exit(1)
-    else:
-        with fd:
-            # read the file into memory and process each line as we go
-            for line_num, line in enumerate(fd, start=1):
-                line_s = line.strip()
-                # ignore blank lines and lines starting with "#" (for comments)
-                if line_s and not line_s.startswith("#"):
-                    dl_list[line_num] = line_s
 
     # TODO: validate file contents before download process starts
     with ThreadPoolExecutor(max_workers=args.threads) as executor:
diff --git a/src/wikiget/parse.py b/src/wikiget/parse.py
index 52cc262..998136a 100644
--- a/src/wikiget/parse.py
+++ b/src/wikiget/parse.py
@@ -59,3 +59,19 @@ def get_dest(dl: str, args: Namespace) -> File:
     file = File(filename, dest, site_name)
 
     return file
+
+
+def read_batch_file(batch_file: str) -> dict[int, str]:
+    dl_list = {}
+
+    logger.info(f"Using batch file '{batch_file}'.")
+
+    with open(batch_file) as fd:
+        # read the file into memory and process each line as we go
+        for line_num, line in enumerate(fd, start=1):
+            line_s = line.strip()
+            # ignore blank lines and lines starting with "#" (for comments)
+            if line_s and not line_s.startswith("#"):
+                dl_list[line_num] = line_s
+
+    return dl_list
-- 
cgit v1.2.3