From 682b7b24b84c9d8614cf898a06f67681db222deb Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Fri, 17 Nov 2023 16:42:10 -0800 Subject: Cache site connections for reuse in batch downloads --- src/wikiget/dl.py | 19 ++++++++++++++----- tests/test_dl.py | 4 ++-- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py index d3b42fd..2160743 100644 --- a/src/wikiget/dl.py +++ b/src/wikiget/dl.py @@ -139,19 +139,28 @@ def batch_download(args: Namespace) -> int: # TODO: validate file contents before download process starts with ThreadPoolExecutor(max_workers=args.threads) as executor: futures = [] - site: Site = None + sites: list[Site] = [] for line_num, line in dl_dict.items(): # keep track of batch file line numbers for debugging/logging purposes logger.info("Processing '%s' at line %i", line, line_num) try: file = prep_download(line, args) + site = next( + filter( + lambda site: site.host == file.site, + sites, + ), + None, + ) # if there's already a Site object matching the desired host, reuse it # to reduce the number of API calls made per file - if not site or site.host != file.site: - logger.debug("Made a new site connection") - site = connect_to_site(file.site, args) + if site: + logger.debug("Reusing the existing connection to %s", site.host) else: - logger.debug("Reused an existing site connection") + logger.debug("Making a new connection to %s", file.site) + site = connect_to_site(file.site, args) + # cache the new Site for reuse + sites.append(site) file.image = query_api(file.name, site) except ParseError as e: logger.warning("%s (line %i)", str(e), line_num) diff --git a/tests/test_dl.py b/tests/test_dl.py index a15f397..1099ced 100644 --- a/tests/test_dl.py +++ b/tests/test_dl.py @@ -227,12 +227,12 @@ class TestBatchDownload: assert caplog.record_tuples[1] == ( "wikiget.dl", logging.DEBUG, - "Made a new site connection", + "Making a new connection to commons.wikimedia.org", ) assert caplog.record_tuples[3] == ( "wikiget.dl", logging.DEBUG, - "Reused an existing site connection", + "Reusing the existing connection to commons.wikimedia.org", ) @patch("wikiget.dl.read_batch_file") -- cgit v1.2.3