about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Cody Logan <cody@lokken.dev> 2023-11-17 16:42:10 -0800
committer: Cody Logan <cody@lokken.dev> 2023-11-17 16:42:10 -0800
commit: 682b7b24b84c9d8614cf898a06f67681db222deb (patch)
tree: c862973f97e7bf928e93c0aa1125f60df182a301
parent: 60ecb1990f4efc07d596182f38953e8a50c1a726 (diff)
download: wikiget-682b7b24b84c9d8614cf898a06f67681db222deb.tar.gz
wikiget-682b7b24b84c9d8614cf898a06f67681db222deb.zip
Cache site connections for reuse in batch downloads
-rw-r--r-- src/wikiget/dl.py 19
-rw-r--r-- tests/test_dl.py 4
2 files changed, 16 insertions, 7 deletions
diff --git a/src/wikiget/dl.py b/src/wikiget/dl.py
index d3b42fd..2160743 100644
--- a/src/wikiget/dl.py
+++ b/src/wikiget/dl.py
@@ -139,19 +139,28 @@ def batch_download(args: Namespace) -> int:
# TODO: validate file contents before download process starts
with ThreadPoolExecutor(max_workers=args.threads) as executor:
futures = []
- site: Site = None
+ sites: list[Site] = []
for line_num, line in dl_dict.items():
# keep track of batch file line numbers for debugging/logging purposes
logger.info("Processing '%s' at line %i", line, line_num)
try:
file = prep_download(line, args)
+ site = next(
+ filter(
+ lambda site: site.host == file.site,
+ sites,
+ ),
+ None,
+ )
# if there's already a Site object matching the desired host, reuse it
# to reduce the number of API calls made per file
- if not site or site.host != file.site:
- logger.debug("Made a new site connection")
- site = connect_to_site(file.site, args)
+ if site:
+ logger.debug("Reusing the existing connection to %s", site.host)
else:
- logger.debug("Reused an existing site connection")
+ logger.debug("Making a new connection to %s", file.site)
+ site = connect_to_site(file.site, args)
+ # cache the new Site for reuse
+ sites.append(site)
file.image = query_api(file.name, site)
except ParseError as e:
logger.warning("%s (line %i)", str(e), line_num)
diff --git a/tests/test_dl.py b/tests/test_dl.py
index a15f397..1099ced 100644
--- a/tests/test_dl.py
+++ b/tests/test_dl.py
@@ -227,12 +227,12 @@ class TestBatchDownload:
assert caplog.record_tuples[1] == (
"wikiget.dl",
logging.DEBUG,
- "Made a new site connection",
+ "Making a new connection to commons.wikimedia.org",
)
assert caplog.record_tuples[3] == (
"wikiget.dl",
logging.DEBUG,
- "Reused an existing site connection",
+ "Reusing the existing connection to commons.wikimedia.org",
)
@patch("wikiget.dl.read_batch_file")