From 2f3074e1b2a62cbd5e32778abc0ff82027c1ce3b Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Thu, 16 Nov 2023 10:23:49 -0800 Subject: Reuse existing Site object when possible in batch downloads Previously, every file downloaded in a batch would create a new Site object. Now, the Site object created by the first file will be reused by subsequent files if it matches the file's requested host, which will significantly speed up the download process, assuming all files are from the same site. This is a quick and dirty fix which could be improved to better handle situations where there is a mix of files from different sites. --- tests/test_dl.py | 67 +++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 18 deletions(-) (limited to 'tests') diff --git a/tests/test_dl.py b/tests/test_dl.py index cb893b8..b0822cf 100644 --- a/tests/test_dl.py +++ b/tests/test_dl.py @@ -34,20 +34,9 @@ from wikiget.wikiget import parse_args class TestPrepDownload: """Define tests related to wikiget.dl.prep_download.""" - @patch("wikiget.dl.query_api") - @patch("wikiget.dl.connect_to_site") - def test_prep_download( - self, mock_connect_to_site: MagicMock, mock_query_api: MagicMock - ) -> None: + def test_prep_download(self) -> None: """The prep_download function should create the expected file object.""" - mock_site = Mock() - mock_image = Mock() - - mock_connect_to_site.return_value = mock_site - mock_query_api.return_value = mock_image - expected_file = File(name="Example.jpg") - expected_file.image = mock_image args = parse_args(["File:Example.jpg"]) file = prep_download(args.FILE, args) @@ -104,7 +93,8 @@ class TestProcessDownload: mock_prep_download.return_value = File("Example.jpg") args = parse_args(["File:Example.jpg"]) - exit_code = process_download(args) + with patch("wikiget.dl.connect_to_site"), patch("wikiget.dl.query_api"): + exit_code = process_download(args) assert exit_code == 0 @@ -118,7 +108,8 @@ class TestProcessDownload: 
mock_prep_download.return_value = File("Example.jpg") args = parse_args(["File:Example.jpg"]) - exit_code = process_download(args) + with patch("wikiget.dl.connect_to_site"), patch("wikiget.dl.query_api"): + exit_code = process_download(args) assert exit_code == 1 @@ -168,12 +159,10 @@ class TestBatchDownload: """Define tests related to wikiget.dl.batch_download.""" @patch("wikiget.dl.download") - @patch("wikiget.dl.prep_download") @patch("wikiget.dl.read_batch_file") def test_batch_download( self, mock_read_batch_file: MagicMock, - mock_prep_download: MagicMock, mock_download: MagicMock, caplog: pytest.LogCaptureFixture, ) -> None: @@ -189,16 +178,58 @@ class TestBatchDownload: mock_download.return_value = 0 args = parse_args(["-a", "batch.txt"]) - errors = batch_download(args) + with patch("wikiget.dl.query_api"), patch("wikiget.dl.connect_to_site"), patch( + "wikiget.dl.prep_download" + ): + errors = batch_download(args) assert mock_read_batch_file.called - assert mock_prep_download.called assert mock_download.called assert caplog.record_tuples == [ ("wikiget.dl", logging.INFO, "Processing 'File:Example.jpg' at line 1") ] assert errors == 0 + @patch("wikiget.dl.connect_to_site") + @patch("wikiget.dl.prep_download") + @patch("wikiget.dl.read_batch_file") + def test_batch_download_reuse_site( + self, + mock_read_batch_file: MagicMock, + mock_prep_download: MagicMock, + mock_connect_to_site: MagicMock, + caplog: pytest.LogCaptureFixture, + ) -> None: + """Test that an existing site object is reused.""" + caplog.set_level(logging.DEBUG) + + mock_site = MagicMock() + mock_site.host = "commons.wikimedia.org" + mock_read_batch_file.return_value = { + 1: "File:Example.jpg", + 2: "File:Foobar.jpg", + } + mock_prep_download.return_value = File("Example.jpg") + mock_connect_to_site.return_value = mock_site + + args = parse_args(["-a", "batch.txt"]) + with patch("wikiget.dl.download"), patch("wikiget.dl.query_api"): + _ = batch_download(args) + + assert 
mock_read_batch_file.called + assert mock_prep_download.called + assert mock_connect_to_site.called + assert caplog.record_tuples[1] == ( + "wikiget.dl", + logging.DEBUG, + "Made a new site connection", + ) + assert caplog.record_tuples[3] == ( + "wikiget.dl", + logging.DEBUG, + "Reused an existing site connection", + ) + @patch("wikiget.dl.read_batch_file") def test_batch_download_os_error( self, mock_read_batch_file: MagicMock, caplog: pytest.LogCaptureFixture -- cgit v1.2.3 From 6178c170d88434937d28026fe592629bd967681e Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Thu, 16 Nov 2023 12:01:17 -0800 Subject: Code cleanup; reorganize some tests --- tests/test_dl.py | 75 ++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 26 deletions(-) (limited to 'tests') diff --git a/tests/test_dl.py b/tests/test_dl.py index 10f5fd5..a15f397 100644 --- a/tests/test_dl.py +++ b/tests/test_dl.py @@ -17,8 +17,10 @@ """Define tests related to the wikiget.dl module.""" +from __future__ import annotations + import logging -from pathlib import Path +from typing import TYPE_CHECKING from unittest.mock import MagicMock, Mock, patch import pytest @@ -30,6 +32,9 @@ from wikiget.exceptions import ParseError from wikiget.file import File from wikiget.wikiget import parse_args +if TYPE_CHECKING: + from pathlib import Path + class TestPrepDownload: """Define tests related to wikiget.dl.prep_download.""" @@ -348,7 +353,13 @@ class TestDownload: file.image.site.connection = requests.Session() return file - def test_download(self, mock_file: File, caplog: pytest.LogCaptureFixture) -> None: + @patch("wikiget.dl.verify_hash") + def test_download( + self, + mock_verify_hash: MagicMock, + mock_file: File, + caplog: pytest.LogCaptureFixture, + ) -> None: """Test that the correct log messages are created when downloading a file. 
There should be a series of info-level messages containing the filename, size, @@ -357,10 +368,10 @@ class TestDownload: """ caplog.set_level(logging.INFO) - with patch("wikiget.dl.verify_hash") as mock_verify_hash: - mock_verify_hash.return_value = "d01b79a6781c72ac9bfff93e5e2cfbeef4efc840" - args = parse_args(["File:Example.jpg"]) - errors = download(mock_file, args) + mock_verify_hash.return_value = "d01b79a6781c72ac9bfff93e5e2cfbeef4efc840" + + args = parse_args(["File:Example.jpg"]) + errors = download(mock_file, args) assert caplog.record_tuples == [ ( @@ -392,8 +403,12 @@ class TestDownload: ] assert errors == 0 + @patch("wikiget.dl.verify_hash") def test_download_with_output( - self, mock_file: File, caplog: pytest.LogCaptureFixture + self, + mock_verify_hash: MagicMock, + mock_file: File, + caplog: pytest.LogCaptureFixture, ) -> None: """Test that the correct log messages are created when downloading a file. @@ -402,11 +417,10 @@ class TestDownload: caplog.set_level(logging.INFO) tmp_file = mock_file.dest + mock_verify_hash.return_value = "d01b79a6781c72ac9bfff93e5e2cfbeef4efc840" - with patch("wikiget.dl.verify_hash") as mock_verify_hash: - mock_verify_hash.return_value = "d01b79a6781c72ac9bfff93e5e2cfbeef4efc840" - args = parse_args(["-o", str(tmp_file), "File:Example.jpg"]) - errors = download(mock_file, args) + args = parse_args(["-o", str(tmp_file), "File:Example.jpg"]) + errors = download(mock_file, args) assert caplog.record_tuples[0] == ( "wikiget.dl", @@ -436,18 +450,19 @@ class TestDownload: ] assert errors == 0 + @patch("pathlib.Path.open") def test_download_os_error( - self, mock_file: File, caplog: pytest.LogCaptureFixture + self, mock_open: MagicMock, mock_file: File, caplog: pytest.LogCaptureFixture ) -> None: """Test what happens when an OSError is raised during download. If the downloaded file cannot be created, an error log message should be created with details on the exception. 
""" - with patch("pathlib.Path.open") as mock_open: - mock_open.side_effect = OSError("write error") - args = parse_args(["File:Example.jpg"]) - errors = download(mock_file, args) + mock_open.side_effect = OSError("write error") + + args = parse_args(["File:Example.jpg"]) + errors = download(mock_file, args) assert caplog.record_tuples == [ ( @@ -458,18 +473,22 @@ class TestDownload: ] assert errors == 1 + @patch("wikiget.dl.verify_hash") def test_download_verify_os_error( - self, mock_file: File, caplog: pytest.LogCaptureFixture + self, + mock_verify_hash: MagicMock, + mock_file: File, + caplog: pytest.LogCaptureFixture, ) -> None: """Test what happens when an OSError is raised during verification. If the downloaded file cannot be read in order to calculate its hash, an error log message should be created with details on the exception. """ - with patch("wikiget.dl.verify_hash") as mock_verify_hash: - mock_verify_hash.side_effect = OSError("read error") - args = parse_args(["File:Example.jpg"]) - errors = download(mock_file, args) + mock_verify_hash.side_effect = OSError("read error") + + args = parse_args(["File:Example.jpg"]) + errors = download(mock_file, args) assert caplog.record_tuples == [ ( @@ -480,17 +499,21 @@ class TestDownload: ] assert errors == 1 + @patch("wikiget.dl.verify_hash") def test_download_verify_hash_mismatch( - self, mock_file: File, caplog: pytest.LogCaptureFixture + self, + mock_verify_hash: MagicMock, + mock_file: File, + caplog: pytest.LogCaptureFixture, ) -> None: """Test what happens when the downloaded file hash and server hash don't match. An error log message should be created if there's a hash mismatch. 
""" - with patch("wikiget.dl.verify_hash") as mock_verify_hash: - mock_verify_hash.return_value = "mismatch" - args = parse_args(["File:Example.jpg"]) - errors = download(mock_file, args) + mock_verify_hash.return_value = "mismatch" + + args = parse_args(["File:Example.jpg"]) + errors = download(mock_file, args) assert caplog.record_tuples == [ ( -- cgit v1.2.3 From 06dfda7b5430bfc895a39defad50f184d41281f1 Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Thu, 16 Nov 2023 12:29:36 -0800 Subject: Additional type checking import blocks --- tests/conftest.py | 10 ++++++++-- tests/test_client.py | 2 ++ tests/test_file_class.py | 2 ++ tests/test_logging.py | 8 ++++++-- tests/test_wikiget_cli.py | 2 ++ 5 files changed, 20 insertions(+), 4 deletions(-) (limited to 'tests') diff --git a/tests/conftest.py b/tests/conftest.py index 6088029..128b581 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -17,13 +17,19 @@ """Define fixtures used across all tests in this folder.""" -from pathlib import Path +from __future__ import annotations + +from typing import TYPE_CHECKING import pytest -import requests_mock as rm from wikiget.file import File +if TYPE_CHECKING: + from pathlib import Path + + import requests_mock as rm + # 2x2 JPEG TEST_FILE_BYTES = ( b"\xff\xd8\xff\xdb\x00C\x00\x03\x02\x02\x02\x02\x02\x03\x02\x02\x02\x03\x03\x03\x03" diff --git a/tests/test_client.py b/tests/test_client.py index dae63f5..a0e4855 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -17,6 +17,8 @@ """Define tests related to the wikiget.client module.""" +from __future__ import annotations + import logging from unittest.mock import MagicMock, patch, sentinel diff --git a/tests/test_file_class.py b/tests/test_file_class.py index 4ad06d1..699f40d 100644 --- a/tests/test_file_class.py +++ b/tests/test_file_class.py @@ -17,6 +17,8 @@ """Define tests related to the wikiget.file module.""" +from __future__ import annotations + from wikiget import DEFAULT_SITE from wikiget.file import 
File diff --git a/tests/test_logging.py b/tests/test_logging.py index 8d58cdf..a402120 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -17,14 +17,18 @@ """Define tests related to the wikiget.logging module.""" +from __future__ import annotations + import logging from pathlib import Path - -import pytest +from typing import TYPE_CHECKING from wikiget.logging import FileLogAdapter, configure_logging from wikiget.wikiget import parse_args +if TYPE_CHECKING: + import pytest + class TestLogging: """Define tests related to wikiget.logging.configure_logging and FileLogAdapter.""" diff --git a/tests/test_wikiget_cli.py b/tests/test_wikiget_cli.py index 28c4399..87bc069 100644 --- a/tests/test_wikiget_cli.py +++ b/tests/test_wikiget_cli.py @@ -17,6 +17,8 @@ """Define tests related to the wikiget.wikiget module.""" +from __future__ import annotations + import logging from unittest.mock import MagicMock, patch -- cgit v1.2.3 From 682b7b24b84c9d8614cf898a06f67681db222deb Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Fri, 17 Nov 2023 16:42:10 -0800 Subject: Cache site connections for reuse in batch downloads --- tests/test_dl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tests') diff --git a/tests/test_dl.py b/tests/test_dl.py index a15f397..1099ced 100644 --- a/tests/test_dl.py +++ b/tests/test_dl.py @@ -227,12 +227,12 @@ class TestBatchDownload: assert caplog.record_tuples[1] == ( "wikiget.dl", logging.DEBUG, - "Made a new site connection", + "Making a new connection to commons.wikimedia.org", ) assert caplog.record_tuples[3] == ( "wikiget.dl", logging.DEBUG, - "Reused an existing site connection", + "Reusing the existing connection to commons.wikimedia.org", ) @patch("wikiget.dl.read_batch_file") -- cgit v1.2.3 From bc07a8fb109d8daabd1430883914013726abe7b0 Mon Sep 17 00:00:00 2001 From: Cody Logan Date: Fri, 17 Nov 2023 16:50:26 -0800 Subject: Refactor batch download tests --- tests/test_dl.py | 17 ++++++----------- 1 file 
changed, 6 insertions(+), 11 deletions(-) (limited to 'tests') diff --git a/tests/test_dl.py b/tests/test_dl.py index 1099ced..7117e50 100644 --- a/tests/test_dl.py +++ b/tests/test_dl.py @@ -160,15 +160,15 @@ class TestProcessDownload: assert exit_code == 1 +@patch("wikiget.dl.read_batch_file") class TestBatchDownload: """Define tests related to wikiget.dl.batch_download.""" @patch("wikiget.dl.download") - @patch("wikiget.dl.read_batch_file") def test_batch_download( self, - mock_read_batch_file: MagicMock, mock_download: MagicMock, + mock_read_batch_file: MagicMock, caplog: pytest.LogCaptureFixture, ) -> None: """Test that no errors are returned for a successful batch download. @@ -197,12 +197,11 @@ class TestBatchDownload: @patch("wikiget.dl.connect_to_site") @patch("wikiget.dl.prep_download") - @patch("wikiget.dl.read_batch_file") def test_batch_download_reuse_site( self, - mock_read_batch_file: MagicMock, mock_prep_download: MagicMock, mock_connect_to_site: MagicMock, + mock_read_batch_file: MagicMock, caplog: pytest.LogCaptureFixture, ) -> None: """Test that an existing site object is reused.""" @@ -235,7 +234,6 @@ class TestBatchDownload: "Reusing the existing connection to commons.wikimedia.org", ) - @patch("wikiget.dl.read_batch_file") def test_batch_download_os_error( self, mock_read_batch_file: MagicMock, caplog: pytest.LogCaptureFixture ) -> None: @@ -252,11 +250,10 @@ class TestBatchDownload: ] @patch("wikiget.dl.prep_download") - @patch("wikiget.dl.read_batch_file") def test_batch_download_parse_error( self, - mock_read_batch_file: MagicMock, mock_prep_download: MagicMock, + mock_read_batch_file: MagicMock, caplog: pytest.LogCaptureFixture, ) -> None: """Test that a warning log message is created if ParseError is raised. 
@@ -278,11 +275,10 @@ class TestBatchDownload: assert errors == 1 @patch("wikiget.dl.prep_download") - @patch("wikiget.dl.read_batch_file") def test_batch_download_file_exists_error( self, - mock_read_batch_file: MagicMock, mock_prep_download: MagicMock, + mock_read_batch_file: MagicMock, caplog: pytest.LogCaptureFixture, ) -> None: """Test that a warning log message is created if the download file exists.""" @@ -300,11 +296,10 @@ class TestBatchDownload: assert errors == 1 @patch("wikiget.dl.prep_download") - @patch("wikiget.dl.read_batch_file") def test_batch_download_other_error( self, - mock_read_batch_file: MagicMock, mock_prep_download: MagicMock, + mock_read_batch_file: MagicMock, caplog: pytest.LogCaptureFixture, ) -> None: """Test that a warning log message is created if there are problems downloading. -- cgit v1.2.3