From 1f78c4a2401a08d3555409b5068171c708527cf2 Mon Sep 17 00:00:00 2001
From: Mehak Farhan <mehak.farhan@conradlabs.com>
Date: Wed, 14 May 2025 13:43:08 +0500
Subject: [PATCH] feat: add test for bird utils

---
 server/test/utilities/test_bird_utils.py | 480 +++++++++++++++++++++++
 1 file changed, 480 insertions(+)
 create mode 100644 server/test/utilities/test_bird_utils.py

diff --git a/server/test/utilities/test_bird_utils.py b/server/test/utilities/test_bird_utils.py
new file mode 100644
index 0000000..84de4dc
--- /dev/null
+++ b/server/test/utilities/test_bird_utils.py
@@ -0,0 +1,480 @@
+import json
+import unittest
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+from utilities.bird_utils import (JSON_FILE_ENCODING,
+                                  add_sequential_ids_to_questions,
+                                  create_and_copy_test_file,
+                                  ensure_global_bird_test_file_path,
+                                  get_database_list,
+                                  group_bird_items_by_database_name,
+                                  load_json_from_file, save_json_to_file)
+from utilities.constants.bird_utils.indexing_constants import (DB_ID_KEY,
+                                                               QUESTION_ID_KEY,
+                                                               QUESTION_KEY)
+from utilities.constants.bird_utils.response_messages import (
+    ERROR_EMPTY_BIRD_ITEMS_LIST, ERROR_FILE_DECODE, ERROR_FILE_NOT_FOUND,
+    ERROR_FILE_READ, ERROR_FILE_SAVE, ERROR_JSON_DECODE, ERROR_MISSING_DB_ID,
+    ERROR_PATH_NOT_DIRECTORY, ERROR_PATH_NOT_EXIST)
+from utilities.constants.database_enums import DatasetType
+
+
+class TestLoadJsonFromFile(unittest.TestCase):
+    """Test suite for load_json_from_file; Path.read_text is patched in every test."""
+
+    def setUp(self):
+        self.mock_path = Path("test.json")
+
+    @patch("pathlib.Path.read_text")
+    def test_loads_valid_json_successfully(self, mock_read_text):
+        """Should return parsed JSON when file content is valid."""
+
+        # Set up the data the patched read_text will hand back
+        json_data = [{QUESTION_KEY: "test question"}]
+
+        # Mock the read_text method to return valid JSON text
+        mock_read_text.return_value = json.dumps(json_data)
+
+        # Call the function
+        result = load_json_from_file(self.mock_path)
+
+        # Assertions: parsed content matches and the file was read once with the module encoding
+        self.assertEqual(result, json_data)
+        mock_read_text.assert_called_once_with(encoding=JSON_FILE_ENCODING)
+
+    @patch(
+        "pathlib.Path.read_text",
+        side_effect=json.JSONDecodeError("Expecting value", "doc", 0),
+    )
+    def test_raises_value_error_on_invalid_json(self, mock_read_text):
+        """Should raise ValueError when a json.JSONDecodeError occurs."""
+
+        # read_text is patched to raise json.JSONDecodeError; call the function
+        with self.assertRaises(ValueError) as context:
+            load_json_from_file(self.mock_path)
+
+        # Assertions: the message contains the formatted ERROR_JSON_DECODE text
+        self.assertIsInstance(context.exception, ValueError)
+        self.assertIn(
+            ERROR_JSON_DECODE.format(file_path=self.mock_path), str(context.exception)
+        )
+
+    @patch("pathlib.Path.read_text", side_effect=FileNotFoundError)
+    def test_raises_file_not_found_error(self, mock_read_text):
+        """Should raise FileNotFoundError when file doesn't exist."""
+
+        # read_text is patched to raise FileNotFoundError; call the function
+        with self.assertRaises(FileNotFoundError) as context:
+            load_json_from_file(self.mock_path)
+
+        # Assertions: the message contains the formatted ERROR_FILE_NOT_FOUND text
+        self.assertIsInstance(context.exception, FileNotFoundError)
+        self.assertIn(
+            ERROR_FILE_NOT_FOUND.format(file_path=self.mock_path),
+            str(context.exception),
+        )
+
+    @patch(
+        "pathlib.Path.read_text",
+        side_effect=UnicodeDecodeError("utf-8", b"", 0, 1, "error reason"),
+    )
+    def test_raises_unicode_decode_error(self, mock_read_text):
+        """Should raise ValueError when read_text fails with UnicodeDecodeError."""
+
+        # read_text is patched to raise UnicodeDecodeError; a ValueError is expected
+        with self.assertRaises(ValueError) as context:
+            load_json_from_file(self.mock_path)
+
+        # Assertions: the message contains the formatted ERROR_FILE_DECODE text
+        self.assertIsInstance(context.exception, ValueError)
+        self.assertIn(
+            ERROR_FILE_DECODE.format(file_path=self.mock_path, error="error reason"),
+            str(context.exception),
+        )
+
+    @patch("pathlib.Path.read_text", side_effect=RuntimeError("some other error"))
+    def test_raises_generic_exception(self, mock_read_text):
+        """Should raise RuntimeError with ERROR_FILE_READ on an unexpected error."""
+
+        # read_text is patched to raise RuntimeError; call the function
+        with self.assertRaises(RuntimeError) as context:
+            load_json_from_file(self.mock_path)
+
+        # Assertions: the message contains the formatted ERROR_FILE_READ text
+        self.assertIsInstance(context.exception, RuntimeError)
+        self.assertIn(
+            ERROR_FILE_READ.format(file_path=self.mock_path, error="some other error"),
+            str(context.exception),
+        )
+
+
+class TestSaveJsonToFile(unittest.TestCase):
+    """Test suite for save_json_to_file function; no real file is written."""
+
+    def setUp(self):
+        self.mock_path = Path("test.json")
+        self.mock_data = [{QUESTION_KEY: "test question"}]
+
+    @patch("pathlib.Path.write_text")
+    def test_saves_json_successfully(self, mock_write_text):
+        """Should write JSON data to file successfully."""
+
+        # Call the function
+        save_json_to_file(self.mock_path, self.mock_data)
+
+        # Assertions: one write of the indent=4 JSON dump using the module encoding
+        mock_write_text.assert_called_once_with(
+            json.dumps(self.mock_data, indent=4), encoding=JSON_FILE_ENCODING
+        )
+
+    @patch("pathlib.Path.write_text", side_effect=OSError("disk full"))
+    def test_raises_runtime_error_on_os_error(self, mock_write_text):
+        """Should raise RuntimeError if write_text throws an OSError."""
+
+        # write_text is patched to raise OSError; a RuntimeError is expected
+        with self.assertRaises(RuntimeError) as context:
+            save_json_to_file(self.mock_path, self.mock_data)
+
+        # Assertions: the message contains the formatted ERROR_FILE_SAVE text
+        self.assertIn(
+            ERROR_FILE_SAVE.format(file_path=self.mock_path, error="disk full"),
+            str(context.exception),
+        )
+
+    @patch("json.dumps", side_effect=TypeError("data not serializable"))
+    def test_raises_runtime_error_on_type_error(self, mock_write_text):
+        """Should raise RuntimeError if data cannot be serialized."""
+
+        # json.dumps is patched to raise TypeError; mock_write_text is actually that json.dumps mock
+        with self.assertRaises(RuntimeError) as context:
+            save_json_to_file(self.mock_path, self.mock_data)
+
+        # Assertions: the message contains the formatted ERROR_FILE_SAVE text
+        self.assertIn(
+            ERROR_FILE_SAVE.format(
+                file_path=self.mock_path, error="data not serializable"
+            ),
+            str(context.exception),
+        )
+
+
+class TestAddSequentialIdsToQuestions(unittest.TestCase):
+    """Test suite for add_sequential_ids_to_questions; loader and saver are patched."""
+
+    def setUp(self):
+        self.mock_path = Path("test.json")
+
+    @patch("utilities.bird_utils.save_json_to_file")
+    @patch("utilities.bird_utils.load_json_from_file")
+    def test_adds_sequential_ids_successfully(self, mock_load, mock_save):
+        """Should annotate and save list with sequential question_ids."""
+
+        # Set up data: expected ids start at 0 and follow list order
+        input_data = [
+            {QUESTION_KEY: "test_question_1"},
+            {QUESTION_KEY: "test_question_2"},
+        ]
+        expected_output = [
+            {QUESTION_ID_KEY: 0, QUESTION_KEY: "test_question_1"},
+            {QUESTION_ID_KEY: 1, QUESTION_KEY: "test_question_2"},
+        ]
+
+        # Make the patched loader return the un-annotated items
+        mock_load.return_value = input_data
+
+        # Call function
+        add_sequential_ids_to_questions(self.mock_path)
+
+        # Assert save called once with the same path and the annotated output
+        mock_save.assert_called_once_with(self.mock_path, expected_output)
+
+    @patch("utilities.bird_utils.load_json_from_file", return_value=[])
+    def test_raises_value_error_on_empty_list(self, mock_load):
+        """Should raise ValueError if input list is empty."""
+
+        # Loader is patched to return an empty list; call the function
+        with self.assertRaises(ValueError) as context:
+            add_sequential_ids_to_questions(self.mock_path)
+
+        # Assertions: the message contains ERROR_EMPTY_BIRD_ITEMS_LIST
+        self.assertIsInstance(context.exception, ValueError)
+        self.assertIn(ERROR_EMPTY_BIRD_ITEMS_LIST, str(context.exception))
+
+    @patch(
+        "utilities.bird_utils.load_json_from_file",
+        side_effect=RuntimeError("load failed"),
+    )
+    def test_propagates_errors_from_loader(self, mock_load):
+        """Should re-raise errors from load_json_from_file."""
+
+        # Loader is patched to raise RuntimeError; call the function
+        with self.assertRaises(RuntimeError) as context:
+            add_sequential_ids_to_questions(self.mock_path)
+
+        # Assertions: the loader's own message is still present
+        self.assertIsInstance(context.exception, RuntimeError)
+        self.assertIn("load failed", str(context.exception))
+
+    @patch(
+        "utilities.bird_utils.save_json_to_file",
+        side_effect=RuntimeError("save failed"),
+    )
+    @patch(
+        "utilities.bird_utils.load_json_from_file",
+        return_value=[{QUESTION_KEY: "test_question_1"}],
+    )
+    def test_propagates_errors_from_saver(self, mock_load, mock_save):
+        """Should re-raise errors from save_json_to_file."""
+
+        # Loader returns one item and saver is patched to raise; call the function
+        with self.assertRaises(RuntimeError) as context:
+            add_sequential_ids_to_questions(self.mock_path)
+
+        # Assertions: the saver's own message is still present
+        self.assertIsInstance(context.exception, RuntimeError)
+        self.assertIn("save failed", str(context.exception))
+
+
+class TestGroupBirdItemsByDatabaseName(unittest.TestCase):
+    """Test suite for group_bird_items_by_database_name function (no mocking needed)."""
+
+    def test_groups_items_by_db_id(self):
+        """Should group items correctly based on db_id."""
+
+        # Set up data: database_1 appears twice, non-adjacently
+        bird_items = [
+            {DB_ID_KEY: "database_1", QUESTION_KEY: "test_question_1"},
+            {DB_ID_KEY: "database_2", QUESTION_KEY: "test_question_2"},
+            {DB_ID_KEY: "database_1", QUESTION_KEY: "test_question_3"},
+        ]
+
+        expected = {
+            "database_1": [
+                {DB_ID_KEY: "database_1", QUESTION_KEY: "test_question_1"},
+                {DB_ID_KEY: "database_1", QUESTION_KEY: "test_question_3"},
+            ],
+            "database_2": [{DB_ID_KEY: "database_2", QUESTION_KEY: "test_question_2"}],
+        }
+
+        # Call function
+        result = group_bird_items_by_database_name(bird_items)
+
+        # Assertions: items are keyed by db_id and keep their input order within a group
+        self.assertEqual(result, expected)
+
+    def test_raises_error_on_empty_list(self):
+        """Should raise ValueError if bird_items is empty."""
+
+        # Call function with an empty list
+        with self.assertRaises(ValueError) as context:
+            group_bird_items_by_database_name([])
+
+        # Assertions: the message contains ERROR_EMPTY_BIRD_ITEMS_LIST
+        self.assertIn(ERROR_EMPTY_BIRD_ITEMS_LIST, str(context.exception))
+
+    def test_raises_error_if_item_missing_db_id(self):
+        """Should raise ValueError if any item is missing db_id."""
+
+        # Set up data: the second item (index 1) has no db_id
+        bird_items = [
+            {DB_ID_KEY: "database_1", QUESTION_KEY: "test_question_1"},
+            {QUESTION_KEY: "test_question_2"},  # Missing db_id
+        ]
+
+        # Call function with the incomplete item list
+        with self.assertRaises(ValueError) as context:
+            group_bird_items_by_database_name(bird_items)
+
+        # Assertions: the message reports the offending index (1)
+        self.assertIn(ERROR_MISSING_DB_ID.format(index=1), str(context.exception))
+
+
+class TestGetDatabaseList(unittest.TestCase):
+    """Test suite for get_database_list function; Path methods are patched."""
+
+    def setUp(self):
+        self.mock_dir = Path("/mock/dataset")
+
+    @patch("pathlib.Path.iterdir")
+    @patch("pathlib.Path.is_dir", return_value=True)
+    @patch("pathlib.Path.exists", return_value=True)
+    def test_returns_database_list(self, mock_exists, mock_is_dir, mock_iterdir):
+        """Should return names of subdirectories."""
+
+        # Set up mock subdirectories; their is_dir is the MagicMock's own, not the patched one
+        subdir1 = MagicMock(spec=Path)
+        subdir1.name = "database_1"
+        subdir1.is_dir.return_value = True
+
+        subdir2 = MagicMock(spec=Path)
+        subdir2.name = "database_2"
+        subdir2.is_dir.return_value = True
+
+        mock_iterdir.return_value = [subdir1, subdir2]
+
+        # Call function
+        result = get_database_list(self.mock_dir)
+
+        # Assertions: names come back in iterdir order; each patched Path method ran once
+        self.assertEqual(result, ["database_1", "database_2"])
+        mock_exists.assert_called_once()
+        mock_is_dir.assert_called_once()
+        mock_iterdir.assert_called_once()
+
+    @patch("pathlib.Path.exists", return_value=False)
+    def test_raises_if_path_does_not_exist(self, mock_exists):
+        """Should raise ValueError if path does not exist."""
+
+        # Path.exists is patched to return False; call function
+        with self.assertRaises(ValueError) as context:
+            get_database_list(self.mock_dir)
+
+        # Assertions: the message contains the formatted ERROR_PATH_NOT_EXIST text
+        self.assertIn(
+            ERROR_PATH_NOT_EXIST.format(dataset_directory=self.mock_dir),
+            str(context.exception),
+        )
+
+    @patch("pathlib.Path.exists", return_value=True)
+    @patch("pathlib.Path.is_dir", return_value=False)
+    def test_raises_if_path_is_not_directory(self, mock_is_dir, mock_exists):
+        """Should raise ValueError if path is not a directory."""
+
+        # Path exists but Path.is_dir is patched to return False; call function
+        with self.assertRaises(ValueError) as context:
+            get_database_list(self.mock_dir)
+
+        # Assertions: the message contains the formatted ERROR_PATH_NOT_DIRECTORY text
+        self.assertIn(
+            ERROR_PATH_NOT_DIRECTORY.format(dataset_directory=self.mock_dir),
+            str(context.exception),
+        )
+
+    @patch("pathlib.Path.iterdir")
+    @patch("pathlib.Path.is_dir", return_value=True)
+    @patch("pathlib.Path.exists", return_value=True)
+    def test_ignores_files_and_returns_only_dirs(
+        self, mock_exists, mock_is_dir, mock_iterdir
+    ):
+        """Should skip files and return only subdirectory names."""
+
+        # Set up one mock subdirectory and one mock file (is_dir returns False)
+        dir_entry = MagicMock(spec=Path)
+        dir_entry.name = "database_directory"
+        dir_entry.is_dir.return_value = True
+
+        file_entry = MagicMock(spec=Path)
+        file_entry.name = "not_a_databse_directory.txt"
+        file_entry.is_dir.return_value = False
+
+        mock_iterdir.return_value = [dir_entry, file_entry]
+
+        # Call function
+        result = get_database_list(self.mock_dir)
+
+        # Assertions: only the directory entry's name is returned
+        self.assertEqual(result, ["database_directory"])
+
+
+class TestEnsureGlobalBirdTestFilePath(unittest.TestCase):
+    """Test suite for ensure_global_bird_test_file_path; Path.exists is patched."""
+
+    def setUp(self):
+        self.test_file = Path("/mock/path/to/test_file.json")
+
+    @patch("pathlib.Path.exists", return_value=True)
+    def test_returns_path_if_file_exists(self, mock_exists):
+        """Should return the file path as-is if the file already exists."""
+
+        # Call the function
+        result = ensure_global_bird_test_file_path(self.test_file)
+
+        # Assertions: same path returned after a single existence check
+        self.assertEqual(result, self.test_file)
+        mock_exists.assert_called_once()
+
+    @patch("utilities.bird_utils.create_and_copy_test_file")
+    @patch("pathlib.Path.exists", return_value=False)
+    def test_creates_and_returns_path_if_missing(
+        self, mock_exists, mock_create_and_copy
+    ):
+        """Should call create_and_copy_test_file if file does not exist."""
+
+        # Make the patched create_and_copy_test_file return the requested path
+        mock_create_and_copy.return_value = self.test_file
+
+        # Call the function
+        result = ensure_global_bird_test_file_path(self.test_file)
+
+        # Assertions: one existence check, one creation call with the path, path returned
+        mock_exists.assert_called_once()
+        mock_create_and_copy.assert_called_once_with(self.test_file)
+        self.assertEqual(result, self.test_file)
+
+
+class TestCreateAndCopyTestFile(unittest.TestCase):
+    """Test suite for create_and_copy_test_file; filesystem calls and PATH_CONFIG are patched."""
+
+    def setUp(self):
+        self.test_file = Path("/mock/test/file.json")
+        self.mock_source = Path("/mock/source/file.json")
+
+    @patch("utilities.bird_utils.shutil.copy")
+    @patch("utilities.bird_utils.Path.mkdir")
+    @patch("utilities.bird_utils.PATH_CONFIG")
+    def test_copies_file_successfully_and_returns_path(
+        self, mock_config, mock_mkdir, mock_copy
+    ):
+        """Should copy from source to test file and return path."""
+        # Configure the PATH_CONFIG mock: source file path and a BIRD_DEV dataset type
+        mock_config.bird_file_path.return_value = self.mock_source
+        mock_config.sample_dataset_type = DatasetType.BIRD_DEV
+
+        # Call the function
+        result = create_and_copy_test_file(self.test_file)
+
+        # Assertions: mkdir and copy each ran once with the expected arguments
+        mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
+        mock_copy.assert_called_once_with(self.mock_source, self.test_file)
+        self.assertEqual(result, self.test_file)
+
+    @patch("utilities.bird_utils.add_sequential_ids_to_questions")
+    @patch("utilities.bird_utils.shutil.copy")
+    @patch("utilities.bird_utils.Path.mkdir")
+    @patch("utilities.bird_utils.PATH_CONFIG")
+    def test_adds_sequential_ids_if_dataset_is_train(
+        self, mock_config, mock_mkdir, mock_copy, mock_annotate
+    ):
+        """Should annotate file if dataset type is BIRD_TRAIN."""
+
+        # Configure the PATH_CONFIG mock: source file path and a BIRD_TRAIN dataset type
+        mock_config.bird_file_path.return_value = self.mock_source
+        mock_config.sample_dataset_type = DatasetType.BIRD_TRAIN
+
+        # Call the function
+        result = create_and_copy_test_file(self.test_file)
+
+        # Assertions: the annotation helper ran once on the copied test file
+        mock_annotate.assert_called_once_with(self.test_file)
+        self.assertEqual(result, self.test_file)
+
+    @patch("utilities.bird_utils.add_sequential_ids_to_questions")
+    @patch("utilities.bird_utils.shutil.copy")
+    @patch("utilities.bird_utils.Path.mkdir")
+    @patch("utilities.bird_utils.PATH_CONFIG")
+    def test_skips_annotation_if_dataset_is_not_train(
+        self, mock_config, mock_mkdir, mock_copy, mock_annotate
+    ):
+        """Should not annotate file if dataset type is not BIRD_TRAIN."""
+
+        # Configure the PATH_CONFIG mock: source file path and a BIRD_DEV dataset type
+        mock_config.bird_file_path.return_value = self.mock_source
+        mock_config.sample_dataset_type = DatasetType.BIRD_DEV
+
+        # Call the function
+        result = create_and_copy_test_file(self.test_file)
+
+        # Assertions: the annotation helper was never invoked
+        mock_annotate.assert_not_called()
+        self.assertEqual(result, self.test_file)