From 993a9e611f666bc559fd2d30e56d758e13d96445 Mon Sep 17 00:00:00 2001
From: Caleb Aguirre-Leon
Date: Sat, 29 Mar 2025 00:01:21 -0400
Subject: [PATCH 01/15] Staging New File

---
 scraper.py | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 scraper.py

diff --git a/scraper.py b/scraper.py
new file mode 100644
index 0000000..7dcfe1e
--- /dev/null
+++ b/scraper.py
@@ -0,0 +1,84 @@
+"""
+This module contains a function to search for posts using the BlueSky API.
+"""
+import requests
+from alive_progress import alive_bar
+
+def search_posts(params, token):
+    # pylint: disable=E1102
+    # pylint: disable=C0301
+
+    """
+    Search for posts using the BlueSky API.
+
+    Args:
+        params (dict): The query parameters for the API request.
+            - query (str, required): The search term for the BlueSky posts.
+            - sort (str, optional): The sorting criteria for results.
+                Options include "top" for top posts or "latest" for the latest posts.
+            - since (str, optional): The start date for posts (ISO 8601 format).
+            - until (str, optional): The end date for posts (ISO 8601 format).
+            - mentions (str, optional): Mentions to filter posts by.
+                - Handles will be resolved to DIDs using the provided token.
+            - author (str, optional): The author of the posts (handle or DID).
+            - lang (str, optional): The language of the posts.
+            - domain (str, optional): A domain URL included in the posts.
+            - url (str, optional): A specific URL included in the posts.
+            - tags (list, optional): Tags to filter posts by (each tag <= 640 characters).
+            - limit (int, optional): The maximum number of posts to retrieve in a single response.
+                Defaults to 25.
+            - cursor (str, optional): Pagination token for continuing from a previous request.
+            - posts_limit (int, optional): The maximum number of posts to retrieve across all responses.
+                Defaults to 500.
+
+    Returns:
+        list: A list of posts matching the search criteria.
+
+    Notes:
+        - Progress is displayed using a progress bar indicating the number of posts fetched.
+        - Handles pagination automatically until `posts_limit` is reached or no further results are available.
+        - Logs and returns partial results if an error occurs during fetching.
+    """
+    posts = []
+    url = "https://bsky.social/xrpc/app.bsky.feed.searchPosts"
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "application/json",
+    }
+
+    total_fetched = 0
+    posts_limit = params.get("posts_limit", 1000)
+
+    with alive_bar(posts_limit) as progress:
+        while True:
+            try:
+                response = requests.get(url, headers=headers, params=params, timeout=10)
+                # print(response)
+                response.raise_for_status()
+                data = response.json()
+
+                #Check if we have reached our overall posts limit
+                new_posts = data.get("posts", [])
+                posts.extend(new_posts)
+                total_fetched += len(new_posts)
+
+                #Update progress bar
+                progress(len(new_posts))
+
+                if posts_limit and total_fetched >= posts_limit:
+                    print(f"Fetched {total_fetched} posts, total: {total_fetched}/{posts_limit}")
+                    return posts[:posts_limit]
+
+                #Move to the enxt page if available
+                next_cursor = data.get("cursor")
+                if not next_cursor:
+                    print(f"All posts fetched. Total: {total_fetched}")
+                    return posts
+
+                params["cursor"] = next_cursor
+            except requests.exceptions.RequestException as err:
+                print(f"Error fetching posts: {err}")
+                print(
+                    "Response:", response.text if "response" in locals() else "No response"
+                )
+                return posts
\ No newline at end of file

From 91889d86464c9d4ae514c5718b587d999b9a6ffc Mon Sep 17 00:00:00 2001
From: Caleb Aguirre-Leon
Date: Sat, 29 Mar 2025 00:06:17 -0400
Subject: [PATCH 02/15] Staging Changes

---
 mission_blue.py | 87 ++-----------------------------------------------
 1 file changed, 2 insertions(+), 85 deletions(-)

diff --git a/mission_blue.py b/mission_blue.py
index 32c7d0c..662f1cc 100644
--- a/mission_blue.py
+++ b/mission_blue.py
@@ -10,6 +10,7 @@
 import click
 import file
 import auth
+import scraper
 
 # pylint: disable=C0301
 lang_dict = {
@@ -217,90 +218,6 @@ def generate_query_params(
     }
 
 
-def search_posts(params, token):
-    # pylint: disable=E1102
-    # pylint: disable=C0301
-
-    """
-    Search for posts using the BlueSky API.
-
-    Args:
-        params (dict): The query parameters for the API request.
-            - query (str, required): The search term for the BlueSky posts.
-            - sort (str, optional): The sorting criteria for results.
-                Options include "top" for top posts or "latest" for the latest posts.
-            - since (str, optional): The start date for posts (ISO 8601 format).
-            - until (str, optional): The end date for posts (ISO 8601 format).
-            - mentions (str, optional): Mentions to filter posts by.
-                - Handles will be resolved to DIDs using the provided token.
-            - author (str, optional): The author of the posts (handle or DID).
-            - lang (str, optional): The language of the posts.
-            - domain (str, optional): A domain URL included in the posts.
-            - url (str, optional): A specific URL included in the posts.
-            - tags (list, optional): Tags to filter posts by (each tag <= 640 characters).
-            - limit (int, optional): The maximum number of posts to retrieve in a single response.
-                Defaults to 25.
-            - cursor (str, optional): Pagination token for continuing from a previous request.
-            - posts_limit (int, optional): The maximum number of posts to retrieve across all responses.
-                Defaults to 500.
-
-    Returns:
-        list: A list of posts matching the search criteria.
-
-    Notes:
-        - Progress is displayed using a progress bar indicating the number of posts fetched.
-        - Handles pagination automatically until `posts_limit` is reached or no further results are available.
-        - Logs and returns partial results if an error occurs during fetching.
-    """
-    posts = []
-    url = "https://bsky.social/xrpc/app.bsky.feed.searchPosts"
-    headers = {
-        "Authorization": f"Bearer {token}",
-        "Content-Type": "application/json",
-    }
-
-    total_fetched = 0
-    posts_limit = params.get("posts_limit")
-    butterfly_bar = bar_factory("✨", tip="🦋", errors="🔥🧯👩‍🚒")
-
-    with alive_bar(posts_limit, bar=butterfly_bar, spinner="waves") as progress:
-        while True:
-            try:
-                response = requests.get(url, headers=headers, params=params, timeout=10)
-                # print(response)
-                response.raise_for_status()
-                data = response.json()
-
-                # Check if we have reached our overall posts limit
-                new_posts = data.get("posts", [])
-                posts.extend(new_posts)
-                total_fetched += len(new_posts)
-
-                # Update progress bar
-                progress(len(new_posts))
-
-                if posts_limit and total_fetched >= posts_limit:
-                    print(
-                        f"Fetched {total_fetched} posts, total: {total_fetched}/{posts_limit}"
-                    )
-                    return posts[:posts_limit]
-
-                # Move to the enxt page if available
-                next_cursor = data.get("cursor")
-                if not next_cursor:
-                    print(f"All posts fetched. Total: {total_fetched}")
-                    return posts
-
-                params["cursor"] = next_cursor
-            except requests.exceptions.RequestException as err:
-                print(f"Error fetching posts: {err}")
-                print(
-                    "Response:",
-                    response.text if "response" in locals() else "No response",
-                )
-                return posts
-
-
 # Begin Click CLI
@@ -446,7 +363,7 @@ def main(
 
     # Fetch posts
     print("Fetching posts...")
-    raw_posts = search_posts(query_param, access_token)
+    raw_posts = scraper.search_posts(query_param, access_token)
 
     # Extract post data
     print("Extracting post data...")

From 137762d3620176803b3875addb14309c80689574 Mon Sep 17 00:00:00 2001
From: Caleb Aguirre-Leon
Date: Sat, 29 Mar 2025 00:12:24 -0400
Subject: [PATCH 03/15] Gaslight

---
 tests/testing | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 tests/testing

diff --git a/tests/testing b/tests/testing
deleted file mode 100644
index e69de29..0000000

From 8b30822253ecc75c268db233ec1ea6183065aa71 Mon Sep 17 00:00:00 2001
From: Caleb Aguirre-Leon
Date: Wed, 14 May 2025 15:37:56 -0400
Subject: [PATCH 04/15] Add testing file

---
 tests/scraper_test.py | 68 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 tests/scraper_test.py

diff --git a/tests/scraper_test.py b/tests/scraper_test.py
new file mode 100644
index 0000000..a3458d0
--- /dev/null
+++ b/tests/scraper_test.py
@@ -0,0 +1,68 @@
+"""Testing suite for the mission_blue module."""
+
+import unittest
+from unittest.mock import Mock, patch
+
+from scraper import (
+    search_posts,
+)
+
+
+class TestCase:
+    """Class used to store test data and expected results for the TestMissionBlue function."""
+
+    def __init__(self, data, expected_result):
+        self.data = data
+        self.expected_result = expected_result
+
+    def get_data(self):
+        # pylint: disable=missing-function-docstring
+        return self.data
+
+    def get_expected_result(self):
+        # pylint: disable=missing-function-docstring
+        return self.expected_result
+
+
+class TestSearchPosts(unittest.TestCase):
+    """_summary_.
+
+    Args:
+        unittest (_type_): _description_
+
+    """
+
+    def test_search_posts(self):
+        """Test case for the validate_url function.
+        This test verifies that the given url contains the correct post data.
+        Test data:
+            - Post Links with valid and invalid post urls.
+            - An expected result boolean.
+        Assertions:
+            - The result of validate_url(data) should match the expected_result.
+        """
+        # If any of the test cases fail, try looking at the no_content_template variable
+        # within the validate_url function.
+        cases = {
+            "Post Exists": TestCase(
+                data="https://bsky.app/profile/witheringtales.bsky.social/post/3legkyuzjs22m",
+                expected_result=True,
+            ),
+            # If the test case fails, look at the validate_url function logic for guidance
+            # on how to fix the test case.
+            "Post Doesn't Exist": TestCase(
+                data="https://bsky.app/profile/witheringtales.bsky.social/post/3legkyuzjs22",
+                expected_result=False,
+            ),
+        }
+
+        for case_name, case in cases.items():
+            with self.subTest(case_name):
+                result = validate_url(case.get_data())
+                self.assertEqual(result, case.get_expected_result())
+
+
+
+
+if __name__ == "__main__":
+    unittest.main()

From b947ad964d5d3f82dfde0b9e14282d2778c61431 Mon Sep 17 00:00:00 2001
From: Caleb Aguirre-Leon
Date: Thu, 15 May 2025 00:54:25 -0400
Subject: [PATCH 05/15] Small changes

---
 mission_blue.py       | 2 --
 tests/scraper_test.py | 2 --
 2 files changed, 4 deletions(-)

diff --git a/mission_blue.py b/mission_blue.py
index 0d7e570..edd8cce 100644
--- a/mission_blue.py
+++ b/mission_blue.py
@@ -2,8 +2,6 @@
 
 import click
 import requests
-from alive_progress import alive_bar
-from alive_progress.animations.bars import bar_factory
 
 import auth
 import scraper
diff --git a/tests/scraper_test.py b/tests/scraper_test.py
index a3458d0..1062567 100644
--- a/tests/scraper_test.py
+++ b/tests/scraper_test.py
@@ -62,7 +62,5 @@ def test_search_posts(self):
                 self.assertEqual(result, case.get_expected_result())
 
 
-
-
 if __name__ == "__main__":
     unittest.main()

From a5d030bac0d70a223c5399b15ac0490fe383cf2b Mon Sep 17 00:00:00 2001
From: andewmark
Date: Tue, 20 May 2025 10:59:36 -0400
Subject: [PATCH 06/15] Imported typing & Fixed Custom Progress Bar

---
 mission_blue.py | 1 +
 scraper.py      | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/mission_blue.py b/mission_blue.py
index 9680005..f809698 100644
--- a/mission_blue.py
+++ b/mission_blue.py
@@ -5,6 +5,7 @@
 import auth
 import scraper
 import file
+from typing import Optional, List, Dict, Any
 
 # pylint: disable=C0301
 lang_dict = {
diff --git a/scraper.py b/scraper.py
index 7dcfe1e..ccb96ad 100644
--- a/scraper.py
+++ b/scraper.py
@@ -3,6 +3,7 @@
 """
 import requests
 from alive_progress import alive_bar
+from alive_progress.animations.bars import bar_factory
 
 def search_posts(params, token):
     # pylint: disable=E1102
@@ -48,8 +49,9 @@ def search_posts(params, token):
 
     total_fetched = 0
     posts_limit = params.get("posts_limit", 1000)
+    butterfly_bar = bar_factory("✨", tip="🦋", errors="🔥🧯👩‍🚒")
 
-    with alive_bar(posts_limit) as progress:
+    with alive_bar(posts_limit, bar=butterfly_bar, spinner="waves") as progress:
         while True:
             try:
                 response = requests.get(url, headers=headers, params=params, timeout=10)

From 1d1f0a7f16fa1a9e846f834ed24cf5b307726689 Mon Sep 17 00:00:00 2001
From: andewmark
Date: Tue, 20 May 2025 11:01:47 -0400
Subject: [PATCH 07/15] Imported typing & Fixed Custom Progress Bar (Again)

---
 scraper.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/scraper.py b/scraper.py
index ccb96ad..974c2f7 100644
--- a/scraper.py
+++ b/scraper.py
@@ -55,7 +55,6 @@ def search_posts(params, token):
         while True:
             try:
                 response = requests.get(url, headers=headers, params=params, timeout=10)
-                # print(response)
                 response.raise_for_status()
                 data = response.json()
 
@@ -68,7 +67,9 @@ def search_posts(params, token):
                 progress(len(new_posts))
 
                 if posts_limit and total_fetched >= posts_limit:
-                    print(f"Fetched {total_fetched} posts, total: {total_fetched}/{posts_limit}")
+                    print(
+                        f"Fetched {total_fetched} posts, total: {total_fetched}/{posts_limit}"
+                    )
                     return posts[:posts_limit]
 
                 #Move to the enxt page if available

From b298745730c02b8f31550512d038a39483fcb875 Mon Sep 17 00:00:00 2001
From: andewmark
Date: Wed, 21 May 2025 13:57:46 -0400
Subject: [PATCH 08/15] imported correct methods into scraper_test.py

---
 mission_blue.py       |  2 ++
 scraper.py            | 21 ++++++++++++---------
 tests/scraper_test.py |  7 ++-----
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/mission_blue.py b/mission_blue.py
index f809698..935e62e 100644
--- a/mission_blue.py
+++ b/mission_blue.py
@@ -6,6 +6,7 @@
 import scraper
 import file
 from typing import Optional, List, Dict, Any
+
 # pylint: disable=C0301
 lang_dict = {
@@ -210,6 +211,7 @@ def generate_query_params(
         "posts_limit": posts_limit,
     }
 
+
 # Begin Click CLI
diff --git a/scraper.py b/scraper.py
index 974c2f7..9ea11ff 100644
--- a/scraper.py
+++ b/scraper.py
@@ -1,10 +1,12 @@
 """
 This module contains a function to search for posts using the BlueSky API.
 """
+
 import requests
 from alive_progress import alive_bar
 from alive_progress.animations.bars import bar_factory
 
+
 def search_posts(params, token):
     # pylint: disable=E1102
     # pylint: disable=C0301
@@ -15,22 +17,22 @@ def search_posts(params, token):
     Args:
         params (dict): The query parameters for the API request.
             - query (str, required): The search term for the BlueSky posts.
-            - sort (str, optional): The sorting criteria for results. 
+            - sort (str, optional): The sorting criteria for results.
                 Options include "top" for top posts or "latest" for the latest posts.
             - since (str, optional): The start date for posts (ISO 8601 format).
             - until (str, optional): The end date for posts (ISO 8601 format).
-            - mentions (str, optional): Mentions to filter posts by. 
+            - mentions (str, optional): Mentions to filter posts by.
                 - Handles will be resolved to DIDs using the provided token.
             - author (str, optional): The author of the posts (handle or DID).
             - lang (str, optional): The language of the posts.
            - domain (str, optional): A domain URL included in the posts.
             - url (str, optional): A specific URL included in the posts.
             - tags (list, optional): Tags to filter posts by (each tag <= 640 characters).
-            - limit (int, optional): The maximum number of posts to retrieve in a single response. 
+            - limit (int, optional): The maximum number of posts to retrieve in a single response.
                 Defaults to 25.
             - cursor (str, optional): Pagination token for continuing from a previous request.
             - posts_limit (int, optional): The maximum number of posts to retrieve across all responses.
-                Defaults to 500. 
+                Defaults to 500.
 
     Returns:
         list: A list of posts matching the search criteria.
@@ -58,30 +60,31 @@ def search_posts(params, token):
                 response.raise_for_status()
                 data = response.json()
 
-                #Check if we have reached our overall posts limit
+                # Check if we have reached our overall posts limit
                 new_posts = data.get("posts", [])
                 posts.extend(new_posts)
                 total_fetched += len(new_posts)
 
-                #Update progress bar
+                # Update progress bar
                 progress(len(new_posts))
 
                 if posts_limit and total_fetched >= posts_limit:
                     print(
                         f"Fetched {total_fetched} posts, total: {total_fetched}/{posts_limit}"
                     )
                     return posts[:posts_limit]
 
-                #Move to the enxt page if available
+                # Move to the enxt page if available
                 next_cursor = data.get("cursor")
                 if not next_cursor:
                     print(f"All posts fetched. Total: {total_fetched}")
                     return posts
 
                 params["cursor"] = next_cursor
             except requests.exceptions.RequestException as err:
                 print(f"Error fetching posts: {err}")
                 print(
-                    "Response:", response.text if "response" in locals() else "No response"
+                    "Response:",
+                    response.text if "response" in locals() else "No response",
                 )
-                return posts
\ No newline at end of file
+                return posts
diff --git a/tests/scraper_test.py b/tests/scraper_test.py
index 1062567..e84c53f 100644
--- a/tests/scraper_test.py
+++ b/tests/scraper_test.py
@@ -1,11 +1,8 @@
 """Testing suite for the mission_blue module."""
 
 import unittest
-from unittest.mock import Mock, patch
-
-from scraper import (
-    search_posts,
-)
+from scraper import search_posts
+from file import validate_url
 
 
 class TestCase:

From 81285c9c64d088c3624b5d56b71fcee363e888ca Mon Sep 17 00:00:00 2001
From: andewmark
Date: Wed, 21 May 2025 19:16:10 -0400
Subject: [PATCH 09/15] Added tests & Updated Logic

search_posts now ensures that query is within params (required).
Test cases validate this new logic.
---
 scraper.py            |  6 ++++++
 tests/scraper_test.py | 83 ++++++++++++++++++++-----------------------
 2 files changed, 42 insertions(+), 47 deletions(-)

diff --git a/scraper.py b/scraper.py
index 9ea11ff..0dd7636 100644
--- a/scraper.py
+++ b/scraper.py
@@ -42,6 +42,12 @@ def search_posts(params, token):
         - Handles pagination automatically until `posts_limit` is reached or no further results are available.
         - Logs and returns partial results if an error occurs during fetching.
     """
+    # Validate input parameters
+    if "query" not in params:
+        raise ValueError("Query parameter is required.")
+    if not token:
+        raise ValueError("Token is required.")
+
     posts = []
     url = "https://bsky.social/xrpc/app.bsky.feed.searchPosts"
     headers = {
diff --git a/tests/scraper_test.py b/tests/scraper_test.py
index e84c53f..d921014 100644
--- a/tests/scraper_test.py
+++ b/tests/scraper_test.py
@@ -1,25 +1,8 @@
 """Testing suite for the mission_blue module."""
 
 import unittest
+from unittest.mock import patch, Mock, MagicMock
 from scraper import search_posts
-from file import validate_url
-
-
-class TestCase:
-    """Class used to store test data and expected results for the TestMissionBlue function."""
-
-    def __init__(self, data, expected_result):
-        self.data = data
-        self.expected_result = expected_result
-
-    def get_data(self):
-        # pylint: disable=missing-function-docstring
-        return self.data
-
-    def get_expected_result(self):
-        # pylint: disable=missing-function-docstring
-        return self.expected_result
-
 
 class TestSearchPosts(unittest.TestCase):
     """_summary_.
@@ -28,38 +11,44 @@ class TestSearchPosts(unittest.TestCase):
     Args:
         unittest (_type_): _description_
 
     """
-
-    def test_search_posts(self):
-        """Test case for the validate_url function.
-        This test verifies that the given url contains the correct post data.
-        Test data:
-            - Post Links with valid and invalid post urls.
-            - An expected result boolean.
-        Assertions:
-            - The result of validate_url(data) should match the expected_result.
-        """
-        # If any of the test cases fail, try looking at the no_content_template variable
-        # within the validate_url function.
-        cases = {
-            "Post Exists": TestCase(
-                data="https://bsky.app/profile/witheringtales.bsky.social/post/3legkyuzjs22m",
-                expected_result=True,
-            ),
-            # If the test case fails, look at the validate_url function logic for guidance
-            # on how to fix the test case.
-            "Post Doesn't Exist": TestCase(
-                data="https://bsky.app/profile/witheringtales.bsky.social/post/3legkyuzjs22",
-                expected_result=False,
-            ),
-        }
-
-        for case_name, case in cases.items():
-            with self.subTest(case_name):
-                result = validate_url(case.get_data())
-                self.assertEqual(result, case.get_expected_result())
+    # Dummy API data for testing
+
+    @patch("scraper.requests.get")
+    def test_no_query(self, mock_get: MagicMock) -> None:
+        """Test if the function raises ValueError when a query is not provided."""
+        params = {}
+        token = "valid_token"
+
+        with self.assertRaises(ValueError) as cm:
+            search_posts(params, token)
+
+        mock_get.assert_not_called()
+        self.assertIn("query", str(cm.exception).lower())
+
+    @patch("scraper.requests.get")
+    def test_no_token(self, mock_get: MagicMock) -> None:
+        """Test if the function raises ValueError when a token it not provided."""
+        params = {"query": "test"}
+        token = None
+
+        with self.assertRaises(ValueError) as cm:
+            search_posts(params, token)
+
+        mock_get.assert_not_called()
+        self.assertIn("token", str(cm.exception).lower())
+
+    # Ensure that the function returns an empty list when no posts are found
+
+    # Ensure that the function returns a list of posts when valid parameters are provided
+
+    # Ensure that the function handles pagination correctly and returns all posts
+
+    # Simulate a failed API response (e.g., 400: [InvalidRequest, ExpiredToken, InvalidToken, BadQueryString])
+
+    # Simulate a failed API response (401)
 
 
 if __name__ == "__main__":
     unittest.main()

From 63b925f0a5f16b92564ba4eaa59853e8de16520d Mon Sep 17 00:00:00 2001
From: andewmark
Date: Thu, 22 May 2025 01:20:54 -0400
Subject: [PATCH 10/15] added test

---
 scraper.py            |  4 +++-
 tests/scraper_test.py | 60 ++++++++++++++++++++++++++++++++++---------
 2 files changed, 49 insertions(+), 15 deletions(-)

diff --git a/scraper.py b/scraper.py
index 0dd7636..169c2be 100644
--- a/scraper.py
+++ b/scraper.py
@@ -3,6 +3,7 @@
 """
 
 import requests
+import pprint
 from alive_progress import alive_bar
 from alive_progress.animations.bars import bar_factory
 
@@ -43,7 +44,7 @@ def search_posts(params, token):
     """
     # Validate input parameters
-    if "query" not in params:
+    if "q" not in params:
         raise ValueError("Query parameter is required.")
     if not token:
         raise ValueError("Token is required.")
@@ -78,6 +79,7 @@ def search_posts(params, token):
                     print(
                         f"Fetched {total_fetched} posts, total: {total_fetched}/{posts_limit}"
                     )
+                    pprint.pprint(posts[0])
                     return posts[:posts_limit]
 
                 # Move to the enxt page if available
diff --git a/tests/scraper_test.py b/tests/scraper_test.py
index d921014..91d93f9 100644
--- a/tests/scraper_test.py
+++ b/tests/scraper_test.py
@@ -1,18 +1,13 @@
 """Testing suite for the mission_blue module."""
 
 import unittest
-from unittest.mock import patch, Mock, MagicMock
+from unittest.mock import patch, MagicMock
 from scraper import search_posts
 
-class TestSearchPosts(unittest.TestCase):
-    """_summary_.
-
-    Args:
-        unittest (_type_): _description_
-
-    """
-    # Dummy API data for testing
+
+class TestSearchPosts(unittest.TestCase):
+    """Testing the search_posts() method."""
 
     @patch("scraper.requests.get")
     def test_no_query(self, mock_get: MagicMock) -> None:
         """Test if the function raises ValueError when a query is not provided."""
@@ -19,16 +14,16 @@ class TestSearchPosts(unittest.TestCase):
         params = {}
         token = "valid_token"
 
         with self.assertRaises(ValueError) as cm:
             search_posts(params, token)
-
+
         mock_get.assert_not_called()
         self.assertIn("query", str(cm.exception).lower())
 
     @patch("scraper.requests.get")
     def test_no_token(self, mock_get: MagicMock) -> None:
         """Test if the function raises ValueError when a token it not provided."""
-        params = {"query": "test"}
+        params = {"q": "test"}
         token = None
 
         with self.assertRaises(ValueError) as cm:
@@ -39,10 +34,47 @@ def test_no_token(self, mock_get: MagicMock) -> None:
 
     # Ensure that the function returns an empty list when no posts are found
 
-    # Ensure that the function returns a list of posts when valid parameters are provided
+    @patch("scraper.requests.get")
+    def test_valid_response(self, mock_get: MagicMock) -> None:
+        """Test that the function returns a list of posts when valid parameters are provided."""
+        params = {"q": "test"}
+        token = "valid_token"
+
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {
+            "posts": [
+                {
+                    "uri": "at://did:plc:12345/app.bsky.feed.post/abcdef",
+                    "cid": "bafyre123...",
+                    "author": {
+                        "did": "did:plc:12345",
+                        "handle": "author_handle",
+                        "displayName": "Author Name",
+                    },
+                    "record": {
+                        "text": "Post content",
+                        "createdAt": "2023-10-01T00:00:00Z",
+                        "$type": "app.bsky.feed.post",
+                    },
+                    "indexedAt": "2023-10-01T00:00:01Z",
+                }
+            ],
+            "cursor": None,
+        }
+
+        mock_get.return_value = mock_response
+
+        result = search_posts(params, token)
+
+        self.assertEqual(len(result), 1)
+        self.assertEqual(result[0]["record"]["text"], "Post content")
+        self.assertEqual(result[0]["author"]["handle"], "author_handle")
+        self.assertEqual(result[0]["record"]["createdAt"], "2023-10-01T00:00:00Z")
+        self.assertEqual(
+            result[0]["uri"], "at://did:plc:12345/app.bsky.feed.post/abcdef"
+        )
 
-    # Ensure that the function handles pagination correctly and returns all posts
-
     # Simulate a failed API response (e.g., 400: [InvalidRequest, ExpiredToken, InvalidToken, BadQueryString])
 
     # Simulate a failed API response (401)

From ff94fd894bdb2e11220489f0c5b35be7c19e6465 Mon Sep 17 00:00:00 2001
From: andewmark
Date: Thu, 22 May 2025 21:58:34 -0400
Subject: [PATCH 11/15] Added Simulated API response test case

Verified that search_posts() logic handles Client Error 400 correctly
and gracefully
---
 scraper.py            |  2 --
 tests/scraper_test.py | 25 ++++++++++++++++++++++++-
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/scraper.py b/scraper.py
index 169c2be..7050a52 100644
--- a/scraper.py
+++ b/scraper.py
@@ -3,7 +3,6 @@
 """
 
 import requests
-import pprint
 from alive_progress import alive_bar
 from alive_progress.animations.bars import bar_factory
 
@@ -79,7 +78,6 @@ def search_posts(params, token):
                     print(
                         f"Fetched {total_fetched} posts, total: {total_fetched}/{posts_limit}"
                     )
-                    pprint.pprint(posts[0])
                     return posts[:posts_limit]
 
                 # Move to the enxt page if available
diff --git a/tests/scraper_test.py b/tests/scraper_test.py
index 91d93f9..2a531e7 100644
--- a/tests/scraper_test.py
+++ b/tests/scraper_test.py
@@ -1,6 +1,9 @@
 """Testing suite for the mission_blue module."""
 
 import unittest
+import requests
+import io
+import sys
 from unittest.mock import patch, MagicMock
 from scraper import search_posts
 
@@ -76,8 +79,28 @@ def test_valid_response(self, mock_get: MagicMock) -> None:
         )
 
     # Simulate a failed API response (e.g., 400: [InvalidRequest, ExpiredToken, InvalidToken, BadQueryString])
+    @patch("scraper.requests.get")
+    def test_invalid_request(self, mock_get: MagicMock) -> None:
+        """Test that the function handles invalid requests gracefully."""
+        params = {"q": "test"}
+        token = "invalid_token"
+
+        mock_response = MagicMock()
+        mock_response.status_code = 400
+        mock_response.raise_for_status.side_effect = requests.exceptions.HTTPError(
+            "400 Client Error: InvalidToken"
+        )
+        mock_get.return_value = mock_response
+
+        # Redircting stdout to StringIO
+        captured_output = io.StringIO()
+        sys.stdout = captured_output
+
+        result = search_posts(params, token)
 
-    # Simulate a failed API response (401)
+        sys.stdout = sys.__stdout__
 
+        self.assertEqual(result, [])
+        self.assertIn("400 Client Error:", captured_output.getvalue())
 
 
 if __name__ == "__main__":

From 5098d24a32d49faecff57f9c2e559f7360616e1b Mon Sep 17 00:00:00 2001
From: andewmark
Date: Wed, 4 Jun 2025 17:03:48 -0400
Subject: [PATCH 12/15] Resolved Nitpicks and suggested changes

---
 mission_blue.py       | 2 +-
 scraper.py            | 4 ++--
 tests/scraper_test.py | 8 +++-----
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/mission_blue.py b/mission_blue.py
index 935e62e..4a0c338 100644
--- a/mission_blue.py
+++ b/mission_blue.py
@@ -1,4 +1,4 @@
-"""This module conatins the BlueSky Web Scrapper."""
+"""This module contains the BlueSky Web Scraper."""
 
 import click
 import requests
diff --git a/scraper.py b/scraper.py
index 7050a52..384c3dd 100644
--- a/scraper.py
+++ b/scraper.py
@@ -56,7 +56,7 @@ def search_posts(params, token):
     }
 
     total_fetched = 0
-    posts_limit = params.get("posts_limit", 1000)
+    posts_limit = params.get("posts_limit", 500)
     butterfly_bar = bar_factory("✨", tip="🦋", errors="🔥🧯👩‍🚒")
 
     with alive_bar(posts_limit, bar=butterfly_bar, spinner="waves") as progress:
@@ -80,7 +80,7 @@ def search_posts(params, token):
                     )
                     return posts[:posts_limit]
 
-                # Move to the enxt page if available
+                # Move to the nextt page if available
                 next_cursor = data.get("cursor")
                 if not next_cursor:
                     print(f"All posts fetched. Total: {total_fetched}")
diff --git a/tests/scraper_test.py b/tests/scraper_test.py
index 2a531e7..6be76f2 100644
--- a/tests/scraper_test.py
+++ b/tests/scraper_test.py
@@ -1,4 +1,4 @@
-"""Testing suite for the mission_blue module."""
+"""Testing suite for the scraper module."""
 
 import unittest
 import requests
@@ -25,7 +25,7 @@ class TestSearchPosts(unittest.TestCase):
 
     @patch("scraper.requests.get")
     def test_no_token(self, mock_get: MagicMock) -> None:
-        """Test if the function raises ValueError when a token it not provided."""
+        """Test if the function raises ValueError when a token is not provided."""
         params = {"q": "test"}
         token = None
 
@@ -35,8 +35,6 @@ def test_no_token(self, mock_get: MagicMock) -> None:
         mock_get.assert_not_called()
         self.assertIn("token", str(cm.exception).lower())
 
-    # Ensure that the function returns an empty list when no posts are found
-
     @patch("scraper.requests.get")
     def test_valid_response(self, mock_get: MagicMock) -> None:
         """Test that the function returns a list of posts when valid parameters are provided."""
@@ -92,7 +90,7 @@ def test_invalid_request(self, mock_get: MagicMock) -> None:
         )
         mock_get.return_value = mock_response
 
-        # Redircting stdout to StringIO
+        # Redirecting stdout to StringIO
         captured_output = io.StringIO()
         sys.stdout = captured_output

From f122fc5d62d483f21cc8b405a4f09951818c1a25 Mon Sep 17 00:00:00 2001
From: Caleb Aguirre-Leon
Date: Wed, 4 Jun 2025 22:36:23 +0100
Subject: [PATCH 13/15] Update scraper.py

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
---
 scraper.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scraper.py b/scraper.py
index 384c3dd..a966251 100644
--- a/scraper.py
+++ b/scraper.py
@@ -72,7 +72,9 @@ def search_posts(params, token):
                 total_fetched += len(new_posts)
 
                 # Update progress bar
-                progress(len(new_posts))
+                # Update progress bar
+                remaining = posts_limit - (total_fetched - len(new_posts))
+                progress(min(len(new_posts), remaining))
 
                 if posts_limit and total_fetched >= posts_limit:
                     print(

From 0a1d4f4f2f30e87bc432d76aacdf41b2b578f38d Mon Sep 17 00:00:00 2001
From: Caleb Aguirre-Leon
Date: Wed, 4 Jun 2025 22:47:56 +0100
Subject: [PATCH 14/15] Applied Recommended Changes from Coderabbit AI

Making this cleanup ensures that our scraper remains functional when
scrapes get large.
---
 scraper.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/scraper.py b/scraper.py
index a966251..1a01de9 100644
--- a/scraper.py
+++ b/scraper.py
@@ -76,13 +76,16 @@ def search_posts(params, token):
                 remaining = posts_limit - (total_fetched - len(new_posts))
                 progress(min(len(new_posts), remaining))
 
-                if posts_limit and total_fetched >= posts_limit:
+                if total_fetched >= posts_limit:
                     print(
                         f"Fetched {total_fetched} posts, total: {total_fetched}/{posts_limit}"
                     )
-                    return posts[:posts_limit]
+                    # Truncate only if we exceeded the limit
+                    if len(posts) > posts_limit:
+                        posts = posts[:posts_limit]
+                    return posts
 
-                # Move to the nextt page if available
+                # Move to the next page if available
                 next_cursor = data.get("cursor")
                 if not next_cursor:
                     print(f"All posts fetched. Total: {total_fetched}")

From 3581b2eabfbcb3106b9b0f7f0a085799808e7bcc Mon Sep 17 00:00:00 2001
From: andewmark
Date: Wed, 4 Jun 2025 19:18:20 -0400
Subject: [PATCH 15/15] Fixed default values

posts_limit was still defaulted to 1000 even though the docstring said
500, and other places had the opposite mismatch. Everything is now set
to a default of 500.
---
 mission_blue.py | 6 +++---
 scraper.py      | 1 -
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/mission_blue.py b/mission_blue.py
index 4a0c338..03b1ebe 100644
--- a/mission_blue.py
+++ b/mission_blue.py
@@ -304,10 +304,10 @@ def generate_query_params(
     "--posts_limit",
     type=click.IntRange(1, None),
     required=False,
-    default=1000,
+    default=500,
     help=(
         "Set the total number of posts to fetch from the API across all paginated responses. This value limits the total data retrieved "
-        "even if multiple API calls are required. If not specified, 1000 posts will be recieved."
+        "even if multiple API calls are required. If not specified, 500 posts will be received."
     ),
 )
 def main(
@@ -322,7 +322,7 @@ def main(
     url: str = "",
     tags: tuple = (),
     limit: int = 25,
-    posts_limit: int = 1000,
+    posts_limit: int = 500,
 ) -> None:
     """Method that tests if each click param flag is being passed in correctly."""
     # pylint: disable=R0913
diff --git a/scraper.py b/scraper.py
index 1a01de9..933878c 100644
--- a/scraper.py
+++ b/scraper.py
@@ -71,7 +71,6 @@ def search_posts(params, token):
                 posts.extend(new_posts)
                 total_fetched += len(new_posts)
 
-                # Update progress bar
                 # Update progress bar
                 remaining = posts_limit - (total_fetched - len(new_posts))
                 progress(min(len(new_posts), remaining))
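
A minimal usage sketch of scraper.search_posts() as it stands after PATCH
15/15. This is illustrative only: the token value and the query below are
placeholders, and obtaining a real Bearer token is handled elsewhere in the
project (the auth module imported by mission_blue.py, whose interface is not
shown in this series).

    import scraper

    # Placeholder: mission_blue.py obtains an access token through its auth
    # flow before calling scraper.search_posts().
    token = "..."

    # "q" is required; search_posts() raises ValueError without it.
    # "posts_limit" caps the total fetched across paginated responses
    # (it defaults to 500 after PATCH 15/15).
    params = {"q": "mission blue butterfly", "limit": 25, "posts_limit": 100}

    posts = scraper.search_posts(params, token)
    print(f"Retrieved {len(posts)} posts")

Note that search_posts() returns whatever it has fetched so far if a request
fails, so callers should treat a short list as potentially partial results.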