From 7c70f911df6ff1643253e2c6e76f31af69e87d41 Mon Sep 17 00:00:00 2001 From: Lu Ken Date: Mon, 1 Dec 2025 14:00:12 +0800 Subject: [PATCH 01/15] add missing python dependency Signed-off-by: Lu Ken --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index cc5d1d3..ad49e6b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,3 +14,5 @@ langchain_core langchain_tavily langchain langgraph + +lxml[html_clean] From fed62d1e1e278d34256c64d6a776eb63d5c171f8 Mon Sep 17 00:00:00 2001 From: Lu Ken Date: Mon, 1 Dec 2025 14:29:30 +0800 Subject: [PATCH 02/15] improve the HTML download by adding random sleep and multiple browser's headers Signed-off-by: Lu Ken --- src/gentrade/news/factory.py | 43 +++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/src/gentrade/news/factory.py b/src/gentrade/news/factory.py index 54b75b5..81f01e6 100644 --- a/src/gentrade/news/factory.py +++ b/src/gentrade/news/factory.py @@ -10,6 +10,7 @@ import logging import time import threading +import random from typing import List, Optional, Set from urllib.parse import urlparse # Add this to extract domain from URL @@ -261,6 +262,9 @@ def _extract_news_text(self, url: str) -> str: Returns: Cleaned text content of the article, or empty string if extraction fails. """ + # Add random delay before request to avoid rate limiting + time.sleep(random.uniform(1, 3)) + try: article = Article(url) article.download() @@ -287,22 +291,49 @@ def _fetch_original_html(self, url: str, timeout: int = 10) -> Optional[str]: Returns: Raw HTML content as a string, or None if fetch fails. """ - headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " - "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" - } + # More realistic headers that mimic popular browsers + headers_list = [ + { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif," + "image/webp,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.5", + "Accept-Encoding": "gzip, deflate, br", + "Connection": "keep-alive", + "Upgrade-Insecure-Requests": "1", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "none", + "Sec-Fetch-User": "?1" + }, + { + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " + "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Safari/605.1.15", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.5", + "Accept-Encoding": "gzip, deflate, br", + "Connection": "keep-alive", + "Upgrade-Insecure-Requests": "1" + } + ] retries = 3 + # Use a random header from the list for each request + headers = random.choice(headers_list) for attempt in range(retries): try: + # Add random delay between retries (0.5-2 seconds) + if attempt > 0: + time.sleep(random.uniform(0.5, 2.0)) + response = requests.get( - url, headers=headers, timeout=timeout, verify=False + url, headers=headers, timeout=timeout, verify=True ) response.raise_for_status() return response.text except Exception as e: if attempt < retries - 1: - time.sleep(1) continue LOG.error(f"Failed to fetch HTML after {retries} retries ({url}): {e}") return None From c75724b0a9caa4f8fc19567920143ad642197d78 Mon Sep 17 00:00:00 2001 From: Lu Ken Date: Mon, 1 Dec 2025 17:33:51 +0800 Subject: [PATCH 03/15] add Makefile 
Signed-off-by: Lu Ken --- Makefile | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8a518a1 --- /dev/null +++ b/Makefile @@ -0,0 +1,33 @@ +# Makefile for Pylint with suggestion-mode support + +# Default target +all: lint + +# Pylint configuration +PYLINT = pylint +PYLINT_ARGS = --rcfile=.pylintrc +PYLINT_VERSION = pylint==4.0.4 + + +# Lint all Python files with suggestion-mode support +lint: + pip install $(PYLINT_VERSION) + @echo "Running Pylint with suggestion-mode support..." + find . -type f -name "*.py" | xargs $(PYLINT) $(PYLINT_ARGS) + +# Lint specific file(s) +lint-file: + @if [ -z "$(FILE)" ]; then \ + echo "Usage: make lint-file FILE=path/to/file.py"; \ + exit 1; \ + fi + pip install $(PYLINT_VERSION) + $(PYLINT) $(PYLINT_ARGS) $(FILE) + + +# Clean up (optional) +clean: + find . -type d -name "__pycache__" -exec rm -rf {} + + find . -type f -name "*.pyc" -delete + +.PHONY: all lint lint-file clean From 088d654a564866c624f594b7451926a59036307a Mon Sep 17 00:00:00 2001 From: Lu Ken Date: Mon, 1 Dec 2025 17:34:17 +0800 Subject: [PATCH 04/15] remove obsoleted option Signed-off-by: Lu Ken --- .pylintrc | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.pylintrc b/.pylintrc index ded06ab..365e076 100644 --- a/.pylintrc +++ b/.pylintrc @@ -104,10 +104,6 @@ recursive=no # source root. source-roots= -# When enabled, pylint would attempt to guess common misconfiguration and emit -# user-friendly hints instead of false-positive error messages. -suggestion-mode=yes - # Allow loading of arbitrary C extensions. Extensions are imported into the # active Python interpreter and may run arbitrary code. unsafe-load-any-extension=no @@ -457,7 +453,8 @@ disable=raw-checker-failed, import-error, duplicate-code, redefined-outer-name, - logging-fstring-interpolation + logging-fstring-interpolation, + abstract-class-instantiated # Enable the message, report, category or checker with the given id(s). You can # either give multiple identifier separated by comma (,) or put this option From 44fac0b0b686a60d87f5d817f9b6ddf276da06b4 Mon Sep 17 00:00:00 2001 From: Lu Ken Date: Mon, 1 Dec 2025 17:34:46 +0800 Subject: [PATCH 05/15] update langchain's package name Signed-off-by: Lu Ken --- src/gentrade/llm/factory.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gentrade/llm/factory.py b/src/gentrade/llm/factory.py index dc86183..fd297c3 100644 --- a/src/gentrade/llm/factory.py +++ b/src/gentrade/llm/factory.py @@ -24,11 +24,11 @@ from pydantic import Field from langchain_openai import ChatOpenAI -from langchain.schema import ( +from langchain_core.messages import ( AIMessage, BaseMessage ) -from langchain.schema.runnable import RunnableConfig +from langchain_core.runnables import RunnableConfig LOG = logging.getLogger(__name__) From 3d2c3b4953ef1210056b364c61fe8cacfc779bcc Mon Sep 17 00:00:00 2001 From: Lu Ken Date: Mon, 1 Dec 2025 17:35:14 +0800 Subject: [PATCH 06/15] update the directory name Signed-off-by: Lu Ken --- .github/workflows/pylint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 0ecee88..504ab9f 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -33,5 +33,5 @@ jobs: - name: Analyzing the python code run: | set -ex - export PYTHONPATH=$PWD/src/tia/ + export PYTHONPATH=$PWD/src/gentrade/ find . 
-type f -name "*.py" | xargs pylint From fd7619fbeef3a9e637ee5683b9b26629259dc26e Mon Sep 17 00:00:00 2001 From: Lu Ken Date: Mon, 1 Dec 2025 18:05:24 +0800 Subject: [PATCH 07/15] add more action in Makefile Signed-off-by: Lu Ken --- .github/workflows/pylint.yml | 9 ++++++-- Makefile | 39 ++++++++++++++++++++++------------ src/gentrade/scraper/search.py | 1 + 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 504ab9f..ccba373 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -20,18 +20,23 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 with: python-version: 3.11 + - name: Install dependencies run: | + sudo apt-get update + sudo apt-get install -y build-essential + pip3 install --upgrade pip - pip3 install pylint for f in $(find -type f -name "requirements.txt"); do pip3 install -r $f done + - name: Analyzing the python code run: | set -ex export PYTHONPATH=$PWD/src/gentrade/ - find . -type f -name "*.py" | xargs pylint + make lint diff --git a/Makefile b/Makefile index 8a518a1..cf24f20 100644 --- a/Makefile +++ b/Makefile @@ -1,19 +1,21 @@ -# Makefile for Pylint with suggestion-mode support -# Default target -all: lint +CURRENT_DIR := $(shell pwd) # Pylint configuration -PYLINT = pylint PYLINT_ARGS = --rcfile=.pylintrc PYLINT_VERSION = pylint==4.0.4 +REQUIRED_PYTHON_VERSION := "3.11" +PYTHON_VERSION_OK := $(shell python -c "import sys; print(sys.version_info >= (3,11) and sys.version_info < (3,12))") + +# Default target +all: lint -# Lint all Python files with suggestion-mode support +# Lint all Python files lint: pip install $(PYLINT_VERSION) - @echo "Running Pylint with suggestion-mode support..." - find . -type f -name "*.py" | xargs $(PYLINT) $(PYLINT_ARGS) + @echo "Running Pylint ..." + find . -type f -name "*.py" | xargs pylint $(PYLINT_ARGS) # Lint specific file(s) lint-file: @@ -22,12 +24,23 @@ lint-file: exit 1; \ fi pip install $(PYLINT_VERSION) - $(PYLINT) $(PYLINT_ARGS) $(FILE) - + pylint $(PYLINT_ARGS) $(FILE) + +# Check python version +check-python: + @echo "Checking Python version..." + @if [ "$(PYTHON_VERSION_OK)" = "True" ]; then \ + echo "✅ Python version is $(REQUIRED_PYTHON_VERSION) (compatible)"; \ + else \ + echo "❌ Error: Python $(REQUIRED_PYTHON_VERSION) is required (found $(shell python --version | cut -d' ' -f2))"; \ + exit 1; \ + fi -# Clean up (optional) +# Clean up clean: - find . -type d -name "__pycache__" -exec rm -rf {} + - find . -type f -name "*.pyc" -delete + @echo "Cleaning in: $(CURRENT_DIR)..." 
+ find $(CURRENT_DIR) -type d -name "__pycache__" -exec rm -rf {} + + find $(CURRENT_DIR) -type f -name "*.pyc" -delete + find $(CURRENT_DIR) -type f -name ".pylint-cache" -exec rm -rf {} + -.PHONY: all lint lint-file clean +.PHONY: all lint lint-file clean check-python diff --git a/src/gentrade/scraper/search.py b/src/gentrade/scraper/search.py index 4a43e35..0509a4b 100644 --- a/src/gentrade/scraper/search.py +++ b/src/gentrade/scraper/search.py @@ -266,4 +266,5 @@ def search( fetch_content=True, ) + print(json.dumps(news, ensure_ascii=False, indent=2)) From 4f5cd029d86127c38b9f7205c435b4d17d07a8ac Mon Sep 17 00:00:00 2001 From: Lu Ken Date: Tue, 2 Dec 2025 10:40:38 +0800 Subject: [PATCH 08/15] add missing python dependency: newspaper3k Signed-off-by: Lu Ken --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index ad49e6b..ed82156 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,3 +16,4 @@ langchain langgraph lxml[html_clean] +newspaper3k \ No newline at end of file From 3ac2a9b4ed4cad5dd1d4df19971bf342ac4eced1 Mon Sep 17 00:00:00 2001 From: Lu Ken Date: Tue, 2 Dec 2025 10:50:04 +0800 Subject: [PATCH 09/15] clean up and refine the code in factory Signed-off-by: Lu Ken --- src/gentrade/news/factory.py | 201 ++++++----------------------------- 1 file changed, 31 insertions(+), 170 deletions(-) diff --git a/src/gentrade/news/factory.py b/src/gentrade/news/factory.py index 81f01e6..81f4917 100644 --- a/src/gentrade/news/factory.py +++ b/src/gentrade/news/factory.py @@ -15,6 +15,7 @@ from urllib.parse import urlparse # Add this to extract domain from URL import requests +from gentrade.scraper.extractor import ArticleContentExtractor from newspaper import Article from bs4 import BeautifulSoup @@ -133,6 +134,35 @@ def _save_blocklist(self) -> None: for domain in self.blocklist: f.write(f"{domain}\n") + def _fetch_thread(self, provider, aggregator, ticker, category, + max_hour_interval, max_count, is_process=False): + if ticker: + news = provider.fetch_stock_news( + ticker, category, max_hour_interval, max_count + ) + LOG.info( + f"Fetched {len(news)} stock news articles for {ticker} from " + f"{provider.__class__.__name__}" + ) + else: + news = provider.fetch_latest_market_news( + category, max_hour_interval, max_count + ) + LOG.info( + f"Fetched {len(news)} market news articles from " + f"{provider.__class__.__name__}" + ) + + ace = ArticleContentExtractor.inst() + for item in news: + item.summary = ace.clean_html(item.summary) + if is_process: + item.content = ace.extract_content(item.url) + LOG.info(item.content) + + with aggregator.db_lock: + aggregator.db.add_news(news) + def sync_news( self, ticker: Optional[str] = None, @@ -158,32 +188,10 @@ def sync_news( LOG.info("Starting news sync...") - def fetch_and_process(provider, aggregator, ticker, category, max_hour_interval, max_count): - if ticker: - news = provider.fetch_stock_news( - ticker, category, max_hour_interval, max_count - ) - LOG.info( - f"Fetched {len(news)} stock news articles for {ticker} from " - f"{provider.__class__.__name__}" - ) - else: - news = provider.fetch_latest_market_news( - category, max_hour_interval, max_count - ) - LOG.info( - f"Fetched {len(news)} market news articles from " - f"{provider.__class__.__name__}" - ) - - aggregator.process_news(news) - with aggregator.db_lock: - aggregator.db.add_news(news) - threads = [] for provider in self.providers: thread = threading.Thread( - target=fetch_and_process, + target=self._fetch_thread, 
args=(provider, self, ticker, category, max_hour_interval, max_count) ) threads.append(thread) @@ -195,28 +203,6 @@ def fetch_and_process(provider, aggregator, ticker, category, max_hour_interval, self.db.last_sync = current_time LOG.info("News sync completed.") - def process_news(self, news: List[NewsInfo]) -> None: - """Process news: Skip blocked sites → Check for dummy content → Clean content""" - # Filter out news from blocked websites FIRST - filtered_news = [n for n in news if not self._is_blocked(n.url)] - - for article in filtered_news: - LOG.info(f"Processing news: {article.headline}") - - # Extract content and check for dummy messages - content = self._extract_news_text(article.url) - if self._contains_dummy_content(content): - # Add the website to blocklist if dummy content is found - domain = self._extract_domain(article.url) - self.blocklist.add(domain) - LOG.warning(f"Blocked website {domain} (contains dummy content)") - continue # Skip storing this article - - # Proceed with normal cleaning if no dummy content - article.summary = self._clean_html(article.summary) - article.content = content - time.sleep(1) - def _is_blocked(self, url: str) -> bool: """Check if the website of the URL is in the blocklist""" domain = self._extract_domain(url) @@ -241,131 +227,6 @@ def _extract_domain(self, url: str) -> str: LOG.error(f"Failed to extract domain from {url}: {e}") return url # Fallback to full URL if parsing fails - def _contains_dummy_content(self, content: str) -> bool: - """Check if content contains dummy messages (case-insensitive)""" - if not content: - return False - content_lower = content.lower() - # Count how many dummy keywords match - dummy_count = sum(1 for keyword in self.dummy_keywords if keyword in content_lower) - # Return True if ≥1 keyword matches (adjust threshold if needed) - return dummy_count >= 1 - - def _extract_news_text(self, url: str) -> str: - """Extract text content from a news article URL using newspaper3k. - - Falls back to HTML scraping with BeautifulSoup if newspaper3k fails. - - Args: - url: URL of the news article to extract text from. - - Returns: - Cleaned text content of the article, or empty string if extraction fails. - """ - # Add random delay before request to avoid rate limiting - time.sleep(random.uniform(1, 3)) - - try: - article = Article(url) - article.download() - article.parse() - if article.text: - return article.text - - # Fallback to HTML scraping if newspaper3k returns empty text - html = self._fetch_original_html(url) - return self._clean_html(html) - - except Exception as e: - LOG.error(f"Failed to extract text with newspaper3k ({url}): {e}") - html = self._fetch_original_html(url) - return self._clean_html(html) - - def _fetch_original_html(self, url: str, timeout: int = 10) -> Optional[str]: - """Fetch raw HTML content from a URL with retries. - - Args: - url: URL to fetch HTML from. - timeout: Request timeout in seconds (default: 10). - - Returns: - Raw HTML content as a string, or None if fetch fails. 
- """ - # More realistic headers that mimic popular browsers - headers_list = [ - { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " - "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36", - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif," - "image/webp,*/*;q=0.8", - "Accept-Language": "en-US,en;q=0.5", - "Accept-Encoding": "gzip, deflate, br", - "Connection": "keep-alive", - "Upgrade-Insecure-Requests": "1", - "Sec-Fetch-Dest": "document", - "Sec-Fetch-Mode": "navigate", - "Sec-Fetch-Site": "none", - "Sec-Fetch-User": "?1" - }, - { - "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " - "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Safari/605.1.15", - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", - "Accept-Language": "en-US,en;q=0.5", - "Accept-Encoding": "gzip, deflate, br", - "Connection": "keep-alive", - "Upgrade-Insecure-Requests": "1" - } - ] - retries = 3 - # Use a random header from the list for each request - headers = random.choice(headers_list) - - for attempt in range(retries): - try: - # Add random delay between retries (0.5-2 seconds) - if attempt > 0: - time.sleep(random.uniform(0.5, 2.0)) - - response = requests.get( - url, headers=headers, timeout=timeout, verify=True - ) - response.raise_for_status() - return response.text - except Exception as e: - if attempt < retries - 1: - continue - LOG.error(f"Failed to fetch HTML after {retries} retries ({url}): {e}") - return None - - return None - - def _clean_html(self, html_content: Optional[str]) -> str: - """Clean HTML content to extract readable text. - - Removes scripts, styles, and other non-content elements, then normalizes whitespace. - - Args: - html_content: Raw HTML content to clean. - - Returns: - Cleaned text string, or empty string if input is None/empty. 
- """ - if not html_content: - return "" - - soup = BeautifulSoup(html_content, "html.parser") - - # Remove non-content elements - for element in soup(["script", "style", "iframe", "nav", "aside", "footer"]): - element.decompose() - - # Extract and normalize text - text = soup.get_text() - lines = (line.strip() for line in text.splitlines()) - chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) - return "\n".join(chunk for chunk in chunks if chunk) - if __name__ == "__main__": logging.basicConfig(level=logging.INFO) From feee11239cb2e293a332b671f203c441375100eb Mon Sep 17 00:00:00 2001 From: Lu Ken Date: Tue, 2 Dec 2025 10:51:13 +0800 Subject: [PATCH 10/15] centralize the html download and process action into ArticleContentExtractor class Signed-off-by: Lu Ken --- src/gentrade/scraper/extractor.py | 79 +++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 21 deletions(-) diff --git a/src/gentrade/scraper/extractor.py b/src/gentrade/scraper/extractor.py index 57a99bd..2fc59a1 100644 --- a/src/gentrade/scraper/extractor.py +++ b/src/gentrade/scraper/extractor.py @@ -10,11 +10,12 @@ import json import logging import os +from pickle import NONE import random import re import time -from typing import Dict, List +from typing import Dict, List, Optional from urllib.parse import urlparse import requests @@ -93,7 +94,9 @@ def save_dummy_patterns(self, dummy_patterns: List[str]): class ArticleContentExtractor: """Handles article content extraction with dummy content filtering.""" - def __init__(self, storage: ScraperStorage): + _instance = None + + def __init__(self, storage: ScraperStorage=None): self.ignored_extensions = ( ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".zip", ".rar", ".jpg", ".png", ".jpeg", ".gif" @@ -105,7 +108,10 @@ def __init__(self, storage: ScraperStorage): "ads by", "sponsored content", "subscribe to access" } + if storage is None: + storage = ScraperStorage() self.storage = storage + self.blocked_domains = self.storage.load_blocked_domains() self.dummy_patterns = self.storage.load_dummy_patterns() @@ -129,7 +135,7 @@ def _get_random_headers(self) -> Dict[str, str]: "Upgrade-Insecure-Requests": "1", } - def _clean_html(self, html: str) -> str: + def clean_html(self, html: str) -> str: """Clean raw HTML by removing non-content elements and ads.""" if not html: return "" @@ -221,31 +227,23 @@ def extract_content(self, url: str) -> str: return "Unsupported file type (non-HTML)" try: - article = Article(url, language="zh") + article = Article(url) article.download() article.parse() - content = article.text.strip() + if article.text: + content = article.text.strip() + else: + # Fallback to HTML scrapping if newspaper3k returns empty text + html = self._fetch_original_html(url) + content = self.clean_html(html) except ArticleException as e: logger.warning( "newspaper3k extraction failed: %s - falling back to HTML cleaning", str(e) ) - try: - headers = self._get_random_headers() - response = requests.get( - url, headers=headers, timeout=10, allow_redirects=True - ) - response.encoding = response.apparent_encoding - - if response.status_code != 200: - logger.warning("Failed to retrieve article (status %s): %s", - response.status_code, url) - return "Failed to retrieve content (HTTP error)" - - content = self._clean_html(response.text) - except requests.exceptions.RequestException as e: - logger.error("Request error for %s: %s", url, str(e)) - return "Failed to retrieve content (network error)" + # Fallback to HTML scrapping if newspaper3k returns empty 
text + html = self._fetch_original_html(url) + content = self.clean_html(html) if self._is_dummy_content(content): logger.warning("Dummy content detected at: %s", url) @@ -254,3 +252,42 @@ def extract_content(self, url: str) -> str: return "Content blocked: Contains cookie notices or irrelevant material" return content + + def _fetch_original_html(self, url: str, timeout: int = 10) -> Optional[str]: + """Fetch raw HTML content from a URL with retries. + + Args: + url: URL to fetch HTML from. + timeout: Request timeout in seconds (default: 10). + + Returns: + Raw HTML content as a string, or None if fetch fails. + """ + + retries = 3 + headers = self._get_random_headers() + + for attempt in range(retries): + try: + # Add random delay between retries (0.5-2 seconds) + if attempt > 0: + time.sleep(random.uniform(0.5, 2.0)) + + response = requests.get( + url, headers=headers, timeout=timeout, verify=True + ) + response.raise_for_status() + return response.text + except Exception as e: + if attempt < retries - 1: + continue + logger.error(f"Failed to fetch HTML after {retries} retries ({url}): {e}") + return None + + return None + + @staticmethod + def inst(storage: ScraperStorage=None): + if ArticleContentExtractor._instance is None: + ArticleContentExtractor._instance = ArticleContentExtractor(storage) + return ArticleContentExtractor._instance \ No newline at end of file From 62bfa9d603b6db9b5641536bfe59d8b0c64c18ce Mon Sep 17 00:00:00 2001 From: Lu Ken Date: Tue, 2 Dec 2025 15:27:13 +0800 Subject: [PATCH 11/15] Makefile: check pylint version before run make line instead of installing it everytime Signed-off-by: Lu Ken --- Makefile | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index cf24f20..2fe8813 100644 --- a/Makefile +++ b/Makefile @@ -1,19 +1,29 @@ CURRENT_DIR := $(shell pwd) +PYTHON_VERSION := "3.11" +PYTHON_VERSION_OK := $(shell python -c "import sys; print(sys.version_info >= (3,11) and sys.version_info < (3,12))") + # Pylint configuration PYLINT_ARGS = --rcfile=.pylintrc -PYLINT_VERSION = pylint==4.0.4 +PYLINT_VERSION = 4.0.4 +PYLINT_INSTALLED := $(shell python -c "import pkg_resources; print('pylint' in {pkg.key for pkg in pkg_resources.working_set})" 2>/dev/null || echo "False") +PYLINT_VERSION_OK := $(shell python -c "import pylint; print(pylint.__version__ == '$(PYLINT_VERSION)')" 2>/dev/null || echo "False") -REQUIRED_PYTHON_VERSION := "3.11" -PYTHON_VERSION_OK := $(shell python -c "import sys; print(sys.version_info >= (3,11) and sys.version_info < (3,12))") # Default target all: lint +ensure-pylint: + @if [ "$(PYLINT_INSTALLED)" = "False" ] || [ "$(PYLINT_VERSION_OK)" = "False" ]; then \ + echo "Installing/upgrading pylint to version $(PYLINT_VERSION)..."; \ + pip install pylint=="$(PYLINT_VERSION)"; \ + else \ + echo "✅ pylint $(PYLINT_VERSION) is already installed"; \ + fi + # Lint all Python files -lint: - pip install $(PYLINT_VERSION) +lint: check-python ensure-pylint @echo "Running Pylint ..." find . -type f -name "*.py" | xargs pylint $(PYLINT_ARGS) @@ -23,16 +33,16 @@ lint-file: echo "Usage: make lint-file FILE=path/to/file.py"; \ exit 1; \ fi - pip install $(PYLINT_VERSION) + pip install pylint=="$(PYLINT_VERSION)" pylint $(PYLINT_ARGS) $(FILE) # Check python version check-python: @echo "Checking Python version..." 
@if [ "$(PYTHON_VERSION_OK)" = "True" ]; then \ - echo "✅ Python version is $(REQUIRED_PYTHON_VERSION) (compatible)"; \ + echo "✅ Python version is $(PYTHON_VERSION) (compatible)"; \ else \ - echo "❌ Error: Python $(REQUIRED_PYTHON_VERSION) is required (found $(shell python --version | cut -d' ' -f2))"; \ + echo "❌ Error: Python $(PYTHON_VERSION) is required (found $(shell python --version | cut -d' ' -f2))"; \ exit 1; \ fi @@ -43,4 +53,4 @@ clean: find $(CURRENT_DIR) -type f -name "*.pyc" -delete find $(CURRENT_DIR) -type f -name ".pylint-cache" -exec rm -rf {} + -.PHONY: all lint lint-file clean check-python +.PHONY: all lint lint-file clean check-python ensure-pylint From b7b295e1ea99f070e23ec1270453538a8f7d1fcc Mon Sep 17 00:00:00 2001 From: Lu Ken Date: Tue, 2 Dec 2025 17:05:56 +0800 Subject: [PATCH 12/15] refactory Signed-off-by: Lu Ken --- Makefile | 112 +++++++++++++++--------------- src/gentrade/news/factory.py | 6 +- src/gentrade/scraper/extractor.py | 3 +- 3 files changed, 58 insertions(+), 63 deletions(-) diff --git a/Makefile b/Makefile index 2fe8813..de3b8e6 100644 --- a/Makefile +++ b/Makefile @@ -1,56 +1,56 @@ - -CURRENT_DIR := $(shell pwd) - -PYTHON_VERSION := "3.11" -PYTHON_VERSION_OK := $(shell python -c "import sys; print(sys.version_info >= (3,11) and sys.version_info < (3,12))") - -# Pylint configuration -PYLINT_ARGS = --rcfile=.pylintrc -PYLINT_VERSION = 4.0.4 -PYLINT_INSTALLED := $(shell python -c "import pkg_resources; print('pylint' in {pkg.key for pkg in pkg_resources.working_set})" 2>/dev/null || echo "False") -PYLINT_VERSION_OK := $(shell python -c "import pylint; print(pylint.__version__ == '$(PYLINT_VERSION)')" 2>/dev/null || echo "False") - - -# Default target -all: lint - -ensure-pylint: - @if [ "$(PYLINT_INSTALLED)" = "False" ] || [ "$(PYLINT_VERSION_OK)" = "False" ]; then \ - echo "Installing/upgrading pylint to version $(PYLINT_VERSION)..."; \ - pip install pylint=="$(PYLINT_VERSION)"; \ - else \ - echo "✅ pylint $(PYLINT_VERSION) is already installed"; \ - fi - -# Lint all Python files -lint: check-python ensure-pylint - @echo "Running Pylint ..." - find . -type f -name "*.py" | xargs pylint $(PYLINT_ARGS) - -# Lint specific file(s) -lint-file: - @if [ -z "$(FILE)" ]; then \ - echo "Usage: make lint-file FILE=path/to/file.py"; \ - exit 1; \ - fi - pip install pylint=="$(PYLINT_VERSION)" - pylint $(PYLINT_ARGS) $(FILE) - -# Check python version -check-python: - @echo "Checking Python version..." - @if [ "$(PYTHON_VERSION_OK)" = "True" ]; then \ - echo "✅ Python version is $(PYTHON_VERSION) (compatible)"; \ - else \ - echo "❌ Error: Python $(PYTHON_VERSION) is required (found $(shell python --version | cut -d' ' -f2))"; \ - exit 1; \ - fi - -# Clean up -clean: - @echo "Cleaning in: $(CURRENT_DIR)..." 
- find $(CURRENT_DIR) -type d -name "__pycache__" -exec rm -rf {} + - find $(CURRENT_DIR) -type f -name "*.pyc" -delete - find $(CURRENT_DIR) -type f -name ".pylint-cache" -exec rm -rf {} + - -.PHONY: all lint lint-file clean check-python ensure-pylint + +CURRENT_DIR := $(shell pwd) + +PYTHON_VERSION := "3.11" +PYTHON_VERSION_OK := $(shell python -c "import sys; print(sys.version_info >= (3,11) and sys.version_info < (3,12))") + +# Pylint configuration +PYLINT_ARGS = --rcfile=.pylintrc +PYLINT_VERSION = 4.0.4 +PYLINT_INSTALLED := $(shell python -c "import pkg_resources; print('pylint' in {pkg.key for pkg in pkg_resources.working_set})" 2>/dev/null || echo "False") +PYLINT_VERSION_OK := $(shell python -c "import pylint; print(pylint.__version__ == '$(PYLINT_VERSION)')" 2>/dev/null || echo "False") + +# Default target +all: lint + +ensure-pylint: + @if [ "$(PYLINT_INSTALLED)" = "False" ] || [ "$(PYLINT_VERSION_OK)" = "False" ]; then \ + echo "Installing/upgrading pylint to version $(PYLINT_VERSION)..."; \ + pip install pylint=="$(PYLINT_VERSION)"; \ + else \ + echo "✅ pylint $(PYLINT_VERSION) is already installed"; \ + fi + +# Lint all Python files +lint: check-python ensure-pylint + @echo "Running Pylint ..." + @export PYTHONPATH=$(CURRENT_DIR)/src + find . -type f -name "*.py" | xargs pylint $(PYLINT_ARGS) + +# Lint specific file(s) +lint-file: + @if [ -z "$(FILE)" ]; then \ + echo "Usage: make lint-file FILE=path/to/file.py"; \ + exit 1; \ + fi + pip install pylint=="$(PYLINT_VERSION)" + pylint $(PYLINT_ARGS) $(FILE) + +# Check python version +check-python: + @echo "Checking Python version..." + @if [ "$(PYTHON_VERSION_OK)" = "True" ]; then \ + echo "✅ Python version is $(PYTHON_VERSION) (compatible)"; \ + else \ + echo "❌ Error: Python $(PYTHON_VERSION) is required (found $(shell python --version | cut -d' ' -f2))"; \ + exit 1; \ + fi + +# Clean up +clean: + @echo "Cleaning in: $(CURRENT_DIR)..." 
+ find $(CURRENT_DIR) -type d -name "__pycache__" -exec rm -rf {} + + find $(CURRENT_DIR) -type f -name "*.pyc" -delete + find $(CURRENT_DIR) -type f -name ".pylint-cache" -exec rm -rf {} + + +.PHONY: all lint lint-file clean check-python ensure-pylint diff --git a/src/gentrade/news/factory.py b/src/gentrade/news/factory.py index 81f4917..4fb9e60 100644 --- a/src/gentrade/news/factory.py +++ b/src/gentrade/news/factory.py @@ -10,16 +10,12 @@ import logging import time import threading -import random from typing import List, Optional, Set from urllib.parse import urlparse # Add this to extract domain from URL -import requests from gentrade.scraper.extractor import ArticleContentExtractor -from newspaper import Article -from bs4 import BeautifulSoup -from gentrade.news.meta import NewsInfo, NewsProviderBase, NewsDatabase +from gentrade.news.meta import NewsProviderBase, NewsDatabase from gentrade.news.googlenews import GoogleNewsProvider from gentrade.news.newsapi import NewsApiProvider from gentrade.news.rss import RssProvider diff --git a/src/gentrade/scraper/extractor.py b/src/gentrade/scraper/extractor.py index 2fc59a1..12a91b8 100644 --- a/src/gentrade/scraper/extractor.py +++ b/src/gentrade/scraper/extractor.py @@ -10,7 +10,6 @@ import json import logging import os -from pickle import NONE import random import re import time @@ -290,4 +289,4 @@ def _fetch_original_html(self, url: str, timeout: int = 10) -> Optional[str]: def inst(storage: ScraperStorage=None): if ArticleContentExtractor._instance is None: ArticleContentExtractor._instance = ArticleContentExtractor(storage) - return ArticleContentExtractor._instance \ No newline at end of file + return ArticleContentExtractor._instance From bb3035e2b4c0ba9cbc70ddae1b90da5756fd718e Mon Sep 17 00:00:00 2001 From: Lu Ken Date: Tue, 2 Dec 2025 17:22:59 +0800 Subject: [PATCH 13/15] remove Google news provider Signed-off-by: Lu Ken --- src/gentrade/news/factory.py | 71 +------------ src/gentrade/news/googlenews.py | 158 --------------------------- tests/test_api_google_news.py | 182 -------------------------------- tests/test_gentrade_news.py | 59 +---------- 4 files changed, 6 insertions(+), 464 deletions(-) delete mode 100644 src/gentrade/news/googlenews.py delete mode 100644 tests/test_api_google_news.py diff --git a/src/gentrade/news/factory.py b/src/gentrade/news/factory.py index 4fb9e60..0ed7626 100644 --- a/src/gentrade/news/factory.py +++ b/src/gentrade/news/factory.py @@ -10,13 +10,11 @@ import logging import time import threading -from typing import List, Optional, Set -from urllib.parse import urlparse # Add this to extract domain from URL +from typing import List, Optional from gentrade.scraper.extractor import ArticleContentExtractor from gentrade.news.meta import NewsProviderBase, NewsDatabase -from gentrade.news.googlenews import GoogleNewsProvider from gentrade.news.newsapi import NewsApiProvider from gentrade.news.rss import RssProvider from gentrade.news.finnhub import FinnhubNewsProvider @@ -36,8 +34,7 @@ def create_provider(provider_type: str, **kwargs) -> NewsProviderBase: """Create a news provider instance based on the specified provider type. Args: - provider_type: Type of news provider. Supported values: "newsapi", "finnhub", - "google", "rss". + provider_type: Type of news provider. Supported values: "newsapi", "finnhub", "rss". ** kwargs: Additional keyword arguments for provider initialization (e.g., feed_url for RSS providers). 
@@ -52,7 +49,6 @@ def create_provider(provider_type: str, **kwargs) -> NewsProviderBase: providers = { "newsapi": NewsApiProvider, "finnhub": FinnhubNewsProvider, - "google": GoogleNewsProvider, "rss": RssProvider } @@ -72,15 +68,6 @@ def create_provider(provider_type: str, **kwargs) -> NewsProviderBase: raise ValueError("FINNHUB_API_KEY environment variable not set") return provider_class(api_key=api_key) - if provider_type_lower == "google": - api_key = os.getenv("GOOGLE_CLOUD_API_KEY") - cse_id = os.getenv("GOOGLE_CSE_ID") - if not api_key or not cse_id: - raise ValueError( - "GOOGLE_CLOUD_API_KEY or GOOGLE_CSE_ID environment variable not set" - ) - return provider_class(api_key=api_key, cse_id=cse_id) - if provider_type_lower == "rss": feed_url = kwargs.get("feed_url", os.getenv("RSS_FEED_URL")) return provider_class(feed_url=feed_url) @@ -106,32 +93,8 @@ def __init__(self, providers: List[NewsProviderBase], db: NewsDatabase): self.db = db self.db_lock = threading.Lock() - # 1. Add blocklist (stores blocked domain names, e.g., "example.com") - self.blocklist: Set[str] = set() - - # 2. Add dummy content keywords (expand this list based on your needs) - self.dummy_keywords = { - "we use cookies", "cookie policy", "analyze website traffic", - "accept cookies", "reject cookies", "by continuing to use", - "this website uses cookies", "improve user experience", - "ads by", "sponsored content", "subscribe to access" - } - #self.blocklist = self._load_blocklist() - - def _load_blocklist(self) -> Set[str]: - try: - with open("news_blocklist.txt", "r", encoding="utf-8") as f: - return set(line.strip() for line in f if line.strip()) - except FileNotFoundError: - return set() - - def _save_blocklist(self) -> None: - with open("news_blocklist.txt", "w", encoding="utf-8") as f: - for domain in self.blocklist: - f.write(f"{domain}\n") - def _fetch_thread(self, provider, aggregator, ticker, category, - max_hour_interval, max_count, is_process=False): + max_hour_interval, max_count, is_process=True): if ticker: news = provider.fetch_stock_news( ticker, category, max_hour_interval, max_count @@ -199,31 +162,6 @@ def sync_news( self.db.last_sync = current_time LOG.info("News sync completed.") - def _is_blocked(self, url: str) -> bool: - """Check if the website of the URL is in the blocklist""" - domain = self._extract_domain(url) - if domain in self.blocklist: - LOG.info(f"Skipping blocked website: {domain} (URL: {url})") - return True - return False - - def _extract_domain(self, url: str) -> str: - """Extract the main domain from a URL - (e.g., "https://www.example.com/news" → "example.com") - """ - try: - parsed = urlparse(url) - # Split subdomains (e.g., "www.example.co.uk" → "example.co.uk" for common TLDs) - domain_parts = parsed.netloc.split(".") - # Handle cases like "co.uk" (adjust based on your target regions) - if len(domain_parts) >= 3 and domain_parts[-2] in ["co", "com", "org", "net"]: - return ".".join(domain_parts[-3:]) - return ".".join(domain_parts[-2:]) - except Exception as e: - LOG.error(f"Failed to extract domain from {url}: {e}") - return url # Fallback to full URL if parsing fails - - if __name__ == "__main__": logging.basicConfig(level=logging.INFO) db = NewsDatabase() @@ -232,11 +170,10 @@ def _extract_domain(self, url: str) -> str: # Initialize providers using the factory newsapi_provider = NewsFactory.create_provider("newsapi") finnhub_provider = NewsFactory.create_provider("finnhub") - google_provider = NewsFactory.create_provider("google") rss_provider = 
NewsFactory.create_provider("rss") # Create aggregator with selected providers - aggregator = NewsAggregator(providers=[newsapi_provider], db=db) + aggregator = NewsAggregator(providers=[rss_provider], db=db) # Sync market news and stock-specific news aggregator.sync_news(category="business", max_hour_interval=64, max_count=10) diff --git a/src/gentrade/news/googlenews.py b/src/gentrade/news/googlenews.py deleted file mode 100644 index 87f1193..0000000 --- a/src/gentrade/news/googlenews.py +++ /dev/null @@ -1,158 +0,0 @@ -"""Google Custom Search (GCS) news provider for financial news retrieval. - -Implements the NewsProviderBase abstract class to fetch general market news and stock-specific -news via Google's Custom Search API. Supports filtering by time interval, article count, -region, and language, while formatting results into standardized NewsInfo objects. -""" - -import logging -import time -from typing import List - -import requests - -from gentrade.news.meta import NewsInfo, NewsProviderBase - -LOG = logging.getLogger(__name__) - - -class GoogleNewsProvider(NewsProviderBase): - """News provider using Google Custom Search API to retrieve financial news. - - Authenticates with Google Cloud API key and Custom Search Engine (CSE) ID. Fetches - market-wide or stock-specific news, with built-in filtering for recency and result count. - """ - - def __init__(self, api_key: str, cse_id: str): - """Initialize GoogleNewsProvider with required authentication credentials. - - Args: - api_key: Google Cloud API key for Custom Search request authentication. - cse_id: Google Custom Search Engine (CSE) ID configured for news retrieval. - """ - self.api_key = api_key - self.cse_id = cse_id - self.base_url = "https://www.googleapis.com/customsearch/v1" - - def fetch_latest_market_news( - self, - category: str = "business", - max_hour_interval: int = 24, - max_count: int = 10 - ) -> List[NewsInfo]: - """Fetch latest general market news via Google Custom Search. - - Retrieves financial market news from the last `max_hour_interval` hours, limited to - `max_count` articles, and assigns the specified category. - - Args: - category: Category label for fetched news (default: "business"). - max_hour_interval: Maximum age (in hours) of articles to retrieve (default: 24). - max_count: Maximum number of articles to return (default: 10). - - Returns: - List of NewsInfo objects with formatted market news; empty list if fetch fails - or no results exist. 
- """ - params = { - "key": self.api_key, - "cx": self.cse_id, - "q": "finance stock market", # Core query for market news - "num": max_count, - "dateRestrict": f"h{max_hour_interval}", # Filter by recent hours - "gl": "us", # Focus on US region results - "lr": "lang_en", # Restrict to English language - "siteSearch": "news.google.com", # Limit to Google News sources - "siteSearchFilter": "i" # Exclude duplicate results - } - - try: - response = requests.get(self.base_url, params=params, timeout=10) - response.raise_for_status() # Raise error for HTTP status codes ≥400 - items = response.json().get("items", []) # Extract articles from response - - # Convert API response to standardized NewsInfo objects - news_list = [ - NewsInfo( - category=category, - datetime=int(time.time()), # Google CSE lacks article timestamp - headline=item.get("title", ""), - id=self.url_to_hash_id(item.get("link", "")), - image=item.get("pagemap", {}).get("cse_image", [{}])[0].get("src", ""), - related="", # No stock ticker for general market news - source=item.get("displayLink", ""), # Source domain (e.g., "bloomberg.com") - summary=item.get("snippet", ""), # Short article preview - url=item.get("link", ""), # Direct article URL - content="", # Content extracted later by aggregator - provider='google', - market='us' - ) - for item in items - ] - - return self._filter_news(news_list, max_hour_interval, max_count) - - except requests.RequestException as e: - LOG.debug(f"Failed to fetch market news from Google Custom Search: {e}") - return [] - - def fetch_stock_news( - self, - ticker: str, - category: str = "business", - max_hour_interval: int = 24, - max_count: int = 10 - ) -> List[NewsInfo]: - """Fetch stock-specific news for a given ticker via Google Custom Search. - - Retrieves news related to the specified stock ticker from the last `max_hour_interval` - hours, limited to `max_count` articles, and assigns the specified category. - - Args: - ticker: Stock ticker symbol (e.g., "AAPL") to fetch news for. - category: Category label for fetched news (default: "business"). - max_hour_interval: Maximum age (in hours) of articles to retrieve (default: 24). - max_count: Maximum number of articles to return (default: 10). - - Returns: - List of NewsInfo objects with formatted stock news; empty list if fetch fails - or no results exist. 
- """ - params = { - "key": self.api_key, - "cx": self.cse_id, - "q": f"{ticker} stock news", # Ticker-specific query - "num": max_count, - "dateRestrict": f"h{max_hour_interval}", # Filter by recent hours - "sort": "date" # Sort results by most recent first - } - - try: - response = requests.get(self.base_url, params=params, timeout=10) - response.raise_for_status() - items = response.json().get("items", []) - - # Convert API response to standardized NewsInfo objects - news_list = [ - NewsInfo( - category=category, - datetime=int(time.time()), # Google CSE lacks article timestamp - headline=item.get("title", ""), - id=hash(item.get("link", "")), # Unique ID from URL - image=item.get("pagemap", {}).get("cse_image", [{}])[0].get("src", ""), - related=ticker, # Associate with target stock ticker - source=item.get("displayLink", ""), - summary=item.get("snippet", ""), - url=item.get("link", ""), - content="", # Content extracted later - provider='google', - market='us' - ) - for item in items - ] - - return self._filter_news(news_list, max_hour_interval, max_count) - - except requests.RequestException as e: - LOG.debug(f"Failed to fetch {ticker} stock news from Google Custom Search: {e}") - return [] diff --git a/tests/test_api_google_news.py b/tests/test_api_google_news.py deleted file mode 100644 index 0596d23..0000000 --- a/tests/test_api_google_news.py +++ /dev/null @@ -1,182 +0,0 @@ -""" -Google News API Test Suite - -This module contains pytest tests to verify functionality of Google's Custom Search API -for retrieving financial and stock-related news. It includes tests for: -- API credential validation -- General financial news retrieval -- Specific stock symbol news retrieval - -Requirements: -- Valid Google Cloud API key with Custom Search API enabled: GOOGLE_API_KEY -- Custom Search Engine ID (CX) configured for news search: CX -- Environment variables stored in .env file -""" - -import os -import requests -import pytest -from dotenv import load_dotenv - -# Load environment variables from .env file -load_dotenv() - -@pytest.fixture(scope="module") -def api_credentials(): - """ - Fixture to validate and provide API credentials. - - Retrieves Google API key and Custom Search Engine ID from environment variables - and performs basic validation. Fails if any credential is missing. - - Returns: - dict: Contains valid API credentials with keys 'api_key' and 'cx' - """ - api_key = os.getenv("GOOGLE_API_KEY") - cx = os.getenv("GOOGLE_CX") - - error_messages = [] - if not api_key: - error_messages.append( - "GOOGLE_API_KEY not found in environment variables. " - "Please check your .env file." - ) - if not cx: - error_messages.append( - "GOOGLE_CX (Custom Search Engine ID) not found. " - "Please check your .env file." - ) - - assert not error_messages, "\n".join(error_messages) - - return { - "api_key": api_key, - "cx": cx - } - - -def test_api_credentials_work(api_credentials): - """ - Test if API credentials are valid and functional. - - Performs a basic test query to verify that the provided API key and - CX ID can successfully authenticate with the Google Custom Search API. - Provides detailed error messages for common authentication issues. 
- """ - url = "https://www.googleapis.com/customsearch/v1" - test_params = { - "key": api_credentials["api_key"], - "cx": api_credentials["cx"], - "q": "test query", - "num": 1 - } - - response = requests.get(url, params=test_params, timeout=30) - - # Handle common authentication errors with detailed guidance - if response.status_code == 403: - pytest.fail( - "403 Forbidden: Invalid credentials or insufficient permissions.\n" - "Possible fixes:\n" - "1. Verify your API key is correct in .env\n" - "2. Ensure Custom Search API is enabled in Google Cloud Console\n" - "3. Check if your API key has IP restrictions that block this request\n" - "4. Confirm your project has billing enabled (required for production use)\n" - f"API Response: {response.text}" - ) - elif response.status_code == 400: - pytest.fail( - f"400 Bad Request: Invalid parameters. Check your CX ID.\n" - f"API Response: {response.text}" - ) - - assert response.status_code == 200, \ - f"API request failed with status code {response.status_code}. Response: {response.text}" - - -def test_get_latest_financial_news(api_credentials): - """ - Test retrieval of latest financial news from Google News. - - Queries the Custom Search API for recent financial news (past 1 week) - and validates the structure and content of returned results. - """ - url = "https://www.googleapis.com/customsearch/v1" - params = { - "key": api_credentials["api_key"], - "cx": api_credentials["cx"], - "q": "finance stock market", - "num": 10, - "dateRestrict": "w1", # Restrict results to past 1 week - "gl": "us", # Focus on United States results - "lr": "lang_en", # Restrict to English language - "siteSearch": "news.google.com", # Search only Google News - "siteSearchFilter": "i" # Include only specified sites - } - - # Execute API request - response = requests.get(url, params=params, timeout=30) - - # Handle specific API errors - if response.status_code == 403: - pytest.fail(f"403 Forbidden: Check API key and permissions. Response: {response.text}") - if response.status_code == 429: - pytest.fail(f"429 Too Many Requests: API quota exceeded. Response: {response.text}") - - # Verify successful response - assert response.status_code == 200, \ - f"API request failed with status code {response.status_code}. Response: {response.text}" - - # Parse and validate response content - results = response.json() - - assert "items" in results, f"No news items found. API response: {results}" - assert len(results["items"]) > 0, "No articles returned from Google News" - - # Validate individual news articles - for item in results["items"][:3]: # Check first 3 articles - assert "title" in item, "News item missing title" - assert "link" in item, "News item missing URL" - assert "snippet" in item, "News item missing snippet" - - -def test_get_specific_stock_news(api_credentials): - """ - Test retrieval of news for specific stock symbols. - - Queries the Custom Search API for news related to major tech stocks - and verifies that returned articles mention the target stock symbol. 
- """ - stock_symbols = ["AAPL", "MSFT", "GOOGL"] - url = "https://www.googleapis.com/customsearch/v1" - - for symbol in stock_symbols: - params = { - "key": api_credentials["api_key"], - "cx": api_credentials["cx"], - "q": f"{symbol} stock news", - "num": 5, - "dateRestrict": "w1", # Restrict to past 1 week - "gl": "us", - "lr": "lang_en", - "siteSearch": "news.google.com", - "siteSearchFilter": "i" - } - - response = requests.get(url, params=params, timeout=30) - - if response.status_code == 403: - pytest.fail(f"403 Forbidden for {symbol}: Check API key and permissions") - - assert response.status_code == 200, \ - f"Failed to get news for {symbol} (status code {response.status_code})" - - results = response.json() - - # Validate stock symbol appears in results when available - if "items" in results and len(results["items"]) > 0: - symbol_in_results = any( - symbol in item["title"].upper() or symbol in item.get("snippet", "").upper() - for item in results["items"] - ) - assert symbol_in_results, f"No results mentioning {symbol} found" diff --git a/tests/test_gentrade_news.py b/tests/test_gentrade_news.py index 52e000b..0fd2859 100644 --- a/tests/test_gentrade_news.py +++ b/tests/test_gentrade_news.py @@ -7,7 +7,6 @@ from gentrade.news.meta import NewsProviderBase from gentrade.news.newsapi import NewsApiProvider from gentrade.news.finnhub import FinnhubNewsProvider -from gentrade.news.googlenews import GoogleNewsProvider from gentrade.news.rss import RssProvider @@ -43,24 +42,6 @@ def test_create_finnhub_missing_key(self): NewsFactory.create_provider("finnhub") assert "FINNHUB_API_KEY" in str(excinfo.value) - @patch.dict(os.environ, { - "GOOGLE_CLOUD_API_KEY": "test_google_key", - "GOOGLE_CSE_ID": "test_cse_id" - }) - def test_create_google_provider(self): - """Test Google News provider creation with valid env vars""" - provider = NewsFactory.create_provider("google") - assert isinstance(provider, GoogleNewsProvider) - assert provider.api_key == "test_google_key" - assert provider.cse_id == "test_cse_id" - - def test_create_google_missing_credentials(self): - """Test Google creation fails with missing credentials""" - with patch.dict(os.environ, {}, clear=True): - with pytest.raises(ValueError) as excinfo: - NewsFactory.create_provider("google") - assert "GOOGLE_CSE_ID" in str(excinfo.value) - def test_create_rss_provider_with_feed_url(self): """Test RSS provider creation with explicit feed URL""" feed_url = "https://test-feed.com/rss" @@ -94,10 +75,6 @@ class TestNewsProvidersCommon: @pytest.fixture(params=[ ("newsapi", NewsApiProvider, {"NEWSAPI_API_KEY": "test_key"}), ("finnhub", FinnhubNewsProvider, {"FINNHUB_API_KEY": "test_key"}), - ("google", GoogleNewsProvider, { - "GOOGLE_CLOUD_API_KEY": "test_key", - "GOOGLE_CSE_ID": "test_id" - }), ("rss", RssProvider, {}) ]) def provider_setup(self, request): @@ -128,8 +105,6 @@ def test_fetch_market_news_returns_list(self, provider_setup): mock_response.json.return_value = {"articles": []} elif provider_setup[0] == "finnhub": mock_response.json.return_value = [] - elif provider_setup[0] == "google": - mock_response.json.return_value = {"items": []} elif provider_setup[0] == "rss": pass # Handled in RSS specific tests @@ -152,9 +127,6 @@ def test_fetch_stock_news_returns_list(self, provider_setup): elif provider_type == "finnhub": # Finnhub returns list directly mock_response.json.return_value = [] - elif provider_type == "google": - # Google returns {"items": [...]} - mock_response.json.return_value = {"items": []} elif provider_type == 
"rss": # RSS uses feedparser, handled separately pass @@ -190,29 +162,6 @@ def test_fetch_market_news_params(self, mock_get, newsapi_provider): assert "from" in params -class TestGoogleNewsProvider: - """Google News-specific test cases""" - - @pytest.fixture - def google_provider(self): - with patch.dict(os.environ, { - "GOOGLE_CLOUD_API_KEY": "test_key", - "GOOGLE_CSE_ID": "test_id" - }): - return NewsFactory.create_provider("google") - - @patch("gentrade.news.googlenews.requests.get") - def test_fetch_stock_news_query(self, mock_get, google_provider): - """Test Google News uses correct stock query""" - mock_get.return_value = Mock(status_code=200, json=lambda: {"items": []}) - google_provider.fetch_stock_news(ticker="MSFT", max_count=3) - - _, kwargs = mock_get.call_args - params = kwargs["params"] - assert params["q"] == "MSFT stock news" - assert params["num"] == 3 - - class TestRssProvider: """RSS Provider-specific test cases""" @@ -268,17 +217,13 @@ def test_company_news_endpoint(self, mock_get, finnhub_provider): class TestProviderErrorHandling: """Tests for provider error handling""" - @pytest.fixture(params=["newsapi", "finnhub", "google"]) + @pytest.fixture(params=["newsapi", "finnhub"]) def api_provider(self, request): """Fixture for API-based providers (non-RSS)""" provider_type = request.param env_vars = { "newsapi": {"NEWSAPI_API_KEY": "test"}, - "finnhub": {"FINNHUB_API_KEY": "test"}, - "google": { - "GOOGLE_CLOUD_API_KEY": "test", - "GOOGLE_CSE_ID": "test" - } + "finnhub": {"FINNHUB_API_KEY": "test"} }[provider_type] with patch.dict(os.environ, env_vars): From 433b07de649e88bdf6921013a15195698f116f6b Mon Sep 17 00:00:00 2001 From: Lu Ken Date: Tue, 2 Dec 2025 18:02:02 +0800 Subject: [PATCH 14/15] use loguru instead of logging Signed-off-by: Lu Ken --- requirements.txt | 1 + src/gentrade/news/factory.py | 29 +++++++++++++---------------- src/gentrade/news/finnhub.py | 9 +++------ src/gentrade/news/meta.py | 7 ++----- src/gentrade/news/newsapi.py | 12 ++++-------- src/gentrade/news/rss.py | 14 ++++++-------- 6 files changed, 29 insertions(+), 43 deletions(-) diff --git a/requirements.txt b/requirements.txt index ed82156..65c6d97 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,7 @@ backtrader ag2 mplfinance ntplib +loguru langchain_openai langchain_core diff --git a/src/gentrade/news/factory.py b/src/gentrade/news/factory.py index 0ed7626..233c62f 100644 --- a/src/gentrade/news/factory.py +++ b/src/gentrade/news/factory.py @@ -7,10 +7,10 @@ """ import os -import logging import time import threading from typing import List, Optional +from loguru import logger from gentrade.scraper.extractor import ArticleContentExtractor @@ -19,8 +19,6 @@ from gentrade.news.rss import RssProvider from gentrade.news.finnhub import FinnhubNewsProvider -LOG = logging.getLogger(__name__) - class NewsFactory: """Factory class for creating news provider instances based on provider type. 
@@ -99,7 +97,7 @@ def _fetch_thread(self, provider, aggregator, ticker, category, news = provider.fetch_stock_news( ticker, category, max_hour_interval, max_count ) - LOG.info( + logger.info( f"Fetched {len(news)} stock news articles for {ticker} from " f"{provider.__class__.__name__}" ) @@ -107,7 +105,7 @@ def _fetch_thread(self, provider, aggregator, ticker, category, news = provider.fetch_latest_market_news( category, max_hour_interval, max_count ) - LOG.info( + logger.info( f"Fetched {len(news)} market news articles from " f"{provider.__class__.__name__}" ) @@ -117,7 +115,7 @@ def _fetch_thread(self, provider, aggregator, ticker, category, item.summary = ace.clean_html(item.summary) if is_process: item.content = ace.extract_content(item.url) - LOG.info(item.content) + logger.info(item.content) with aggregator.db_lock: aggregator.db.add_news(news) @@ -142,10 +140,10 @@ def sync_news( """ current_time = time.time() if current_time < self.db.last_sync + 3600: - LOG.info("Skipping sync: Last sync was less than 1 hour ago.") + logger.info("Skipping sync: Last sync was less than 1 hour ago.") return - LOG.info("Starting news sync...") + logger.info("Starting news sync...") threads = [] for provider in self.providers: @@ -160,10 +158,9 @@ def sync_news( thread.join() self.db.last_sync = current_time - LOG.info("News sync completed.") + logger.info("News sync completed.") if __name__ == "__main__": - logging.basicConfig(level=logging.INFO) db = NewsDatabase() try: @@ -186,18 +183,18 @@ def sync_news( # Log results all_news = db.get_all_news() - LOG.info(f"Total articles in database: {len(all_news)}") + logger.info(f"Total articles in database: {len(all_news)}") if all_news: - LOG.info("Example article:") - LOG.info(all_news[0].to_dict()) + logger.info("Example article:") + logger.info(all_news[0].to_dict()) for news_item in all_news: - LOG.info("--------------------------------") + logger.info("--------------------------------") print(news_item.headline) print(news_item.url) print(news_item.content) - LOG.info("--------------------------------") + logger.info("--------------------------------") except ValueError as e: - LOG.error(f"Error during news aggregation: {e}") + logger.error(f"Error during news aggregation: {e}") diff --git a/src/gentrade/news/finnhub.py b/src/gentrade/news/finnhub.py index d51da79..14fb5cc 100644 --- a/src/gentrade/news/finnhub.py +++ b/src/gentrade/news/finnhub.py @@ -5,17 +5,14 @@ and news specific to individual stock tickers, with filtering by time interval and article count. """ -import logging import time from typing import List from datetime import datetime, timedelta import requests +from loguru import logger from gentrade.news.meta import NewsInfo, NewsProviderBase -LOG = logging.getLogger(__name__) - - class FinnhubNewsProvider(NewsProviderBase): """News provider implementation for fetching news via the Finnhub.io API. 
@@ -87,7 +84,7 @@ def fetch_latest_market_news( return self._filter_news(news_list, max_hour_interval, max_count) except requests.RequestException as e: - LOG.debug(f"Error fetching market news from Finnhub: {e}") + logger.debug(f"Error fetching market news from Finnhub: {e}") return [] def fetch_stock_news( @@ -146,5 +143,5 @@ def fetch_stock_news( return self._filter_news(news_list, max_hour_interval, max_count) except requests.RequestException as e: - LOG.debug(f"Error fetching stock news from Finnhub: {e}") + logger.debug(f"Error fetching stock news from Finnhub: {e}") return [] diff --git a/src/gentrade/news/meta.py b/src/gentrade/news/meta.py index 4b59d1c..088f350 100644 --- a/src/gentrade/news/meta.py +++ b/src/gentrade/news/meta.py @@ -9,17 +9,14 @@ """ import abc -import logging import time import hashlib from typing import Dict, List, Any, Optional from datetime import datetime from dataclasses import dataclass - +from loguru import logger import requests -LOG = logging.getLogger(__name__) - NEWS_MARKET = [ 'us', 'zh', 'hk', 'cypto', 'common' ] @@ -79,7 +76,7 @@ def fetch_article_html(self) -> Optional[str]: response.raise_for_status() return response.text except requests.RequestException as e: - LOG.debug(f"Failed to fetch HTML for {self.url}: {e}") + logger.debug(f"Failed to fetch HTML for {self.url}: {e}") return None diff --git a/src/gentrade/news/newsapi.py b/src/gentrade/news/newsapi.py index 67a8ca4..51e6c05 100644 --- a/src/gentrade/news/newsapi.py +++ b/src/gentrade/news/newsapi.py @@ -5,17 +5,13 @@ article count, and language, while formatting results into standardized NewsInfo objects. """ -import logging from typing import List from datetime import datetime, timedelta - import requests +from loguru import logger from gentrade.news.meta import NewsInfo, NewsProviderBase -LOG = logging.getLogger(__name__) - - class NewsApiProvider(NewsProviderBase): """News provider that uses NewsAPI.org to fetch financial and stock-specific news. @@ -92,10 +88,10 @@ def fetch_latest_market_news( return self._filter_news(news_list, max_hour_interval, max_count) except requests.RequestException as e: - LOG.debug(f"Failed to fetch market news from NewsAPI.org: {e}") + logger.debug(f"Failed to fetch market news from NewsAPI.org: {e}") return [] except Exception as e: - LOG.debug(f"Unexpected error: {e}") + logger.debug(f"Unexpected error: {e}") return [] def fetch_stock_news( @@ -159,5 +155,5 @@ def fetch_stock_news( return self._filter_news(news_list, max_hour_interval, max_count) except requests.RequestException as e: - LOG.debug(f"Failed to fetch {ticker} stock news from NewsAPI.org: {e}") + logger.debug(f"Failed to fetch {ticker} stock news from NewsAPI.org: {e}") return [] diff --git a/src/gentrade/news/rss.py b/src/gentrade/news/rss.py index 9d0516f..5fcbb9d 100644 --- a/src/gentrade/news/rss.py +++ b/src/gentrade/news/rss.py @@ -7,16 +7,14 @@ """ import os -import logging from typing import List import requests import feedparser +from loguru import logger from gentrade.news.meta import NewsInfo, NewsProviderBase -LOG = logging.getLogger(__name__) - class RssProvider(NewsProviderBase): """News provider that fetches news from RSS/ATOM feeds. @@ -63,7 +61,7 @@ def fetch_latest_market_news( parsing fails, or no valid articles exist. 
""" if not self.feed_url: - LOG.error("RSS feed URL is missing (no explicit URL, env var, or default).") + logger.error("RSS feed URL is missing (no explicit URL, env var, or default).") return [] # Headers to mimic browser (avoid feed server blocking) and accept RSS/XML @@ -81,7 +79,7 @@ def fetch_latest_market_news( # Parse feed with feedparser feed = feedparser.parse(response.text) if not feed.entries: - LOG.warning(f"No articles found in RSS feed: {self.feed_url}") + logger.warning(f"No articles found in RSS feed: {self.feed_url}") return [] # Convert feed entries to standardized NewsInfo objects @@ -111,16 +109,16 @@ def fetch_latest_market_news( return self._filter_news(news_list, max_hour_interval, max_count) except requests.HTTPError as e: - LOG.error( + logger.error( f"HTTP error fetching RSS feed {self.feed_url}: " f"Status {e.response.status_code} - {str(e)}" ) return [] except requests.RequestException as e: - LOG.error(f"Network error fetching RSS feed {self.feed_url}: {str(e)}") + logger.error(f"Network error fetching RSS feed {self.feed_url}: {str(e)}") return [] except Exception as e: - LOG.error(f"Unexpected error parsing RSS feed {self.feed_url}: {str(e)}") + logger.error(f"Unexpected error parsing RSS feed {self.feed_url}: {str(e)}") return [] def fetch_stock_news( From aee73469586ef0de1f0afd5e1ecf1eccc3a7b67c Mon Sep 17 00:00:00 2001 From: Lu Ken Date: Tue, 2 Dec 2025 21:26:15 +0800 Subject: [PATCH 15/15] add market interface for news provider Signed-off-by: Lu Ken --- src/gentrade/news/finnhub.py | 4 ++++ src/gentrade/news/meta.py | 6 +++++- src/gentrade/news/newsapi.py | 4 ++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/gentrade/news/finnhub.py b/src/gentrade/news/finnhub.py index 14fb5cc..beb1d53 100644 --- a/src/gentrade/news/finnhub.py +++ b/src/gentrade/news/finnhub.py @@ -30,6 +30,10 @@ def __init__(self, api_key: str): self.api_key = api_key self.base_url = "https://finnhub.io/api/v1" + @property + def market(self): + return 'us' + def fetch_latest_market_news( self, category: str = "business", diff --git a/src/gentrade/news/meta.py b/src/gentrade/news/meta.py index 088f350..dd19ca3 100644 --- a/src/gentrade/news/meta.py +++ b/src/gentrade/news/meta.py @@ -34,7 +34,7 @@ class NewsInfo: summary: str url: str content: str - provider: str # provder like newsapi, google, finnhub, rss + provider: str # provder like newsapi, finnhub, rss market: str # market type like us, chn, eur, hk, crypto def to_dict(self) -> Dict[str, Any]: @@ -86,6 +86,10 @@ class NewsProviderBase(metaclass=abc.ABCMeta): All concrete news providers (e.g., NewsAPI, Finnhub) must implement these methods. """ + @property + def market(self): + return 'common' + @abc.abstractmethod def fetch_latest_market_news( self, diff --git a/src/gentrade/news/newsapi.py b/src/gentrade/news/newsapi.py index 51e6c05..53d0a40 100644 --- a/src/gentrade/news/newsapi.py +++ b/src/gentrade/news/newsapi.py @@ -29,6 +29,10 @@ def __init__(self, api_key: str): self.api_key = api_key self.base_url = "https://newsapi.org/v2/everything" # Core endpoint for news retrieval + @property + def market(self): + return 'us' + def fetch_latest_market_news( self, category: str = "business",