diff --git a/README.md b/README.md index 99187d2..358f270 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,13 @@ # Concall Parser -**Concall Parser** is an open-source Python library designed to efficiently extract insights from earnings call (concall) transcripts. It enables seamless extraction of management commentary, analyst discussions, company information, perfect for building financial research tools, summarizers, or investor dashboards.. +**Concall Parser** is an open-source Python library designed to efficiently extract insights from earnings call (concall) transcripts. It enables seamless extraction of management commentary, analyst discussions, and company information — perfect for building financial research tools, summarizers, or investor dashboards. Check out the repo on [GitHub](https://github.com/JS12540/concall-parser/). --- +**Note:** We currently support earnings calls of Indian companies (registered on the BSE or NSE) only. + ## 📦 Installation Install the library using pip: @@ -44,6 +46,13 @@ export GROQ_API_KEY="YOUR GROQ API KEY" export GROQ_MODEL="YOUR GROQ MODEL NAME" ``` +Alternatively, pass the values directly when creating the parser object. + +```python +parser = ConcallParser(path="path/to/concall.pdf", groq_api_key=your_api_key, groq_model=preferred_groq_model) +``` + + We use llama3-70b-8192 as the default model if any groq supported models are not provided as env. 
## ✨ Features diff --git a/concall_parser/log_config.py b/concall_parser/log_config.py index 04febfa..56817da 100644 --- a/concall_parser/log_config.py +++ b/concall_parser/log_config.py @@ -1,19 +1,32 @@ import logging -logger = logging.Logger("concall_logger") -logger.setLevel(logging.DEBUG) +logger = logging.getLogger("concall_parser") -stream_handler = logging.StreamHandler() -file_handler = logging.FileHandler(filename='app.log', mode='w') -formatter = logging.Formatter( - fmt="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s" -) +def configure_logger( + save_to_file: bool = False, + logging_level: str = "INFO", + log_file: str = "app.log" +) -> None: + """Configure the global logger. + + Args: + save_to_file: Whether to save logs to file + logging_level: Logging level (DEBUG/INFO/WARNING/ERROR) + log_file: Log file path when save_to_file is True + """ + logger.handlers.clear() + level = getattr(logging, logging_level.upper()) + logger.setLevel(level) -stream_handler.setFormatter(formatter) -stream_handler.setLevel(logging.INFO) + formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + ) -file_handler.setFormatter(formatter) -file_handler.setLevel(logging.DEBUG) + console_handler = logging.StreamHandler() + console_handler.setFormatter(formatter) + logger.addHandler(console_handler) -logger.addHandler(stream_handler) -logger.addHandler(file_handler) \ No newline at end of file + if save_to_file: + file_handler = logging.FileHandler(log_file) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) \ No newline at end of file diff --git a/concall_parser/parser.py b/concall_parser/parser.py index 0015521..b1d2136 100644 --- a/concall_parser/parser.py +++ b/concall_parser/parser.py @@ -4,6 +4,7 @@ from concall_parser.extractors.management_case_extractor import ( ManagementCaseExtractor, ) +from concall_parser.log_config import configure_logger from concall_parser.utils.file_utils import ( 
get_document_transcript, get_transcript_from_link, @@ -13,20 +14,41 @@ class ConcallParser: """Parses the conference call transcript.""" - def __init__(self, path: str = None, link: str = None): - self.transcript = self._get_document_transcript( - filepath=path, link=link - ) - self.groq_api_key = get_groq_api_key() - self.groq_model = get_groq_model() + def __init__( + self, + path: str = None, + link: str = None, + groq_api_key: str | None = None, + groq_model: str = "llama3:70b-8192", + save_logs_to_file: bool = False, + logging_level: str = "INFO", + log_file: str = "app.log", + ): + """Initialize ConcallParser. + + Args: + path: Path to local PDF file + link: URL to PDF file + groq_api_key: Optional Groq API key (falls back to env var) + groq_model: Optional Groq model name (falls back to env var) + save_logs_to_file: Whether to save logs to file + logging_level: Logging level (DEBUG/INFO/WARNING/ERROR) + log_file: Log file path when save_logs_to_file is True + """ + self.transcript = self._get_document_transcript(filepath=path, link=link) + self.groq_api_key = groq_api_key if groq_api_key else get_groq_api_key() + self.groq_model = groq_model if groq_model else get_groq_model() self.company_and_management_extractor = CompanyAndManagementExtractor() self.dialogue_extractor = DialogueExtractor() self.management_case_extractor = ManagementCaseExtractor() + configure_logger( + save_to_file=save_logs_to_file, + logging_level=logging_level, + log_file=log_file, + ) - def _get_document_transcript( - self, filepath: str, link: str - ) -> dict[int, str]: + def _get_document_transcript(self, filepath: str, link: str) -> dict[int, str]: """Extracts text of a pdf document. 
Takes in a filepath (locally stored document) or link (online doc) to extract document @@ -75,11 +97,9 @@ def extract_concall_info(self) -> dict: def extract_commentary(self) -> list: """Extracts commentary from the input.""" - response = ( - self.dialogue_extractor.extract_commentary_and_future_outlook( - transcript=self.transcript, - groq_model=self.groq_model, - ) + response = self.dialogue_extractor.extract_commentary_and_future_outlook( + transcript=self.transcript, + groq_model=self.groq_model, ) return response