From 191f8efc8a0ebc68fa3d00db9f71681ff13ca70e Mon Sep 17 00:00:00 2001 From: pranshu-raj-211 Date: Wed, 23 Apr 2025 23:22:18 +0530 Subject: [PATCH 1/2] modified concall parser instance creation add options for log config, api key and groq model inputs while creating object change logger - user can define what they need (file, level options) update README with note about Indian concall support Signed-off-by: pranshu-raj-211 --- README.md | 4 ++- concall_parser/log_config.py | 39 +++++++++++++++++++---------- concall_parser/parser.py | 48 +++++++++++++++++++++++++----------- 3 files changed, 63 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 99187d2..17087f8 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,13 @@ # Concall Parser -**Concall Parser** is an open-source Python library designed to efficiently extract insights from earnings call (concall) transcripts. It enables seamless extraction of management commentary, analyst discussions, company information, perfect for building financial research tools, summarizers, or investor dashboards.. +**Concall Parser** is an open-source Python library designed to efficiently extract insights from earnings call (concall) transcripts. It enables seamless extraction of management commentary, analyst discussions, company information, perfect for building financial research tools, summarizers, or investor dashboards. Check out the repo at [Github](https://github.com/JS12540/concall-parser/). --- +**Note:** We currently support earnings calls of Indian companies (BSE, NSE registered) only. + ## 📦 Installation Install the library using pip: diff --git a/concall_parser/log_config.py b/concall_parser/log_config.py index 04febfa..56817da 100644 --- a/concall_parser/log_config.py +++ b/concall_parser/log_config.py @@ -1,19 +1,32 @@ import logging -logger = logging.Logger("concall_logger") -logger.setLevel(logging.DEBUG) +logger = logging.getLogger("concall_parser") -stream_handler = logging.StreamHandler() -file_handler = logging.FileHandler(filename='app.log', mode='w') -formatter = logging.Formatter( - fmt="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s" -) +def configure_logger( + save_to_file: bool = False, + logging_level: str = "INFO", + log_file: str = "app.log" +) -> None: + """Configure the global logger. + + Args: + save_to_file: Whether to save logs to file + logging_level: Logging level (DEBUG/INFO/WARNING/ERROR) + log_file: Log file path when save_to_file is True + """ + logger.handlers.clear() + level = getattr(logging, logging_level.upper()) + logger.setLevel(level) -stream_handler.setFormatter(formatter) -stream_handler.setLevel(logging.INFO) + formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + ) -file_handler.setFormatter(formatter) -file_handler.setLevel(logging.DEBUG) + console_handler = logging.StreamHandler() + console_handler.setFormatter(formatter) + logger.addHandler(console_handler) -logger.addHandler(stream_handler) -logger.addHandler(file_handler) \ No newline at end of file + if save_to_file: + file_handler = logging.FileHandler(log_file) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) \ No newline at end of file diff --git a/concall_parser/parser.py b/concall_parser/parser.py index 0015521..b1d2136 100644 --- a/concall_parser/parser.py +++ b/concall_parser/parser.py @@ -4,6 +4,7 @@ from concall_parser.extractors.management_case_extractor import ( ManagementCaseExtractor, ) +from concall_parser.log_config import configure_logger from concall_parser.utils.file_utils import ( get_document_transcript, get_transcript_from_link, @@ -13,20 +14,41 @@ class ConcallParser: """Parses the conference call transcript.""" - def __init__(self, path: str = None, link: str = None): - self.transcript = self._get_document_transcript( - filepath=path, link=link - ) - self.groq_api_key = get_groq_api_key() - self.groq_model = get_groq_model() + def __init__( + self, + path: str = None, + link: str = None, + groq_api_key: str | None = None, + groq_model: str = "llama3:70b-8192", + save_logs_to_file: bool = False, + logging_level: str = "INFO", + log_file: str = "app.log", + ): + """Initialize ConcallParser. + + Args: + path: Path to local PDF file + link: URL to PDF file + groq_api_key: Optional Groq API key (falls back to env var) + groq_model: Optional Groq model name (falls back to env var) + save_logs_to_file: Whether to save logs to file + logging_level: Logging level (DEBUG/INFO/WARNING/ERROR) + log_file: Log file path when save_logs_to_file is True + """ + self.transcript = self._get_document_transcript(filepath=path, link=link) + self.groq_api_key = groq_api_key if groq_api_key else get_groq_api_key() + self.groq_model = groq_model if groq_model else get_groq_model() self.company_and_management_extractor = CompanyAndManagementExtractor() self.dialogue_extractor = DialogueExtractor() self.management_case_extractor = ManagementCaseExtractor() + configure_logger( + save_to_file=save_logs_to_file, + logging_level=logging_level, + log_file=log_file, + ) - def _get_document_transcript( - self, filepath: str, link: str - ) -> dict[int, str]: + def _get_document_transcript(self, filepath: str, link: str) -> dict[int, str]: """Extracts text of a pdf document. Takes in a filepath (locally stored document) or link (online doc) to extract document @@ -75,11 +97,9 @@ def extract_concall_info(self) -> dict: def extract_commentary(self) -> list: """Extracts commentary from the input.""" - response = ( - self.dialogue_extractor.extract_commentary_and_future_outlook( - transcript=self.transcript, - groq_model=self.groq_model, - ) + response = self.dialogue_extractor.extract_commentary_and_future_outlook( + transcript=self.transcript, + groq_model=self.groq_model, ) return response From d8fd92ea41c4c112438194dbcd3feb50b48b642b Mon Sep 17 00:00:00 2001 From: Pranshu Raj <113211320+pranshu-raj-211@users.noreply.github.com> Date: Sat, 26 Apr 2025 11:19:21 +0530 Subject: [PATCH 2/2] Update README.md Add example of passing in groq env directly in parser object init --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 17087f8..358f270 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,13 @@ export GROQ_API_KEY="YOUR GROQ API KEY" export GROQ_MODEL="YOUR GROQ MODEL NAME" ``` +Or just pass in the values when creating the parser object. + +```python +parser = ConcallParser(path="path/to/concall.pdf", groq_api_key=your_api_key, groq_model=preferred_groq_model) +``` + + We use llama3-70b-8192 as the default model if any groq supported models are not provided as env. ## ✨ Features