Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# Concall Parser

**Concall Parser** is an open-source Python library designed to efficiently extract insights from earnings call (concall) transcripts. It enables seamless extraction of management commentary, analyst discussions, company information, perfect for building financial research tools, summarizers, or investor dashboards..
**Concall Parser** is an open-source Python library designed to efficiently extract insights from earnings call (concall) transcripts. It enables seamless extraction of management commentary, analyst discussions, and company information, making it perfect for building financial research tools, summarizers, or investor dashboards.

Check out the repo on [GitHub](https://github.com/JS12540/concall-parser/).

---

**Note:** We currently support earnings calls of Indian companies (BSE, NSE registered) only.

## 📦 Installation

Install the library using pip:
Expand Down Expand Up @@ -44,6 +46,13 @@ export GROQ_API_KEY="YOUR GROQ API KEY"
export GROQ_MODEL="YOUR GROQ MODEL NAME"
```

Or just pass in the values when creating the parser object.

```python
parser = ConcallParser(path="path/to/concall.pdf", groq_api_key=your_api_key, groq_model=preferred_groq_model)
```


We use llama3-70b-8192 as the default model if no Groq-supported model is provided via the environment variable.

## ✨ Features
Expand Down
39 changes: 26 additions & 13 deletions concall_parser/log_config.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,32 @@
import logging

logger = logging.Logger("concall_logger")
logger.setLevel(logging.DEBUG)
logger = logging.getLogger("concall_parser")

stream_handler = logging.StreamHandler()
file_handler = logging.FileHandler(filename='app.log', mode='w')
formatter = logging.Formatter(
fmt="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s"
)
def configure_logger(
save_to_file: bool = False,
logging_level: str = "INFO",
log_file: str = "app.log"
) -> None:
"""Configure the module-level ``logger``.

Detaches every handler currently attached to the logger, sets the logger
level, and attaches a fresh console handler, plus a file handler when
``save_to_file`` is True. Detached handlers are only removed from the
logger here; they are not closed.

Args:
save_to_file: Whether to also save logs to ``log_file``.
logging_level: Name of a ``logging`` level (DEBUG/INFO/WARNING/ERROR).
It is upper-cased before lookup, so the name is case-insensitive.
log_file: Log file path, used only when save_to_file is True.

Raises:
AttributeError: If the upper-cased ``logging_level`` is not an attribute
of the ``logging`` module.
"""
logger.handlers.clear()
level = getattr(logging, logging_level.upper())
logger.setLevel(level)

stream_handler.setFormatter(formatter)
stream_handler.setLevel(logging.INFO)
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

file_handler.setFormatter(formatter)
file_handler.setLevel(logging.DEBUG)
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)

logger.addHandler(stream_handler)
logger.addHandler(file_handler)
if save_to_file:
file_handler = logging.FileHandler(log_file)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
48 changes: 34 additions & 14 deletions concall_parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from concall_parser.extractors.management_case_extractor import (
ManagementCaseExtractor,
)
from concall_parser.log_config import configure_logger
from concall_parser.utils.file_utils import (
get_document_transcript,
get_transcript_from_link,
Expand All @@ -13,20 +14,41 @@
class ConcallParser:
"""Parses the conference call transcript."""

def __init__(self, path: str = None, link: str = None):
self.transcript = self._get_document_transcript(
filepath=path, link=link
)
self.groq_api_key = get_groq_api_key()
self.groq_model = get_groq_model()
def __init__(
    self,
    path: str | None = None,
    link: str | None = None,
    groq_api_key: str | None = None,
    groq_model: str | None = None,
    save_logs_to_file: bool = False,
    logging_level: str = "INFO",
    log_file: str = "app.log",
):
    """Initialize ConcallParser.

    Args:
        path: Path to local PDF file
        link: URL to PDF file
        groq_api_key: Optional Groq API key. When omitted (or empty), the
            value returned by ``get_groq_api_key()`` is used instead.
        groq_model: Optional Groq model name. When omitted (or empty), the
            value returned by ``get_groq_model()`` is used instead.
        save_logs_to_file: Whether to save logs to file
        logging_level: Logging level (DEBUG/INFO/WARNING/ERROR)
        log_file: Log file path when save_logs_to_file is True
    """
    # Configure logging before any other work so that records emitted while
    # the transcript is read and the extractors are built already honour the
    # requested level and handlers.
    configure_logger(
        save_to_file=save_logs_to_file,
        logging_level=logging_level,
        log_file=log_file,
    )

    self.transcript = self._get_document_transcript(filepath=path, link=link)

    # An explicit argument wins; otherwise defer to the helper functions.
    # The model default is None rather than a hard-coded name: a non-empty
    # default string is always truthy, which made the fallback unreachable.
    # NOTE(review): get_groq_model() is assumed to supply the library default
    # (README: llama3-70b-8192) when the env var is unset - confirm.
    self.groq_api_key = groq_api_key if groq_api_key else get_groq_api_key()
    self.groq_model = groq_model if groq_model else get_groq_model()

    self.company_and_management_extractor = CompanyAndManagementExtractor()
    self.dialogue_extractor = DialogueExtractor()
    self.management_case_extractor = ManagementCaseExtractor()

def _get_document_transcript(
self, filepath: str, link: str
) -> dict[int, str]:
def _get_document_transcript(self, filepath: str, link: str) -> dict[int, str]:
"""Extracts text of a pdf document.

Takes in a filepath (locally stored document) or link (online doc) to extract document
Expand Down Expand Up @@ -75,11 +97,9 @@ def extract_concall_info(self) -> dict:

def extract_commentary(self) -> list:
"""Extracts commentary from the input.

Delegates to ``self.dialogue_extractor.extract_commentary_and_future_outlook``,
passing ``self.transcript`` and ``self.groq_model``, and returns that
call's result unchanged.
"""
response = (
self.dialogue_extractor.extract_commentary_and_future_outlook(
transcript=self.transcript,
groq_model=self.groq_model,
)
response = self.dialogue_extractor.extract_commentary_and_future_outlook(
transcript=self.transcript,
groq_model=self.groq_model,
)
return response

Expand Down
Loading