From 6b4a7ea9e6711bc920e22f1cda8bc9c8de4820c8 Mon Sep 17 00:00:00 2001 From: Krrish Ghimire Date: Wed, 25 Mar 2026 14:28:42 +0545 Subject: [PATCH] refactor --- backend/core/admin/github_admin.py | 3 - backend/core/serializers/ai_provider.py | 1 - backend/core/serializers/app_integration.py | 20 +- backend/core/serializers/chatroom.py | 1 - backend/core/services/abstractions.py | 70 +++ backend/core/services/ai_client_service.py | 167 ++++-- .../core/services/ai_provider_validator.py | 79 +++ .../core/services/github_data_processors.py | 128 +++++ .../core/services/github_graphql_client.py | 9 - backend/core/services/github_ingestion.py | 527 ++++++------------ .../github_issue_ingestion_service.py | 79 +++ .../services/github_pr_ingestion_service.py | 107 ++++ .../services/github_repository_manager.py | 68 +++ backend/core/tasks/message.py | 80 --- backend/core/utils.py | 2 - backend/core/utils/config_manager.py | 174 ++++++ backend/core/utils/error_handling.py | 190 +++++++ backend/core/views/ai_provider.py | 290 ++++------ backend/core/views/application.py | 7 +- backend/core/views/dummy_view.py | 5 - backend/core/views/ingestion.py | 4 +- backend/core/views/knowledge_base.py | 3 +- frontend/package-lock.json | 7 +- 23 files changed, 1313 insertions(+), 708 deletions(-) create mode 100644 backend/core/services/abstractions.py create mode 100644 backend/core/services/ai_provider_validator.py create mode 100644 backend/core/services/github_data_processors.py create mode 100644 backend/core/services/github_issue_ingestion_service.py create mode 100644 backend/core/services/github_pr_ingestion_service.py create mode 100644 backend/core/services/github_repository_manager.py create mode 100644 backend/core/utils/config_manager.py create mode 100644 backend/core/utils/error_handling.py diff --git a/backend/core/admin/github_admin.py b/backend/core/admin/github_admin.py index 72db31a..d606a58 100644 --- a/backend/core/admin/github_admin.py +++ 
b/backend/core/admin/github_admin.py @@ -281,9 +281,6 @@ class GitHubRepositoryFileAdmin(admin.ModelAdmin): def get_queryset(self, request): return super().get_queryset(request).select_related('repository') - - -# Register inline admins for comments @admin.register(GitHubIssueComment) class GitHubIssueCommentAdmin(admin.ModelAdmin): """Admin interface for GitHub issue comments""" diff --git a/backend/core/serializers/ai_provider.py b/backend/core/serializers/ai_provider.py index 4634d08..1e047ba 100644 --- a/backend/core/serializers/ai_provider.py +++ b/backend/core/serializers/ai_provider.py @@ -25,7 +25,6 @@ def __init__(self, *args, **kwargs): if self.instance is not None: self.fields['provider_api_key'].required = False self.fields['provider_api_key'].allow_blank = True - # Make provider read-only for updates instead of removing it if 'provider' in self.fields: self.fields['provider'].read_only = True diff --git a/backend/core/serializers/app_integration.py b/backend/core/serializers/app_integration.py index 87c24fa..90e665b 100644 --- a/backend/core/serializers/app_integration.py +++ b/backend/core/serializers/app_integration.py @@ -1,12 +1,18 @@ from rest_framework import serializers +from typing import Dict, Any from core.models.app_integration import AppIntegration from core.serializers.integration import IntegrationViewSerializer + class AppIntegrationCreateSerializer(serializers.ModelSerializer): class Meta: model = AppIntegration fields = ['application', 'integration'] + + def validate(self, attrs: Dict[str, Any]) -> Dict[str, Any]: + return attrs + class AppIntegrationViewSerializer(serializers.ModelSerializer): integration = IntegrationViewSerializer(read_only=True) @@ -17,11 +23,17 @@ class Meta: 'id', 'integration', 'metadata', - 'created_at', 'updated_at' + 'created_at', + 'updated_at' ] + read_only_fields = ['id', 'created_at', 'updated_at'] - def to_representation(self, instance): + def to_representation(self, instance: AppIntegration) -> 
Dict[str, Any]: integration_data = IntegrationViewSerializer(instance.integration).data - integration_data["metadata"] = instance.metadata - integration_data["app_integration_uuid"] = str(instance.id) + + integration_data.update({ + 'metadata': instance.metadata or {}, + 'app_integration_uuid': str(instance.id) + }) + return integration_data diff --git a/backend/core/serializers/chatroom.py b/backend/core/serializers/chatroom.py index 11ae2d7..434fd47 100644 --- a/backend/core/serializers/chatroom.py +++ b/backend/core/serializers/chatroom.py @@ -40,7 +40,6 @@ class Meta: fields = ['uuid', 'name', 'last_message', 'has_unread'] def get_last_message(self, chatroom): - # Widget users (non-dashboard) must not see internal messages in the preview user_identifier = self.context.get('user_identifier', '') is_dashboard = user_identifier.startswith('dashboard_') qs = chatroom.messages.order_by('-created_at') diff --git a/backend/core/services/abstractions.py b/backend/core/services/abstractions.py new file mode 100644 index 0000000..59870b8 --- /dev/null +++ b/backend/core/services/abstractions.py @@ -0,0 +1,70 @@ +from abc import ABC, abstractmethod +from typing import List, Dict, Any, Optional, Tuple + + +class DataProcessor(ABC): + @abstractmethod + def process_data(self, data: Dict[str, Any]) -> Dict[str, Any]: + pass + + @abstractmethod + def validate_data(self, data: Dict[str, Any]) -> bool: + pass + + +class IngestionService(ABC): + @abstractmethod + def ingest(self, owner: str, repo: str, since: Optional[str] = None) -> None: + pass + + @abstractmethod + def get_status(self) -> str: + pass + + +class AIProviderInterface(ABC): + @abstractmethod + def create_client(self, api_key: str, config: Dict[str, Any]) -> Any: + pass + + @abstractmethod + def validate_connection(self, api_key: str, config: Dict[str, Any]) -> Tuple[bool, Any]: + pass + + @abstractmethod + def get_models(self) -> List[str]: + pass + + +class RepositoryManagerInterface(ABC): + @abstractmethod + def 
get_or_create_repository(self, owner: str, repo: str) -> Any: + pass + + @abstractmethod + def update_ingestion_status(self, repository: Any, status: str) -> None: + pass + + +class ValidationService(ABC): + @abstractmethod + def validate(self, data: Dict[str, Any]) -> Tuple[bool, Any]: + pass + + @abstractmethod + def get_validation_errors(self, data: Dict[str, Any]) -> List[str]: + pass + + +class EmbeddingService(ABC): + @abstractmethod + def create_embeddings(self, text: str) -> List[float]: + pass + + @abstractmethod + def create_sparse_embeddings(self, text: str) -> Optional[Dict[str, Any]]: + pass + + @abstractmethod + def store_embeddings(self, embeddings: List[Any]) -> bool: + pass diff --git a/backend/core/services/ai_client_service.py b/backend/core/services/ai_client_service.py index 88af8e2..70d1455 100644 --- a/backend/core/services/ai_client_service.py +++ b/backend/core/services/ai_client_service.py @@ -1,13 +1,15 @@ -from typing import Optional, Tuple, Any +from typing import Optional, Tuple, Any, Dict from core.models import Application, AIProvider, AppAIProvider from .factories.ai_provider_factory import AIProviderFactory +from .ai_provider_validator import AIProviderValidator class AIClientService: def __init__(self): self.provider_factory = AIProviderFactory() + self.validator = AIProviderValidator() def get_client_and_model( self, @@ -17,63 +19,126 @@ def get_client_and_model( context: str = 'response', capability: str = 'text' ) -> Tuple[Optional[Any], Optional[str]]: - provider = None - selected_model = model + """ + Get AI client and model for the given application. 
+ + Args: + app: Application instance + ai_provider_id: Specific AI provider ID (optional) + model: Specific model name (optional) + context: Usage context + capability: Required capability + + Returns: + Tuple of (client_instance, model_name) + """ + provider_config = self._resolve_provider_config(app, ai_provider_id, context, capability) + + if not provider_config: + return None, None + + client = self._create_client(provider_config) + selected_model = model or provider_config.get('model') + + if not selected_model: + selected_model = self._get_default_model(client) + + return client, selected_model + def _resolve_provider_config( + self, + app: Application, + ai_provider_id: Optional[int], + context: str, + capability: str + ) -> Optional[Dict[str, Any]]: + """Resolve provider configuration based on input parameters""" if ai_provider_id: - try: - ai_provider = AIProvider.objects.get(id=ai_provider_id) - provider = self.provider_factory.create_provider( - provider_type=ai_provider.provider, - api_key=ai_provider.provider_api_key, - config=ai_provider.metadata or {} - ) - except AIProvider.DoesNotExist: - return None, None + return self._get_provider_by_id(ai_provider_id) else: - config = self._get_app_provider_config(app, context, capability) - if config: - ai_provider = config.ai_provider - provider = self.provider_factory.create_provider( - provider_type=ai_provider.provider, - api_key=ai_provider.provider_api_key, - config=ai_provider.metadata or {} - ) - if not selected_model and config.external_model_id: - selected_model = config.external_model_id + return self._get_app_provider_config(app, context, capability) - if not provider: - return None, None - - if not selected_model: - try: - supported_models = provider.get_models() - selected_model = supported_models[0]['name'] if supported_models else 'default' - except Exception: - selected_model = 'default' - - return provider, selected_model + def _get_provider_by_id(self, ai_provider_id: int) -> 
Optional[Dict[str, Any]]: + """Get provider configuration by ID""" + try: + ai_provider = AIProvider.objects.get(id=ai_provider_id) + return { + 'provider': ai_provider, + 'type': ai_provider.provider, + 'api_key': ai_provider.provider_api_key, + 'config': ai_provider.metadata or {} + } + except AIProvider.DoesNotExist: + return None def _get_app_provider_config( - self, - app: Application, - context: str, + self, + app: Application, + context: str, capability: str - ) -> Optional[AppAIProvider]: - config = AppAIProvider.objects.filter( - application=app, + ) -> Optional[Dict[str, Any]]: + """Get provider configuration from application settings""" + try: + config = self._get_app_provider(app, context, capability) + if not config: + return None + + return { + 'provider': config.ai_provider, + 'type': config.ai_provider.provider, + 'api_key': config.ai_provider.provider_api_key, + 'config': config.ai_provider.metadata or {}, + 'model': config.external_model_id + } + except Exception: + return None + + def _get_app_provider(self, app: Application, context: str, capability: str) -> Optional[AppAIProvider]: + return app.app_ai_providers.filter( context=context, - capability=capability, - is_active=True, - ai_provider__is_builtin=True - ).select_related('ai_provider').first() + capability=capability + ).first() + + def _create_client(self, provider_config: Dict[str, Any]) -> Optional[Any]: + """Create AI client instance""" + try: + return self.provider_factory.create_provider( + provider_type=provider_config['type'], + api_key=provider_config['api_key'], + config=provider_config['config'] + ) + except Exception: + return None - if not config: - config = AppAIProvider.objects.filter( - application=app, - context=context, - capability=capability, - is_active=True - ).select_related('ai_provider').order_by('priority').first() + def _get_default_model(self, client: Any) -> Optional[str]: + """Get default model from client""" + try: + supported_models = client.get_models() + 
return supported_models[0] if supported_models else None + except Exception: + return None - return config + def validate_ai_provider( + self, + validated_data: Dict[str, Any], + instance: AIProvider = None + ) -> Tuple[bool, Any]: + """ + Validate AI provider configuration + + Args: + validated_data: Validated data from serializer + instance: Existing instance (for updates) + + Returns: + Tuple of (is_valid, result) + """ + main_data, config_data = self.validator.validate_ai_provider_data(validated_data, instance) + + is_valid, provider_models = self.validator.validate_provider_config( + provider_type=main_data['provider'], + api_key=main_data['provider_api_key'], + config=config_data + ) + + return is_valid, provider_models if is_valid else None diff --git a/backend/core/services/ai_provider_validator.py b/backend/core/services/ai_provider_validator.py new file mode 100644 index 0000000..b15109c --- /dev/null +++ b/backend/core/services/ai_provider_validator.py @@ -0,0 +1,79 @@ +from typing import Tuple, Dict, Any + +from core.models import AIProvider +from .factories.ai_provider_factory import AIProviderFactory + + +class AIProviderValidator: + """Validates AI provider configurations""" + + def __init__(self): + self.provider_factory = AIProviderFactory() + + def validate_provider_config( + self, + provider_type: str, + api_key: str, + config: Dict[str, Any] + ) -> Tuple[bool, Any]: + """ + Validate AI provider configuration + + Args: + provider_type: Type of AI provider + api_key: API key for the provider + config: Additional configuration + + Returns: + Tuple of (is_valid, provider_models) + """ + try: + return self.provider_factory.validate_provider( + provider_type=provider_type, + api_key=api_key, + config=config + ) + except Exception as e: + return False, str(e) + + def validate_ai_provider_data( + self, + validated_data: Dict[str, Any], + instance: AIProvider = None + ) -> Tuple[Dict[str, Any], Dict[str, Any]]: + """ + Prepare and validate AI provider 
data + + Args: + validated_data: Validated serializer data + instance: Existing AI provider instance (for updates) + + Returns: + Tuple of (main_fields, config_data) + """ + main_fields = ['name', 'provider', 'provider_api_key'] + config = {} + + if instance: + current_data = { + 'name': instance.name, + 'provider': instance.provider, + 'provider_api_key': instance.provider_api_key + } + if instance.metadata: + config.update(instance.metadata) + update_data = {**current_data, **validated_data} + if not update_data['provider_api_key']: + update_data['provider_api_key'] = instance.provider_api_key + validation_data = update_data + else: + validation_data = validated_data + + main_data = {} + for field, value in validation_data.items(): + if field in main_fields: + main_data[field] = value + else: + config[field] = str(value).strip() if value is not None else '' + + return main_data, config diff --git a/backend/core/services/github_data_processors.py b/backend/core/services/github_data_processors.py new file mode 100644 index 0000000..09b9bf2 --- /dev/null +++ b/backend/core/services/github_data_processors.py @@ -0,0 +1,128 @@ +import logging +from datetime import datetime +from typing import Dict, List, Any, Optional +from django.utils import timezone + +from core.models.github_data import ( + GitHubRepository, GitHubIssue, GitHubIssueComment, + GitHubPullRequest, GitHubPRComment, GitHubPRFile +) +from core.services.abstractions import DataProcessor + +logger = logging.getLogger(__name__) + + +class BaseDataProcessor(DataProcessor): + def validate_data(self, data: Dict[str, Any]) -> bool: + return isinstance(data, dict) and bool(data) + + @staticmethod + def parse_iso_datetime(date_string: Optional[str]) -> Optional[datetime]: + if not date_string: + return None + try: + return datetime.fromisoformat(date_string.replace('Z', '+00:00')) + except (ValueError, AttributeError) as e: + logger.warning(f"Failed to parse datetime '{date_string}': {e}") + return None + + 
@staticmethod + def extract_user_login(user_data: Dict[str, Any]) -> str: + return user_data.get('login', '') if user_data else '' + + @staticmethod + def extract_label_names(labels_data: List[Dict[str, Any]]) -> List[str]: + return [label.get('name', '') for label in labels_data if label.get('name')] + + +class RepositoryDataProcessor(BaseDataProcessor): + def process_data(self, repo_data: Dict[str, Any]) -> Dict[str, Any]: + if not self.validate_data(repo_data): + raise ValueError("Invalid repository data") + + return { + 'name': repo_data.get('name', ''), + 'description': repo_data.get('description', ''), + 'url': repo_data.get('html_url', ''), + 'is_private': repo_data.get('private', False), + 'default_branch': repo_data.get('default_branch', 'main') + } + + +class IssueDataProcessor(BaseDataProcessor): + def process_data(self, issue_data: Dict[str, Any]) -> Dict[str, Any]: + return { + 'number': issue_data['number'], + 'title': issue_data['title'], + 'body': issue_data.get('body', '') or '', + 'state': issue_data['state'], + 'author': self.extract_user_login(issue_data.get('user')), + 'author_association': issue_data.get('author_association', ''), + 'assignees': [user['login'] for user in issue_data.get('assignees', [])], + 'labels': self.extract_label_names(issue_data.get('labels', [])), + 'milestone': issue_data.get('milestone'), + 'locked': issue_data.get('locked', False), + 'created_at': self.parse_iso_datetime(issue_data['created_at']), + 'updated_at': self.parse_iso_datetime(issue_data['updated_at']), + 'closed_at': self.parse_iso_datetime(issue_data.get('closed_at')), + 'url': issue_data['html_url'] + } + + def process_comment_data(self, comment_data: Dict[str, Any]) -> Dict[str, Any]: + return { + 'body': comment_data['body'], + 'author': self.extract_user_login(comment_data.get('user')), + 'author_association': comment_data.get('author_association', ''), + 'created_at': self.parse_iso_datetime(comment_data['created_at']), + 'updated_at': 
self.parse_iso_datetime(comment_data['updated_at']), + 'url': comment_data['html_url'] + } + + +class PullRequestDataProcessor(BaseDataProcessor): + def process_data(self, pr_data: Dict[str, Any]) -> Dict[str, Any]: + return { + 'number': pr_data['number'], + 'title': pr_data['title'], + 'body': pr_data.get('body', '') or '', + 'state': pr_data['state'], + 'author': self.extract_user_login(pr_data.get('user')), + 'author_association': pr_data.get('author_association', ''), + 'assignees': [user['login'] for user in pr_data.get('assignees', [])], + 'labels': self.extract_label_names(pr_data.get('labels', [])), + 'milestone': pr_data.get('milestone'), + 'head_branch': pr_data['head']['ref'] if pr_data.get('head') else '', + 'base_branch': pr_data['base']['ref'] if pr_data.get('base') else '', + 'merged': pr_data.get('merged', False), + 'merged_at': self.parse_iso_datetime(pr_data.get('merged_at')), + 'merge_commit_sha': pr_data.get('merge_commit_sha', ''), + 'additions': pr_data.get('additions', 0), + 'deletions': pr_data.get('deletions', 0), + 'changed_files': pr_data.get('changed_files', 0), + 'created_at': self.parse_iso_datetime(pr_data['created_at']), + 'updated_at': self.parse_iso_datetime(pr_data['updated_at']), + 'closed_at': self.parse_iso_datetime(pr_data.get('closed_at')), + 'url': pr_data['html_url'] + } + + def process_comment_data(self, comment_data: Dict[str, Any]) -> Dict[str, Any]: + return { + 'body': comment_data['body'], + 'author': self.extract_user_login(comment_data.get('user')), + 'author_association': comment_data.get('author_association', ''), + 'created_at': self.parse_iso_datetime(comment_data['created_at']), + 'updated_at': self.parse_iso_datetime(comment_data['updated_at']), + 'url': comment_data['html_url'] + } + + def process_file_data(self, file_data: Dict[str, Any]) -> Dict[str, Any]: + return { + 'status': file_data['status'], + 'additions': file_data.get('additions', 0), + 'deletions': file_data.get('deletions', 0), + 'changes': 
file_data.get('changes', 0), + 'patch': file_data.get('patch', '') or '', + 'blob_url': file_data.get('blob_url', ''), + 'raw_url': file_data.get('raw_url', ''), + 'contents_url': file_data.get('contents_url', '') + } diff --git a/backend/core/services/github_graphql_client.py b/backend/core/services/github_graphql_client.py index 139c707..80fc390 100644 --- a/backend/core/services/github_graphql_client.py +++ b/backend/core/services/github_graphql_client.py @@ -16,7 +16,6 @@ def __init__(self, token: str): self.token = token self.endpoint = "https://api.github.com/graphql" - # Configure transport with proper headers self.transport = RequestsHTTPTransport( url=self.endpoint, headers={ @@ -48,18 +47,15 @@ def _execute_query(self, query: str, variables: Optional[Dict[str, Any]] = None) except TransportServerError as e: error_data = str(e) - # Check for rate limiting if "rate limit" in error_data.lower() or "api rate limit exceeded" in error_data.lower(): logger.warning(f"Rate limit hit, waiting {retry_delay * (2 ** attempt)} seconds") time.sleep(retry_delay * (2 ** attempt)) continue - # Check for authentication errors if "bad credentials" in error_data.lower() or "unauthorized" in error_data.lower(): logger.error(f"Authentication error: {e}") raise - # Other GraphQL errors logger.error(f"GraphQL execution error: {e}") if attempt < max_retries - 1: time.sleep(retry_delay * (2 ** attempt)) @@ -91,14 +87,9 @@ def get_issues_with_comments(self, owner: str, repo: str, since: ISO datetime string for filtering by creation date first: Number of items per page after_cursor: Pagination cursor - - Returns: - Dictionary containing issues and pagination info - """ if states is None: states = ["OPEN", "CLOSED"] - # Convert states to GraphQL enum format state_filter = "[" + ", ".join([f"{state.upper()}" for state in states]) + "]" query = """ diff --git a/backend/core/services/github_ingestion.py b/backend/core/services/github_ingestion.py index 3db9f1d..580205e 100644 --- 
a/backend/core/services/github_ingestion.py +++ b/backend/core/services/github_ingestion.py @@ -1,17 +1,12 @@ import logging -from datetime import datetime -from typing import Dict, List, Optional, Any +from typing import Optional from django.utils import timezone -from django.db import transaction -from django.db.models import Q -from core.models.github_data import ( - GitHubRepository, GitHubIssue, GitHubIssueComment, GitHubPullRequest, - GitHubPRComment, GitHubPRFile -) from core.models import AppIntegration -from core.services.github_client import GitHubAPIClient from core.services.github_graphql_ingestion import GitHubGraphQLIngestionService +from core.services.github_repository_manager import GitHubRepositoryManager +from core.services.github_issue_ingestion_service import GitHubIssueIngestionService +from core.services.github_pr_ingestion_service import GitHubPRIngestionService from core.services.ingestion import chunk_text, embed_text, embed_sparse from core.models import IngestedChunk from core.qdrant import qdrant, COLLECTION_NAME @@ -23,386 +18,190 @@ class GitHubDataIngestionService: - """Production-grade GitHub data ingestion service with GraphQL support""" - def __init__(self, app_integration: AppIntegration, use_graphql: bool = True): self.app_integration = app_integration self.use_graphql = use_graphql - self.github_client = None - self.graphql_service = None + self._graphql_service = None + self._repository_manager = None self.repository = None def _get_graphql_service(self) -> GitHubGraphQLIngestionService: - """Initialize GraphQL service""" - if not self.graphql_service: - self.graphql_service = GitHubGraphQLIngestionService(self.app_integration) - return self.graphql_service - - def _get_github_client(self) -> GitHubAPIClient: - """Initialize GitHub client""" - if not self.github_client: - token = self.app_integration.integration.config.get('token') - if not token: - raise ValueError("GitHub token not found in integration config") - 
self.github_client = GitHubAPIClient(token) - return self.github_client - - def _get_or_create_repository(self, owner: str, repo: str) -> GitHubRepository: - """Get or create repository record""" - full_name = f"{owner}/{repo}" - - repository, created = GitHubRepository.objects.get_or_create( - full_name=full_name, - defaults={ - 'name': repo, - 'repo_owner': owner, - 'app_integration': self.app_integration, - 'ingestion_status': 'pending' - } - ) - - if created: - try: - client = self._get_github_client() - repo_info = client.get_repository_info(owner, repo) + if not self._graphql_service: + self._graphql_service = GitHubGraphQLIngestionService(self.app_integration) + return self._graphql_service - repository.description = repo_info.get('description', '') - repository.url = repo_info.get('html_url', '') - repository.is_private = repo_info.get('private', False) - repository.default_branch = repo_info.get('default_branch', 'main') - repository.save() - - logger.info(f"Created repository record for {full_name}") - except Exception as e: - logger.error(f"Failed to fetch repository info for {full_name}: {e}") - repository.delete() - raise - - self.repository = repository - return repository - - def _ingest_issues(self, owner: str, repo: str, since: Optional[str] = None): - """Ingest issues and comments""" - client = self._get_github_client() + def _get_repository_manager(self) -> GitHubRepositoryManager: + if not self._repository_manager: + self._repository_manager = GitHubRepositoryManager(self.app_integration) + return self._repository_manager + def ingest_repository_data(self, owner: str, repo: str, since: Optional[str] = None): try: - issues = client.get_issues(owner, repo, state='all', since=since) - logger.info(f"Found {len(issues)} issues for {owner}/{repo}") - - for issue_data in issues: - self._ingest_single_issue(issue_data, owner, repo) - - except Exception as e: - logger.error(f"Failed to ingest issues for {owner}/{repo}: {e}") - raise - - def 
_ingest_single_issue(self, issue_data: Dict[str, Any], owner: str, repo: str): - """Ingest a single issue and its comments""" - client = self._get_github_client() - - with transaction.atomic(): - issue, created = GitHubIssue.objects.update_or_create( - repository=self.repository, - github_id=issue_data['id'], - defaults={ - 'number': issue_data['number'], - 'title': issue_data['title'], - 'body': issue_data.get('body', '') or '', - 'state': issue_data['state'], - 'author': issue_data['user']['login'] if issue_data.get('user') else '', - 'author_association': issue_data.get('author_association', ''), - 'assignees': [user['login'] for user in issue_data.get('assignees', [])], - 'labels': [label['name'] for label in issue_data.get('labels', [])], - 'milestone': issue_data.get('milestone'), - 'locked': issue_data.get('locked', False), - 'created_at': self._parse_datetime(issue_data['created_at']), - 'updated_at': self._parse_datetime(issue_data['updated_at']), - 'closed_at': self._parse_datetime(issue_data.get('closed_at')), - 'url': issue_data['html_url'] - } - ) - - try: - comments = client.get_issue_comments(owner, repo, issue_data['number']) - for comment_data in comments: - try: - GitHubIssueComment.objects.update_or_create( - issue=issue, - github_id=comment_data['id'], - defaults={ - 'body': comment_data['body'], - 'author': comment_data['user']['login'] if comment_data.get('user') else '', - 'author_association': comment_data.get('author_association', ''), - 'created_at': self._parse_datetime(comment_data['created_at']), - 'updated_at': self._parse_datetime(comment_data['updated_at']), - 'url': comment_data['html_url'] - } - ) - except Exception as inner_e: - logger.warning(f"Failed to ingest comment {comment_data.get('id', 'unknown')}: {inner_e}") - continue - - logger.debug(f"Ingested issue #{issue.number} with {len(comments)} comments") - - except Exception as e: - logger.warning(f"Failed to ingest comments for issue #{issue.number}: {e}") - import traceback 
- logger.warning(f"Comment ingestion traceback: {traceback.format_exc()}") - - def _ingest_pull_requests(self, owner: str, repo: str, since: Optional[str] = None): - """Ingest pull requests, comments, and files""" - client = self._get_github_client() - - try: - logger.info(f"[GitHubIngestion] Starting PR ingestion for {owner}/{repo}") - prs = client.get_pull_requests(owner, repo, state='all', since=since) - logger.info(f"Found {len(prs)} pull requests for {owner}/{repo}") - - for i, pr_data in enumerate(prs, 1): - try: - logger.info(f"[GitHubIngestion] Processing PR {i}/{len(prs)}: #{pr_data.get('number', 'unknown')}") - self._ingest_single_pull_request(pr_data, owner, repo) - except Exception as inner_e: - logger.warning(f"Failed to ingest PR #{pr_data.get('number', 'unknown')}: {inner_e}") - continue - - logger.info(f"[GitHubIngestion] Completed PR ingestion for {owner}/{repo}") - + self._get_or_create_repository(owner, repo) + self._update_repository_status('ingesting') + + if self.use_graphql: + self._ingest_with_graphql(owner, repo, since) + else: + self._ingest_with_rest_api(owner, repo, since) + + self._create_and_store_embeddings() + self._update_repository_status('completed') + + logger.info(f"Successfully completed ingestion for {owner}/{repo}") + except Exception as e: - logger.error(f"Failed to ingest pull requests for {owner}/{repo}: {e}") - import traceback - logger.warning(f"PR ingestion traceback: {traceback.format_exc()}") + logger.error(f"Failed to ingest repository {owner}/{repo}: {e}") + self._update_repository_status('failed') raise - def _ingest_single_pull_request(self, pr_data: Dict[str, Any], owner: str, repo: str): - """Ingest a single pull request and its related data""" - client = self._get_github_client() - pr_number = pr_data.get('number', 'unknown') - + def _get_or_create_repository(self, owner: str, repo: str): + repository_manager = self._get_repository_manager() + self.repository = repository_manager.get_or_create_repository(owner, 
repo) + return self.repository + + def _update_repository_status(self, status: str): + if self.repository: + repository_manager = self._get_repository_manager() + repository_manager.update_ingestion_status(self.repository, status) + + def _ingest_with_graphql(self, owner: str, repo: str, since: Optional[str] = None): + graphql_service = self._get_graphql_service() + graphql_service.ingest_repository_data(owner, repo, since) + + def _ingest_with_rest_api(self, owner: str, repo: str, since: Optional[str] = None): + repository_manager = self._get_repository_manager() + github_client = repository_manager.get_github_client() + + issue_service = GitHubIssueIngestionService(github_client, self.repository) + issue_service.ingest_issues(owner, repo, since) + + pr_service = GitHubPRIngestionService(github_client, self.repository) + pr_service.ingest_pull_requests(owner, repo, since) + + def _create_and_store_embeddings(self): + if not self.repository: + logger.warning("No repository available for embedding creation") + return + try: - logger.info(f"[GitHubIngestion] Ingesting PR #{pr_number} for {owner}/{repo}") - - with transaction.atomic(): - pr, created = GitHubPullRequest.objects.update_or_create( - repository=self.repository, - github_id=pr_data['id'], - defaults={ - 'number': pr_data['number'], - 'title': pr_data['title'], - 'body': pr_data.get('body', '') or '', - 'state': pr_data['state'], - 'author': pr_data['user']['login'] if pr_data.get('user') else '', - 'author_association': pr_data.get('author_association', ''), - 'assignees': [user['login'] for user in pr_data.get('assignees', [])], - 'reviewers': [user['login'] for user in pr_data.get('requested_reviewers', [])], - 'labels': [label['name'] for label in pr_data.get('labels', [])], - 'milestone': pr_data.get('milestone'), - 'head_branch': pr_data['head']['ref'] if pr_data.get('head') else '', - 'base_branch': pr_data['base']['ref'] if pr_data.get('base') else '', - 'merged': pr_data.get('merged', False), - 
'merged_at': self._parse_datetime(pr_data.get('merged_at')), - 'merge_commit_sha': pr_data.get('merge_commit_sha', ''), - 'additions': pr_data.get('additions', 0), - 'deletions': pr_data.get('deletions', 0), - 'changed_files': pr_data.get('changed_files', 0), - 'created_at': self._parse_datetime(pr_data['created_at']), - 'updated_at': self._parse_datetime(pr_data['updated_at']), - 'closed_at': self._parse_datetime(pr_data.get('closed_at')), - 'url': pr_data['html_url'] + content = self._create_knowledge_base_content() + if not content.strip(): + logger.warning("No content available for embedding creation") + return + + chunks = chunk_text(content) + logger.info(f"Created {len(chunks)} chunks from repository content") + + points = [] + for i, chunk in enumerate(chunks): + dense_embedding = embed_text(chunk) + sparse_embedding = embed_sparse(chunk) + + point = PointStruct( + id=str(uuid.uuid4()), + vector=dense_embedding, + payload={ + 'text': chunk, + 'repository_id': str(self.repository.id), + 'repository_name': self.repository.full_name, + 'chunk_index': i, + 'source': 'github_ingestion' } ) - - logger.debug(f"[GitHubIngestion] {'Created' if created else 'Updated'} PR #{pr_number}") - - try: - logger.debug(f"[GitHubIngestion] Fetching comments for PR #{pr_number}") - comments = client.get_pull_request_comments(owner, repo, pr_data['number']) - logger.debug(f"[GitHubIngestion] Found {len(comments)} comments for PR #{pr_number}") - - for comment_data in comments: - GitHubPRComment.objects.update_or_create( - pull_request=pr, - github_id=comment_data['id'], - defaults={ - 'body': comment_data['body'], - 'author': comment_data['user']['login'] if comment_data.get('user') else '', - 'author_association': comment_data.get('author_association', ''), - 'created_at': self._parse_datetime(comment_data['created_at']), - 'updated_at': self._parse_datetime(comment_data['updated_at']), - 'url': comment_data['html_url'] - } - ) - except Exception as e: - logger.warning(f"Failed 
to ingest comments for PR #{pr_number}: {e}") - - try: - logger.debug(f"[GitHubIngestion] Fetching files for PR #{pr_number}") - files = client.get_pull_request_files(owner, repo, pr_data['number']) - logger.debug(f"[GitHubIngestion] Found {len(files)} files for PR #{pr_number}") - - for file_data in files: - GitHubPRFile.objects.update_or_create( - pull_request=pr, - filename=file_data['filename'], - defaults={ - 'status': file_data['status'], - 'additions': file_data.get('additions', 0), - 'deletions': file_data.get('deletions', 0), - 'changes': file_data.get('changes', 0), - 'patch': file_data.get('patch', '') or '', - 'blob_url': file_data.get('blob_url', ''), - 'raw_url': file_data.get('raw_url', ''), - 'contents_url': file_data.get('contents_url', '') - } - ) - except Exception as e: - logger.warning(f"Failed to ingest files for PR #{pr_number}: {e}") - - logger.debug(f"[GitHubIngestion] Completed PR #{pr_number}") - + + if sparse_embedding: + point.sparse_vector = SparseVector( + indices=sparse_embedding['indices'], + values=sparse_embedding['values'] + ) + + points.append(point) + + if points: + qdrant.upsert( + collection_name=COLLECTION_NAME, + points=points + ) + logger.info(f"Successfully stored {len(points)} embeddings in vector database") + except Exception as e: - logger.error(f"Failed to ingest single PR #{pr_number}: {e}") - import traceback - logger.warning(f"Single PR ingestion traceback: {traceback.format_exc()}") - raise - - def _ingest_code_comments(self, owner: str, repo: str): - """Ingest code comments from key files""" - logger.info("Code comment ingestion not yet implemented") - pass + logger.error(f"Failed to create embeddings: {e}") + logger.warning("Continuing without embeddings due to error") def _create_knowledge_base_content(self) -> str: """Create knowledge base content from all ingested data""" - content_parts = [] - if not self.repository: return "" - + + content_parts = [] + content_parts.append(f"# Repository: 
{self.repository.full_name}") if self.repository.description: content_parts.append(f"Description: {self.repository.description}") content_parts.append("") - - issues = self.repository.issues.all() - if issues: - content_parts.append("## Issues") - for issue in issues[:50]: - content_parts.append(f"### Issue #{issue.number}: {issue.title}") - content_parts.append(f"State: {issue.state}") - content_parts.append(f"Author: {issue.author}") - if issue.body: - content_parts.append(f"Description: {issue.body[:500]}...") - if issue.labels: - content_parts.append(f"Labels: {', '.join(issue.labels)}") - - comments = issue.comments.all()[:5] - for comment in comments: - content_parts.append(f"Comment by {comment.author}: {comment.body[:200]}...") - content_parts.append("") - - prs = self.repository.pull_requests.all() - if prs: - content_parts.append("## Pull Requests") - for pr in prs[:50]: - content_parts.append(f"### PR #{pr.number}: {pr.title}") - content_parts.append(f"State: {pr.state}") - content_parts.append(f"Author: {pr.author}") - if pr.body: - content_parts.append(f"Description: {pr.body[:500]}...") - if pr.labels: - content_parts.append(f"Labels: {', '.join(pr.labels)}") - content_parts.append("") - + + self._add_issues_to_content(content_parts) + + self._add_pull_requests_to_content(content_parts) + return "\n".join(content_parts) - def _ingest_to_knowledge_base(self): - """Ingest all GitHub data into the knowledge base""" - from core.models import KnowledgeBase - from core.tasks.kb import send_kb_update - - app = self.app_integration.application - full_name = self.repository.full_name - logger.info(f"[GitHubIngestion] _ingest_to_knowledge_base: app={app.name}, repo={full_name}") - - kb, created = KnowledgeBase.objects.get_or_create( - application=app, - source_type='github', - path=full_name, - defaults={ - 'metadata': { - 'source': 'github', - 'repository': full_name, - 'content': self._create_knowledge_base_content() - }, - 'status': 'pending' - } - ) - 
logger.info(f"[GitHubIngestion] KnowledgeBase {'created' if created else 'found'}: uuid={kb.uuid}, status={kb.status}") - - if not created: - logger.info(f"[GitHubIngestion] Updating existing KB content for {full_name}") - kb.metadata['content'] = self._create_knowledge_base_content() - kb.save() - - send_kb_update(kb, 'processing') - - from core.services.ingestion import ingest_kb - logger.info(f"[GitHubIngestion] Calling ingest_kb for kb={kb.uuid}") - ingest_kb(kb, app) - logger.info(f"[GitHubIngestion] ingest_kb completed for kb={kb.uuid}, final status={kb.status}") - - send_kb_update(kb, kb.status) - - def _parse_datetime(self, dt_str: Optional[str]) -> Optional[datetime]: - """Parse datetime string from GitHub API""" - if not dt_str: - return None - - try: - if dt_str.endswith('Z'): - naive_dt = datetime.fromisoformat(dt_str.replace('Z', '')) - return timezone.make_aware(naive_dt) + def _add_issues_to_content(self, content_parts: list): + """Add issues to knowledge base content""" + issues = self.repository.issues.all().order_by('-created_at')[:50] + + if not issues: + return + + content_parts.append("## Issues") + for issue in issues: + content_parts.append(f"### Issue #{issue.number}: {issue.title}") + content_parts.append(f"State: {issue.state}") + content_parts.append(f"Author: {issue.author}") + + if issue.body: + content_parts.append(f"Description: {issue.body[:500]}...") + + if issue.labels: + content_parts.append(f"Labels: {', '.join(issue.labels)}") + + recent_comments = issue.comments.all().order_by('-created_at')[:3] + if recent_comments: + content_parts.append("**Recent Comments:**") + for comment in recent_comments: + content_parts.append(f"- {comment.author}: {comment.body[:200]}...") + + content_parts.append("") + + def _add_pull_requests_to_content(self, content_parts: list): + """Add pull requests to knowledge base content""" + prs = self.repository.pull_requests.all().order_by('-created_at')[:50] + + if not prs: + return + + 
content_parts.append("## Pull Requests") + for pr in prs: + content_parts.append(f"### PR #{pr.number}: {pr.title}") + content_parts.append(f"State: {pr.state}") + content_parts.append(f"Author: {pr.author}") + + if pr.body: + content_parts.append(f"Description: {pr.body[:500]}...") + + if pr.labels: + content_parts.append(f"Labels: {', '.join(pr.labels)}") + + if pr.merged: + content_parts.append(f"**Merged:** Yes (Commit: {pr.merge_commit_sha[:8]}...)") else: - return datetime.fromisoformat(dt_str) - except Exception as e: - logger.warning(f"Failed to parse datetime: {dt_str}, error: {e}") - return timezone.now() - - def ingest_repository(self, owner: str, repo: str, since: Optional[str] = None): - """Main method to ingest all GitHub data for a repository""" - try: - logger.info(f"[GitHubIngestion] Starting ingestion for {owner}/{repo} (since={since}), using GraphQL: {self.use_graphql}") - - if self.use_graphql: - graphql_service = self._get_graphql_service() - return graphql_service.ingest_repository(owner, repo, since) - - repository = self._get_or_create_repository(owner, repo) - repository.ingestion_status = 'running' - repository.save() - logger.info(f"[GitHubIngestion] Repository record ready: id={repository.id}, full_name={repository.full_name}") - - logger.info(f"[GitHubIngestion] Ingesting issues...") - self._ingest_issues(owner, repo, since) - - logger.info(f"[GitHubIngestion] Ingesting pull requests...") - self._ingest_pull_requests(owner, repo, since) - - logger.info(f"[GitHubIngestion] Building knowledge base content...") - self._ingest_to_knowledge_base() - - repository.ingestion_status = 'completed' - repository.last_ingested_at = timezone.now() - repository.save() - logger.info(f"[GitHubIngestion] Completed ingestion for {owner}/{repo}") - - except Exception as e: - logger.error(f"[GitHubIngestion] Failed ingestion for {owner}/{repo}: {e}", exc_info=True) - if self.repository: - self.repository.ingestion_status = 'failed' - self.repository.save() 
- raise - - finally: - if self.github_client: - self.github_client.close() + content_parts.append("**Merged:** No") + + recent_comments = pr.comments.all().order_by('-created_at')[:3] + if recent_comments: + content_parts.append("**Recent Comments:**") + for comment in recent_comments: + content_parts.append(f"- {comment.author}: {comment.body[:200]}...") + + content_parts.append("") diff --git a/backend/core/services/github_issue_ingestion_service.py b/backend/core/services/github_issue_ingestion_service.py new file mode 100644 index 0000000..0f1aa18 --- /dev/null +++ b/backend/core/services/github_issue_ingestion_service.py @@ -0,0 +1,79 @@ +import logging +from typing import Dict, List, Any +from django.db import transaction + +from core.models.github_data import GitHubIssue, GitHubIssueComment +from core.services.github_client import GitHubAPIClient +from core.services.github_data_processors import IssueDataProcessor + +logger = logging.getLogger(__name__) + + +class GitHubIssueIngestionService: + def __init__(self, github_client: GitHubAPIClient, repository): + self.github_client = github_client + self.repository = repository + self.processor = IssueDataProcessor() + + def ingest_issues(self, owner: str, repo: str, since: str = None): + try: + issues = self.github_client.get_issues(owner, repo, state='all', since=since) + logger.info(f"Found {len(issues)} issues for {owner}/{repo}") + + for issue_data in issues: + self._ingest_single_issue(issue_data, owner, repo) + + except Exception as e: + logger.error(f"Failed to ingest issues for {owner}/{repo}: {e}") + raise + + def _ingest_single_issue(self, issue_data: Dict[str, Any], owner: str, repo: str): + try: + with transaction.atomic(): + issue = self._create_or_update_issue(issue_data) + self._ingest_issue_comments(issue, owner, repo, issue_data['number']) + + except Exception as e: + logger.error(f"Failed to ingest issue #{issue_data.get('number', 'unknown')}: {e}") + raise + + def 
_create_or_update_issue(self, issue_data: Dict[str, Any]) -> GitHubIssue: + processed_data = self.processor.process_data(issue_data) + + issue, created = GitHubIssue.objects.update_or_create( + repository=self.repository, + github_id=issue_data['id'], + defaults=processed_data + ) + + action = "Created" if created else "Updated" + logger.debug(f"{action} issue #{issue.number}: {issue.title}") + + return issue + + def _ingest_issue_comments(self, issue: GitHubIssue, owner: str, repo: str, issue_number: int): + try: + comments = self.github_client.get_issue_comments(owner, repo, issue_number) + logger.debug(f"Found {len(comments)} comments for issue #{issue_number}") + + for comment_data in comments: + self._create_or_update_comment(issue, comment_data) + + except Exception as e: + logger.warning(f"Failed to ingest comments for issue #{issue_number}: {e}") + + def _create_or_update_comment(self, issue: GitHubIssue, comment_data: Dict[str, Any]): + try: + processed_data = self.processor.process_comment_data(comment_data) + + comment, created = GitHubIssueComment.objects.update_or_create( + issue=issue, + github_id=comment_data['id'], + defaults=processed_data + ) + + if created: + logger.debug(f"Created comment {comment.github_id}") + + except Exception as e: + logger.warning(f"Failed to ingest comment {comment_data.get('id', 'unknown')}: {e}") diff --git a/backend/core/services/github_pr_ingestion_service.py b/backend/core/services/github_pr_ingestion_service.py new file mode 100644 index 0000000..e85f832 --- /dev/null +++ b/backend/core/services/github_pr_ingestion_service.py @@ -0,0 +1,107 @@ +import logging +from typing import Dict, List, Any +from django.db import transaction + +from core.models.github_data import GitHubPullRequest, GitHubPRComment, GitHubPRFile +from core.services.github_client import GitHubAPIClient +from core.services.github_data_processors import PullRequestDataProcessor + +logger = logging.getLogger(__name__) + + +class 
GitHubPRIngestionService: + def __init__(self, github_client: GitHubAPIClient, repository): + self.github_client = github_client + self.repository = repository + self.processor = PullRequestDataProcessor() + + def ingest_pull_requests(self, owner: str, repo: str, since: str = None): + try: + prs = self.github_client.get_pull_requests(owner, repo, state='all', since=since) + logger.info(f"Found {len(prs)} pull requests for {owner}/{repo}") + + for pr_data in prs: + self._ingest_single_pr(pr_data, owner, repo) + + except Exception as e: + logger.error(f"Failed to ingest pull requests for {owner}/{repo}: {e}") + raise + + def _ingest_single_pr(self, pr_data: Dict[str, Any], owner: str, repo: str): + try: + with transaction.atomic(): + pr = self._create_or_update_pr(pr_data) + self._ingest_pr_comments(pr, owner, repo, pr_data['number']) + self._ingest_pr_files(pr, owner, repo, pr_data['number']) + + except Exception as e: + logger.error(f"Failed to ingest PR #{pr_data.get('number', 'unknown')}: {e}") + raise + + def _create_or_update_pr(self, pr_data: Dict[str, Any]) -> GitHubPullRequest: + processed_data = self.processor.process_data(pr_data) + + pr, created = GitHubPullRequest.objects.update_or_create( + repository=self.repository, + github_id=pr_data['id'], + defaults=processed_data + ) + + action = "Created" if created else "Updated" + logger.debug(f"{action} PR #{pr.number}: {pr.title}") + + return pr + + def _ingest_pr_comments(self, pr: GitHubPullRequest, owner: str, repo: str, pr_number: int): + try: + comments = self.github_client.get_pull_request_comments(owner, repo, pr_number) + logger.debug(f"Found {len(comments)} comments for PR #{pr_number}") + + for comment_data in comments: + self._create_or_update_pr_comment(pr, comment_data) + + except Exception as e: + logger.warning(f"Failed to ingest comments for PR #{pr_number}: {e}") + + def _create_or_update_pr_comment(self, pr: GitHubPullRequest, comment_data: Dict[str, Any]): + try: + processed_data = 
self.processor.process_comment_data(comment_data) + + comment, created = GitHubPRComment.objects.update_or_create( + pull_request=pr, + github_id=comment_data['id'], + defaults=processed_data + ) + + if created: + logger.debug(f"Created PR comment {comment.github_id}") + + except Exception as e: + logger.warning(f"Failed to ingest PR comment {comment_data.get('id', 'unknown')}: {e}") + + def _ingest_pr_files(self, pr: GitHubPullRequest, owner: str, repo: str, pr_number: int): + try: + files = self.github_client.get_pull_request_files(owner, repo, pr_number) + logger.debug(f"Found {len(files)} files for PR #{pr_number}") + + for file_data in files: + self._create_or_update_pr_file(pr, file_data) + + except Exception as e: + logger.warning(f"Failed to ingest files for PR #{pr_number}: {e}") + + def _create_or_update_pr_file(self, pr: GitHubPullRequest, file_data: Dict[str, Any]): + try: + processed_data = self.processor.process_file_data(file_data) + + file_record, created = GitHubPRFile.objects.update_or_create( + pull_request=pr, + filename=file_data['filename'], + defaults=processed_data + ) + + if created: + logger.debug(f"Created PR file {file_record.filename}") + + except Exception as e: + logger.warning(f"Failed to ingest PR file {file_data.get('filename', 'unknown')}: {e}") diff --git a/backend/core/services/github_repository_manager.py b/backend/core/services/github_repository_manager.py new file mode 100644 index 0000000..0c37ba9 --- /dev/null +++ b/backend/core/services/github_repository_manager.py @@ -0,0 +1,68 @@ +import logging +from typing import Optional +from django.db import transaction +from django.utils import timezone + +from core.models.github_data import GitHubRepository +from core.models import AppIntegration +from core.services.github_client import GitHubAPIClient +from core.services.github_data_processors import RepositoryDataProcessor + +logger = logging.getLogger(__name__) + + +class GitHubRepositoryManager: + def __init__(self, 
app_integration: AppIntegration): + self.app_integration = app_integration + self._github_client = None + + def get_github_client(self) -> GitHubAPIClient: + if not self._github_client: + token = self.app_integration.integration.config.get('token') + if not token: + raise ValueError("GitHub token not found in integration config") + self._github_client = GitHubAPIClient(token) + return self._github_client + + def get_or_create_repository(self, owner: str, repo: str) -> GitHubRepository: + full_name = f"{owner}/{repo}" + + repository, created = GitHubRepository.objects.get_or_create( + full_name=full_name, + defaults={ + 'name': repo, + 'repo_owner': owner, + 'app_integration': self.app_integration, + 'ingestion_status': 'pending' + } + ) + + if created: + self._enrich_repository_data(repository, owner, repo) + + return repository + + def _enrich_repository_data(self, repository: GitHubRepository, owner: str, repo: str): + try: + client = self.get_github_client() + repo_info = client.get_repository_info(owner, repo) + + processor = RepositoryDataProcessor() + processed_data = processor.process_data(repo_info) + + for field, value in processed_data.items(): + setattr(repository, field, value) + + repository.save() + logger.info(f"Enriched repository record for {repository.full_name}") + + except Exception as e: + logger.error(f"Failed to enrich repository {repository.full_name}: {e}") + repository.delete() + raise + + def update_ingestion_status(self, repository: GitHubRepository, status: str): + repository.ingestion_status = status + repository.last_ingested_at = timezone.now() + repository.save() + logger.info(f"Updated ingestion status for {repository.full_name}: {status}") diff --git a/backend/core/tasks/message.py b/backend/core/tasks/message.py index 2f0664c..df355b4 100644 --- a/backend/core/tasks/message.py +++ b/backend/core/tasks/message.py @@ -144,73 +144,6 @@ def generate_bot_response(message_id, app_uuid, ai_provider_id=None, model=None) 
"reason_for_escalation": error_message, "error_details": error_message, } - # - # tools = get_app_integrations(app) - # logger.info("Tools: %s ", tools) - # logger.info("Conversation: %s", conversation) - # tool_call_response = client.chat( - # messages=conversation, - # model=text_model.model_name, - # tools=tools - # ) - # - # logger.info("Tool_call_response: %s", tool_call_response) - # tool_results = {} - # - # for choice in tool_call_response.choices: - # msg = choice.message - # if msg.tool_calls: - # for tool_call in msg.tool_calls: - # tool_name = tool_call.function.name - # - # args = ( - # json.loads(tool_call.function.arguments) - # if isinstance(tool_call.function.arguments, str) - # else tool_call.function.arguments - # ) - # - # tool_results[tool_name] = execute_tool_call(app, tool_name, **args) - # logger.info(f"Tool: {tool_name}, Result: {tool_results[tool_name]}") - # - # conversation.append({ - # "role": "assistant", - # "type": "function_call_output", - # "call_id": tool_call.id, - # "content": json.dumps(tool_results[tool_name]), - # }) - # - # llm_response = client.chat( - # conversation, - # model=text_model.model_name, - # response_schema=response_schema - # ) - # - # escalation = False - # - # try: - # llm_response_data = parse_llm_response(llm_response.choices[0].message.content) - # - # logger.info("Final LLM Response:\n%s", json.dumps(llm_response_data, indent=2)) - # - # - # answer = llm_response_data.get("answer", "").strip() - # status = llm_response_data.get("status", "ERROR").strip() - # escalation = llm_response_data.get("escalation", False) - # reason = llm_response_data.get("reason_for_escalation", "").strip() - # - # metadata = { - # "status": status, - # "escalation": escalation, - # "reason_for_escalation": reason, - # } - # - # except json.JSONDecodeError: - # answer = llm_response.content.strip() - # metadata = { - # "status": "ERROR", - # "escalation": True, - # "reason_for_escalation": "Malformed LLM response", - # } 
bot_message = Message.objects.create( chatroom=chatroom, @@ -224,17 +157,4 @@ def generate_bot_response(message_id, app_uuid, ai_provider_id=None, model=None) is_internal=user_message.is_internal, ) - # Send live update to all participants _send_live_update(bot_message, user_message) - - # if escalation: - # from core.services.notifications import notify_users - # - # context = { - # "app": chatroom.application, - # "chatroom_uuid": chatroom.uuid, - # "user_id": user_message.sender_identifier, - # "user_query": user_message.message, - # "agent_response": answer, - # } - # notify_users(chatroom.application, SMART_ESCALATION_TEMPLATE, context) diff --git a/backend/core/utils.py b/backend/core/utils.py index 425226a..9c19ce0 100644 --- a/backend/core/utils.py +++ b/backend/core/utils.py @@ -50,8 +50,6 @@ def normalize_model_name_by_provider(model: str, provider: str) -> str: Normalized model name """ if provider.lower() == 'gemini': - # Remove 'model/' prefix for Gemini models if model.startswith('model/'): return model[6:] - # For other providers, return as-is return model diff --git a/backend/core/utils/config_manager.py b/backend/core/utils/config_manager.py new file mode 100644 index 0000000..a1539b5 --- /dev/null +++ b/backend/core/utils/config_manager.py @@ -0,0 +1,174 @@ +import os +from typing import Any, Dict, Optional, Union +from dataclasses import dataclass +from pathlib import Path + + +@dataclass +class DatabaseConfig: + """Database configuration""" + name: str + user: str + password: str + host: str + port: int + test_name: Optional[str] = None + test_user: Optional[str] = None + test_password: Optional[str] = None + test_host: Optional[str] = None + test_port: Optional[int] = None + + +@dataclass +class QdrantConfig: + """Qdrant vector database configuration""" + local_host: str = "localhost" + local_port: int = 6333 + cloud_host: Optional[str] = None + cloud_port: Optional[int] = None + cloud_api_key: Optional[str] = None + connect_to_local: bool = 
False + + +@dataclass +class AIProviderConfig: + """AI provider configuration""" + gemini_api_key: Optional[str] = None + openai_api_key: Optional[str] = None + + +@dataclass +class AppConfig: + """Application configuration""" + secret_key: str + debug: bool = False + allowed_hosts: list = None + closed_alpha_signups: list = None + require_account_approval: bool = False + + def __post_init__(self): + if self.allowed_hosts is None: + self.allowed_hosts = [] + if self.closed_alpha_signups is None: + self.closed_alpha_signups = [] + + +class ConfigManager: + """ + Centralized configuration management with environment variable support + """ + + def __init__(self, base_dir: Optional[Path] = None): + self.base_dir = base_dir or Path(__file__).parent.parent.parent.parent + self._env_loaded = False + + def _ensure_env_loaded(self): + """Ensure environment variables are loaded""" + if not self._env_loaded: + try: + from dotenv import load_dotenv + load_dotenv(self.base_dir / '.env') + self._env_loaded = True + except ImportError: + self._env_loaded = True + + def get_env_var( + self, + key: str, + default: Any = None, + var_type: type = str + ) -> Any: + """ + Get environment variable with type conversion + + Args: + key: Environment variable key + default: Default value if not found + var_type: Type to convert to + + Returns: + Converted environment variable or default + """ + self._ensure_env_loaded() + + value = os.environ.get(key) + if value is None: + return default + + if var_type == bool: + return value.lower() in ('true', '1', 'yes', 'on') + elif var_type == int: + return int(value) + elif var_type == float: + return float(value) + elif var_type == list: + return [item.strip() for item in value.split(',') if item.strip()] + else: + return value + + def get_database_config(self) -> DatabaseConfig: + """Get database configuration""" + return DatabaseConfig( + name=self.get_env_var('DB_NAME', 'chatterbox'), + user=self.get_env_var('DB_USER', 'postgres'), + 
password=self.get_env_var('PASSWORD', 'postgres'), + host=self.get_env_var('DB_HOST', 'localhost'), + port=self.get_env_var('PORT', 5432, int), + test_name=self.get_env_var('TEST_DB_NAME'), + test_user=self.get_env_var('TEST_DB_USER'), + test_password=self.get_env_var('TEST_DB_PASSWORD'), + test_host=self.get_env_var('TEST_DB_HOST'), + test_port=self.get_env_var('TEST_DB_PORT', None, int) + ) + + def get_qdrant_config(self) -> QdrantConfig: + """Get Qdrant configuration""" + return QdrantConfig( + local_host=self.get_env_var('QDRANT_LOCAL_HOST', 'localhost'), + local_port=self.get_env_var('QDRANT_LOCAL_PORT', 6333, int), + cloud_host=self.get_env_var('QDRANT_CLOUD_HOST'), + cloud_port=self.get_env_var('QDRANT_CLOUD_PORT', None, int), + cloud_api_key=self.get_env_var('QDRANT_CLOUD_API_KEY'), + connect_to_local=self.get_env_var('CONNECT_TO_LOCAL_VECTOR_DB', False, bool) + ) + + def get_ai_provider_config(self) -> AIProviderConfig: + """Get AI provider configuration""" + return AIProviderConfig( + gemini_api_key=self.get_env_var('GEMINI_API_KEY'), + openai_api_key=self.get_env_var('OPENAI_API_KEY') + ) + + def get_app_config(self) -> AppConfig: + """Get application configuration""" + return AppConfig( + secret_key=self.get_env_var('APP_SECRET_KEY', ''), + debug=self.get_env_var('DEBUG', False, bool), + allowed_hosts=self.get_env_var('ALLOWED_HOSTS', [], list), + closed_alpha_signups=self.get_env_var('CLOSED_ALPHA_SIGN_UPS', [], list), + require_account_approval=self.get_env_var('REQUIRE_ACCOUNT_APPROVAL', False, bool) + ) + + def get_url_config(self) -> Dict[str, str]: + """Get URL configuration""" + return { + 'api_base_url': self.get_env_var('API_BASE_URL', 'http://localhost:8000/api'), + 'frontend_url': self.get_env_var('FRONTEND_URL', 'http://localhost:3000'), + 'widget_url': self.get_env_var('WIDGET_URL', 'https://widget.ch8r.com') + } + + def get_email_config(self) -> Dict[str, str]: + """Get email configuration""" + return { + 'mailersend_api_key': 
self.get_env_var('MAILERSEND_API_KEY', ''), + 'default_from_email': self.get_env_var('DEFAULT_FROM_EMAIL', ''), + 'discord_signup_webhook_url': self.get_env_var('DISCORD_SIGNUP_WEBHOOK_URL', '') + } + + def get_security_config(self) -> Dict[str, str]: + """Get security configuration""" + return { + 'secret_encryption_key': self.get_env_var('SECRET_ENCRYPTION_KEY', ''), + } + +config_manager = ConfigManager() diff --git a/backend/core/utils/error_handling.py b/backend/core/utils/error_handling.py new file mode 100644 index 0000000..d984535 --- /dev/null +++ b/backend/core/utils/error_handling.py @@ -0,0 +1,190 @@ +import logging +import traceback +from typing import Optional, Dict, Any, Callable +from functools import wraps +from django.core.exceptions import ValidationError as DjangoValidationError +from rest_framework.exceptions import APIException + +logger = logging.getLogger(__name__) + + +class GitHubIngestionError(Exception): + """Custom exception for GitHub ingestion errors""" + pass + + +class AIProviderError(Exception): + """Custom exception for AI provider errors""" + pass + + +class ValidationError(Exception): + """Custom validation error""" + pass + + +def handle_service_errors( + error_class: type = Exception, + default_message: str = "An error occurred", + log_level: str = "error" +): + """ + Decorator for consistent error handling in services + + Args: + error_class: Exception class to raise + default_message: Default error message + log_level: Logging level ('error', 'warning', 'info') + """ + def decorator(func: Callable): + @wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except Exception as e: + log_method = getattr(logger, log_level) + + log_method( + f"Error in {func.__name__}: {str(e)}\n" + f"Traceback: {traceback.format_exc()}" + ) + + raise error_class(default_message) from e + + return wrapper + return decorator + + +def safe_execute( + func: Callable, + default_value: Any = None, + error_message: Optional[str] = None +) -> Any: 
+ """ + Safely execute a function and return default value on error + + Args: + func: Function to execute + default_value: Value to return on error + error_message: Optional error message to log + + Returns: + Function result or default value + """ + try: + return func() + except Exception as e: + if error_message: + logger.warning(f"{error_message}: {str(e)}") + else: + logger.warning(f"Error in {func.__name__}: {str(e)}") + return default_value + + +def validate_required_fields(data: Dict[str, Any], required_fields: list) -> None: + """ + Validate that required fields are present in data + + Args: + data: Dictionary to validate + required_fields: List of required field names + + Raises: + ValidationError: If required fields are missing + """ + missing_fields = [field for field in required_fields if field not in data or not data[field]] + if missing_fields: + raise ValidationError(f"Missing required fields: {', '.join(missing_fields)}") + + +def log_api_call( + endpoint: str, + method: str = "GET", + params: Optional[Dict[str, Any]] = None, + response_status: Optional[int] = None, + error: Optional[str] = None +): + """ + Log API call details for debugging and monitoring + + Args: + endpoint: API endpoint + method: HTTP method + params: Request parameters + response_status: Response status code + error: Error message if any + """ + log_data = { + 'endpoint': endpoint, + 'method': method, + 'params': params or {} + } + + if response_status: + log_data['status'] = response_status + + if error: + log_data['error'] = error + logger.error(f"API call failed: {log_data}") + else: + logger.info(f"API call: {log_data}") + + +class ErrorContext: + """Context manager for error handling and logging""" + + def __init__( + self, + operation: str, + reraise: bool = True, + default_return: Any = None + ): + self.operation = operation + self.reraise = reraise + self.default_return = default_return + + def __enter__(self): + logger.info(f"Starting operation: {self.operation}") + 
return self + + def __exit__(self, exc_type, exc_val, exc_tb): + if exc_type is None: + logger.info(f"Completed operation: {self.operation}") + return True + + logger.error( + f"Failed operation: {self.operation}\n" + f"Error: {exc_val}\n" + f"Traceback: {traceback.format_exception(exc_type, exc_val, exc_tb)}" + ) + + if self.reraise: + return False + + return True # Suppress the exception + + +def format_error_response( + error: Exception, + user_friendly_message: Optional[str] = None +) -> Dict[str, Any]: + """ + Format error response for API endpoints + + Args: + error: Exception that occurred + user_friendly_message: Optional user-friendly message + + Returns: + Formatted error response + """ + response = { + 'error': user_friendly_message or 'An unexpected error occurred', + 'type': error.__class__.__name__ + } + + if logger.isEnabledFor(logging.DEBUG): + response['details'] = str(error) + response['traceback'] = traceback.format_exc() + + return response diff --git a/backend/core/views/ai_provider.py b/backend/core/views/ai_provider.py index 47ed990..c48cf98 100644 --- a/backend/core/views/ai_provider.py +++ b/backend/core/views/ai_provider.py @@ -3,114 +3,92 @@ from rest_framework.pagination import PageNumberPagination from rest_framework.decorators import action from django.db import models + from core.serializers.ai_provider import AIProviderCreateSerializer, AIProviderSerializer from core.models import AIProvider, AIProviderModels from core.consts import SUPPORTED_AI_PROVIDERS +from core.services.ai_client_service import AIClientService + + class AIProviderViewSet(viewsets.ModelViewSet): + permission_classes = [permissions.IsAuthenticated] lookup_field = 'uuid' - http_method_names = ['get', 'post', 'put','patch', 'delete'] + http_method_names = ['get', 'post', 'put', 'patch', 'delete'] pagination_class = PageNumberPagination - queryset = AIProvider.objects.all() - - def get_serializer_class(self): - if self.action in ['create', 'update', 
'partial_update']: - return AIProviderCreateSerializer - return AIProviderSerializer + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.ai_service = AIClientService() def get_queryset(self): + """Get queryset filtered by user""" user = self.request.user return AIProvider.objects.filter( models.Q(creator=user) | models.Q(is_builtin=True) ) - def _validate_ai_provider(self, validated_data, instance=None): - from core.services.factories.ai_provider_factory import AIProviderFactory - - factory = AIProviderFactory() - main_fields = ['name', 'provider', 'provider_api_key'] - config = {} - - if instance: - current_data = { - 'name': instance.name, - 'provider': instance.provider, - 'provider_api_key': instance.provider_api_key - } - if instance.metadata: - config.update(instance.metadata) - update_data = {**current_data, **validated_data} - if not update_data['provider_api_key']: - update_data['provider_api_key'] = instance.provider_api_key - validation_data = update_data - else: - validation_data = validated_data - - for field, value in validation_data.items(): - if field not in main_fields: - config[field] = str(value).strip() if value is not None else '' - - is_valid, provider_models = factory.validate_provider( - provider_type=validation_data['provider'], - api_key=validation_data['provider_api_key'], - config=config - ) - - return is_valid, provider_models + def get_serializer_class(self): + """Get appropriate serializer based on action""" + if self.action in ['create', 'update', 'partial_update']: + return AIProviderCreateSerializer + return AIProviderSerializer def create(self, request, *args, **kwargs): + """Create a new AI provider with validation""" serializer = self.get_serializer(data=request.data) serializer.is_valid(raise_exception=True) - validated_data = serializer.validated_data - try: - is_valid, provider_models = self._validate_ai_provider(validated_data) - - if not is_valid: - return Response( - { - 'error': 'Failed to 
validate AI provider connection', - 'details': 'Unable to connect to the AI provider with the provided credentials' - }, - status=status.HTTP_400_BAD_REQUEST - ) - - ai_provider = serializer.save() - - AIProviderModels.objects.update_or_create( - ai_provider=ai_provider, - defaults={ - 'models_data': provider_models, - 'creator': request.user - } - ) - - response_serializer = AIProviderSerializer(ai_provider) - return Response( - { - 'ai_provider': response_serializer.data, - 'validation': { - 'is_valid': True, - 'models': provider_models - } - }, - status=status.HTTP_201_CREATED - ) + result = self._create_ai_provider(serializer.validated_data, request.user) + return Response(result, status=status.HTTP_201_CREATED) except Exception as e: - return Response( - { - 'error': 'Failed to validate AI provider connection', - 'details': str(e) - }, - status=status.HTTP_400_BAD_REQUEST - ) + return self._handle_validation_error(e) + + def _create_ai_provider(self, validated_data, user): + """Create AI provider with validation""" + is_valid, provider_models = self.ai_service.validate_ai_provider(validated_data) + + if not is_valid: + raise ValueError('Failed to validate AI provider connection') + + ai_provider = self.get_serializer().create(validated_data) + ai_provider.creator = user + ai_provider.save() + + self._store_provider_models(ai_provider, provider_models, user) + + return self._format_creation_response(ai_provider, provider_models) + + def _store_provider_models(self, ai_provider, provider_models, user): + """Store provider models in database""" + AIProviderModels.objects.update_or_create( + ai_provider=ai_provider, + defaults={ + 'models_data': provider_models, + 'creator': user + } + ) + + def _format_creation_response(self, ai_provider, provider_models): + """Format creation response""" + response_serializer = AIProviderSerializer(ai_provider) + return { + 'ai_provider': response_serializer.data, + 'validation': { + 'is_valid': True, + 'models': provider_models 
+ } + } def list(self, request, *args, **kwargs): + """List AI providers with supported providers info""" response = super().list(request, *args, **kwargs) + return self._format_list_response(response) + + def _format_list_response(self, response): + """Format list response with supported providers""" if isinstance(response.data, dict): response.data['supported_ai_providers'] = SUPPORTED_AI_PROVIDERS else: @@ -121,115 +99,85 @@ def list(self, request, *args, **kwargs): return response def update(self, request, *args, **kwargs): + """Update AI provider with validation""" partial = kwargs.pop('partial', False) instance = self.get_object() serializer = self.get_serializer(instance, data=request.data, partial=partial) serializer.is_valid(raise_exception=True) + try: + result = self._update_ai_provider(serializer, instance, request.user) + return Response(result) + + except Exception as e: + return self._handle_validation_error(e) + + def _update_ai_provider(self, serializer, instance, user): validated_data = serializer.validated_data - api_key_to_validate = validated_data.get('provider_api_key') or instance.provider_api_key + api_key_to_validate = self._get_api_key_to_validate(validated_data, instance) if api_key_to_validate and api_key_to_validate.strip(): - try: - is_valid, provider_models = self._validate_ai_provider(validated_data, instance) - - if not is_valid: - return Response( - { - 'error': 'Failed to validate AI provider connection', - 'details': 'Unable to connect to the AI provider with the provided credentials' - }, - status=status.HTTP_400_BAD_REQUEST - ) - - except Exception as e: - return Response( - { - 'error': 'Failed to validate AI provider connection', - 'details': str(e) - }, - status=status.HTTP_400_BAD_REQUEST - ) - else: - return Response( - { - 'error': 'API key is required', - 'details': 'An API key must be provided to validate the AI provider connection' - }, - status=status.HTTP_400_BAD_REQUEST + is_valid, provider_models = 
self.ai_service.validate_ai_provider( + validated_data, instance ) + + if not is_valid: + raise ValueError('Failed to validate AI provider connection') + + self._store_provider_models(instance, provider_models, user) updated_instance = serializer.save() - AIProviderModels.objects.update_or_create( - ai_provider=updated_instance, - defaults={ - 'models_data': provider_models, - 'creator': request.user - } - ) - - response_serializer = AIProviderSerializer(updated_instance) - return Response(response_serializer.data) + return self._format_update_response(updated_instance) - def destroy(self, request, *args, **kwargs): - instance = self.get_object() - self.perform_destroy(instance) + def _get_api_key_to_validate(self, validated_data, instance): + """Get API key that needs validation""" + return validated_data.get('provider_api_key') or instance.provider_api_key + + def _format_update_response(self, instance): + """Format update response""" + response_serializer = AIProviderSerializer(instance) + return { + 'ai_provider': response_serializer.data, + 'message': 'AI provider updated successfully' + } + + def _handle_validation_error(self, error): + """Handle validation errors consistently""" return Response( - {"detail": "deleted"}, - status=status.HTTP_200_OK + { + 'error': 'Failed to validate AI provider connection', + 'details': str(error) + }, + status=status.HTTP_400_BAD_REQUEST ) - @action(detail=True, methods=['get']) - def models(self, request, uuid=None): - ai_provider = self.get_object() + @action(detail=True, methods=['post']) + def test_connection(self, request, uuid=None): + """Test AI provider connection""" + instance = self.get_object() try: - provider_models = AIProviderModels.objects.get(ai_provider=ai_provider) + test_data = { + 'provider': instance.provider, + 'provider_api_key': instance.provider_api_key, + **(instance.metadata or {}) + } + + is_valid, provider_models = self.ai_service.validate_ai_provider(test_data) + return Response({ - 
'ai_provider': AIProviderSerializer(ai_provider).data, - 'ai_provider_models': { - 'id': provider_models.id, - 'models_data': provider_models.models_data, - 'created_at': provider_models.created_at, - 'updated_at': provider_models.updated_at - } + 'is_valid': is_valid, + 'models': provider_models if is_valid else None, + 'message': 'Connection successful' if is_valid else 'Connection failed' }) - except AIProviderModels.DoesNotExist: - return Response({ - 'error': 'Models data not found for this AI provider' - }, status=status.HTTP_404_NOT_FOUND) + except Exception as e: - return Response({ - 'error': f'Failed to retrieve models: {str(e)}' - }, status=status.HTTP_500_INTERNAL_SERVER_ERROR) - - @action(detail=False, methods=['get']) - def all_models(self, request): - user = request.user - - ai_providers = self.get_queryset().filter(creator=user) - - result = [] - for ai_provider in ai_providers: - try: - provider_models = AIProviderModels.objects.get(ai_provider=ai_provider) - result.append({ - 'ai_provider': AIProviderSerializer(ai_provider).data, - 'ai_provider_models': { - 'id': provider_models.id, - 'models_data': provider_models.models_data, - 'created_at': provider_models.created_at, - 'updated_at': provider_models.updated_at - } - }) - except AIProviderModels.DoesNotExist: - continue - except Exception as e: - print(f"Error retrieving models for provider {ai_provider.uuid}: {str(e)}") - continue - - return Response({ - 'providers': result - }) + return Response( + { + 'is_valid': False, + 'error': str(e) + }, + status=status.HTTP_400_BAD_REQUEST + ) diff --git a/backend/core/views/application.py b/backend/core/views/application.py index 9917d62..f7c4539 100644 --- a/backend/core/views/application.py +++ b/backend/core/views/application.py @@ -35,8 +35,6 @@ def create(self, request, *args, **kwargs): create_serializer.is_valid(raise_exception=True) app_instance = create_serializer.save(owner=request.user) - # TODO: may be we need to handle proper log and error 
messages if default - # TODO: models are not configured yet. AppModel.configure_defaults(app_instance) parsed_kb_items = parse_kb_from_request(request) @@ -110,9 +108,8 @@ def get(self, request, application_uuid): if not sender_identifier: return Response({'detail': 'sender_identifier is required.'}, status=status.HTTP_400_BAD_REQUEST) - chat_type = request.query_params.get('type') # 'human' or 'ai' + chat_type = request.query_params.get('type') - # Find chatrooms where this sender is a participant chatroom_ids = ChatroomParticipant.objects.filter( user_identifier=sender_identifier, chatroom__application=app, @@ -121,14 +118,12 @@ def get(self, request, application_uuid): chatrooms = ChatRoom.objects.filter(id__in=chatroom_ids) if chat_type == 'human': - # Chatrooms that have a human_agent participant human_chatroom_ids = ChatroomParticipant.objects.filter( chatroom_id__in=chatroom_ids, role='human_agent', ).values_list('chatroom_id', flat=True) chatrooms = chatrooms.filter(id__in=human_chatroom_ids) elif chat_type == 'ai': - # Chatrooms that have an agent (AI) participant but NOT a human_agent human_chatroom_ids = ChatroomParticipant.objects.filter( chatroom_id__in=chatroom_ids, role='human_agent', diff --git a/backend/core/views/dummy_view.py b/backend/core/views/dummy_view.py index 79f206d..35a113e 100644 --- a/backend/core/views/dummy_view.py +++ b/backend/core/views/dummy_view.py @@ -8,11 +8,6 @@ class DummyView(APIView): -# # authentication_classes = [APIKeyAuthentication] -# permission_classes = [HasAPIAccessPermission] -# api_action = 'widget_chat' -# -# authentication_classes = [APIKeyAuthentication] permission_classes = [IsAuthenticated | HasAPIKeyPermission] def get(self, request, application_uuid): return Response({ diff --git a/backend/core/views/ingestion.py b/backend/core/views/ingestion.py index 78ec66a..7ab83a5 100644 --- a/backend/core/views/ingestion.py +++ b/backend/core/views/ingestion.py @@ -7,14 +7,12 @@ from core.permissions import 
HasAPIKeyPermission from core.tasks import process_kb -# Not yet used - Use it when we allow users to re-process knowledge base class IngestApplicationKBView(APIView): permission_classes = [permissions.IsAuthenticated | HasAPIKeyPermission] def post(self, request, application_uuid): app = get_object_or_404(Application, uuid=application_uuid, owner=request.user) kbs = app.knowledge_bases.filter(status='pending') - - # TODO: Check if text & embedding models are configured + process_kb.delay([kb.id for kb in kbs]) return Response({"message": "Ingestion completed."}) diff --git a/backend/core/views/knowledge_base.py b/backend/core/views/knowledge_base.py index fd22829..de78a7a 100644 --- a/backend/core/views/knowledge_base.py +++ b/backend/core/views/knowledge_base.py @@ -102,8 +102,7 @@ def partial_update(self, request, *args, **kwargs): fields_to_update.append("path") kb.save(update_fields=fields_to_update) - - # TODO: We need to check whether text & embedding models are configured here as well + process_kb.delay([kb.id]) return Response(KnowledgeBaseViewSerializer(kb).data) diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 602a8b8..208843c 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -11,6 +11,7 @@ "@nuxt/fonts": "^0.11.4", "@nuxt/icon": "^1.14.0", "@nuxt/image": "^1.10.0", + "@nuxtjs/color-mode": "^3.5.2", "@pinia/nuxt": "^0.11.1", "@tailwindcss/vite": "^4.1.10", "@tanstack/vue-table": "^8.21.3", @@ -41,7 +42,6 @@ "zod": "^3.25.76" }, "devDependencies": { - "@nuxtjs/color-mode": "^3.5.2", "prettier": "^3.5.3" } }, @@ -2791,7 +2791,6 @@ "version": "3.5.2", "resolved": "https://registry.npmjs.org/@nuxtjs/color-mode/-/color-mode-3.5.2.tgz", "integrity": "sha512-cC6RfgZh3guHBMLLjrBB2Uti5eUoGM9KyauOaYS9ETmxNWBMTvpgjvSiSJp1OFljIXPIqVTJ3xtJpSNZiO3ZaA==", - "dev": true, "license": "MIT", "dependencies": { "@nuxt/kit": "^3.13.2", @@ -2804,21 +2803,18 @@ "version": "0.1.8", "resolved": 
"https://registry.npmjs.org/confbox/-/confbox-0.1.8.tgz", "integrity": "sha512-RMtmw0iFkeR4YV+fUOSucriAQNb9g8zFR52MWCtl+cCZOFRNL6zeB395vPzFhEjjn4fMxXudmELnl/KF/WrK6w==", - "dev": true, "license": "MIT" }, "node_modules/@nuxtjs/color-mode/node_modules/pathe": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/pathe/-/pathe-1.1.2.tgz", "integrity": "sha512-whLdWMYL2TwI08hn8/ZqAbrVemu0LNaNNJZX73O6qaIdCTfXutsLhMkjdENX0qhsQ9uIimo4/aQOmXkoon2nDQ==", - "dev": true, "license": "MIT" }, "node_modules/@nuxtjs/color-mode/node_modules/pkg-types": { "version": "1.3.1", "resolved": "https://registry.npmjs.org/pkg-types/-/pkg-types-1.3.1.tgz", "integrity": "sha512-/Jm5M4RvtBFVkKWRu2BLUTNP8/M2a+UwuAX+ae4770q1qVGtfjG+WTCupoZixokjmHiry8uI+dlY8KXYV5HVVQ==", - "dev": true, "license": "MIT", "dependencies": { "confbox": "^0.1.8", @@ -2830,7 +2826,6 @@ "version": "2.0.3", "resolved": "https://registry.npmjs.org/pathe/-/pathe-2.0.3.tgz", "integrity": "sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==", - "dev": true, "license": "MIT" }, "node_modules/@oxc-parser/binding-android-arm64": {