diff --git a/backend/authentication/admin.py b/backend/authentication/admin.py index 7dea0eceb..2a4b27d1d 100644 --- a/backend/authentication/admin.py +++ b/backend/authentication/admin.py @@ -1,6 +1,5 @@ from django.contrib import admin from django.contrib.auth.admin import UserAdmin - from authentication.models import CustomUser @@ -8,20 +7,28 @@ class CustomUserAdmin(UserAdmin): model = CustomUser list_display = ( "email", + "role", "is_staff", "is_active", ) list_filter = ( "email", + "role", "is_staff", "is_active", ) fieldsets = ( (None, {"fields": ("email", "password")}), - ("Permissions", {"fields": ("is_staff", "is_active")}), + ("Permissions", {"fields": ("role", "is_staff", "is_active")}), ) add_fieldsets = ( - (None, {"classes": ("wide",), "fields": ("email", "password1", "password2", "is_staff", "is_active")}), + ( + None, + { + "classes": ("wide",), + "fields": ("email", "password1", "password2", "role", "is_staff", "is_active"), + }, + ), ) search_fields = ("email",) ordering = ("email",) @@ -30,12 +37,10 @@ class CustomUserAdmin(UserAdmin): def make_active(self, request, queryset): queryset.update(is_active=True) - make_active.short_description = "Mark selected users as active" def make_inactive(self, request, queryset): queryset.update(is_active=False) - make_inactive.short_description = "Mark selected users as inactive" diff --git a/backend/authentication/migrations/0002_customuser_role_alter_customuser_is_active.py b/backend/authentication/migrations/0002_customuser_role_alter_customuser_is_active.py new file mode 100644 index 000000000..4e65eabb8 --- /dev/null +++ b/backend/authentication/migrations/0002_customuser_role_alter_customuser_is_active.py @@ -0,0 +1,23 @@ +# Generated by Django 5.2.4 on 2025-07-05 10:06 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('authentication', '0001_initial'), + ] + + operations = [ + migrations.AddField( + model_name='customuser', + name='role', 
+ field=models.CharField(choices=[('admin', 'Admin'), ('staff', 'Staff'), ('uploader', 'Uploader'), ('viewer', 'Viewer')], default='viewer', max_length=20), + ), + migrations.AlterField( + model_name='customuser', + name='is_active', + field=models.BooleanField(default=True), + ), + ] diff --git a/backend/authentication/models.py b/backend/authentication/models.py index 4a565e6cd..0fc8dbaa1 100644 --- a/backend/authentication/models.py +++ b/backend/authentication/models.py @@ -3,37 +3,42 @@ class CustomUserManager(BaseUserManager): - def create_user(self, email, password, **extra_fields): + def create_user(self, email, password=None, **extra_fields): if not email: raise ValueError("The Email field must be set") - if not password: - raise ValueError("The Password field must be set") - - local, domain = email.split("@") - if "+" in local: - local = local.split("+")[0] - email = f"{local}@{domain.lower()}" email = self.normalize_email(email) + extra_fields.setdefault("is_active", True) user = self.model(email=email, **extra_fields) user.set_password(password) user.save(using=self._db) - return user - def create_superuser(self, email, password, **extra_fields): + def create_superuser(self, email, password=None, **extra_fields): extra_fields.setdefault("is_staff", True) - extra_fields.setdefault("is_active", True) extra_fields.setdefault("is_superuser", True) + if extra_fields.get("is_staff") is not True: + raise ValueError("Superuser must have is_staff=True.") + if extra_fields.get("is_superuser") is not True: + raise ValueError("Superuser must have is_superuser=True.") + return self.create_user(email, password, **extra_fields) class CustomUser(AbstractBaseUser, PermissionsMixin): email = models.EmailField(unique=True) - is_active = models.BooleanField(default=False) + is_active = models.BooleanField(default=True) is_staff = models.BooleanField(default=False) + ROLE_CHOICES = ( + ("admin", "Admin"), + ("staff", "Staff"), + ("uploader", "Uploader"), + ("viewer", 
"Viewer"), + ) + role = models.CharField(max_length=20, choices=ROLE_CHOICES, default="viewer") + objects = CustomUserManager() USERNAME_FIELD = "email" diff --git a/backend/backend/settings.py b/backend/backend/settings.py index 9de4f024a..8a36ea54b 100644 --- a/backend/backend/settings.py +++ b/backend/backend/settings.py @@ -17,9 +17,16 @@ load_dotenv() +GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") + # Build paths inside the project like this: BASE_DIR / 'subdir'. BASE_DIR = Path(__file__).resolve().parent.parent + +MEDIA_ROOT = BASE_DIR / "media" +MEDIA_URL = "/media/" + + # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/ @@ -47,6 +54,8 @@ "authentication", "chat", "gpt", + 'django_crontab', + 'rest_framework_simplejwt', ] MIDDLEWARE = [ @@ -59,9 +68,20 @@ "django.middleware.clickjacking.XFrameOptionsMiddleware", "corsheaders.middleware.CorsMiddleware", ] - +CRONJOBS = [ + ('0 2 * * *', 'django.core.management.call_command', ['cleanup_conversations']), +] ROOT_URLCONF = "backend.urls" +REST_FRAMEWORK = { + "DEFAULT_PAGINATION_CLASS": "rest_framework.pagination.PageNumberPagination", + "PAGE_SIZE": 10, + 'DEFAULT_AUTHENTICATION_CLASSES': ( + 'rest_framework_simplejwt.authentication.JWTAuthentication', + ) +} + + TEMPLATES = [ { "BACKEND": "django.template.backends.django.DjangoTemplates", @@ -85,12 +105,17 @@ # https://docs.djangoproject.com/en/4.2/ref/settings/#databases DATABASES = { - "default": { - "ENGINE": "django.db.backends.sqlite3", - "NAME": BASE_DIR / "db.sqlite3", + 'default': { + 'ENGINE': 'django.db.backends.postgresql', + 'NAME': 'fullstack_db', + 'USER': 'postgres', + 'PASSWORD': 'Pvinod@123', + 'HOST': 'localhost', + 'PORT': '5432', } } + # Password validation # https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators @@ -149,3 +174,36 @@ SESSION_COOKIE_SECURE = True CSRF_COOKIE_SECURE = True CSRF_COOKIE_SAMESITE = "None" + +LOGGING = { + 
'version': 1, + 'disable_existing_loggers': False, + 'formatters': { + 'verbose': { + 'format': '{asctime} [{levelname}] {name}: {message}', + 'style': '{', + }, + }, + 'handlers': { + 'file': { + 'level': 'INFO', + 'class': 'logging.FileHandler', + 'filename': BASE_DIR / 'file_activity.log', + 'formatter': 'verbose', + }, + }, + 'loggers': { + 'file_activity': { + 'handlers': ['file'], + 'level': 'INFO', + 'propagate': False, + }, + }, +} + +CACHES = { + 'default': { + 'BACKEND': 'django.core.cache.backends.filebased.FileBasedCache', + 'LOCATION': BASE_DIR / 'django_cache', # Directory to store cache files + } +} diff --git a/backend/backend/urls.py b/backend/backend/urls.py index fa154c7fb..08ee733ef 100644 --- a/backend/backend/urls.py +++ b/backend/backend/urls.py @@ -4,6 +4,7 @@ from django.http import JsonResponse from django.urls import include, path from rest_framework.decorators import api_view +from rest_framework_simplejwt.views import TokenObtainPairView, TokenRefreshView @api_view(["GET"]) @@ -16,5 +17,12 @@ def root_view(request): path("chat/", include("chat.urls")), path("gpt/", include("gpt.urls")), path("auth/", include("authentication.urls")), + path("api/", include("chat.urls")), path("", root_view), -] + static(settings.STATIC_URL, document_root=settings.STATIC_ROOT) + + # JWT Auth Endpoints + path("api/token/", TokenObtainPairView.as_view(), name="token_obtain_pair"), + path("api/token/refresh/", TokenRefreshView.as_view(), name="token_refresh"), +] + +urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) diff --git a/backend/chat/admin.py b/backend/chat/admin.py index a4e7d15fc..c2f540468 100644 --- a/backend/chat/admin.py +++ b/backend/chat/admin.py @@ -1,10 +1,24 @@ from django.contrib import admin from django.utils import timezone from nested_admin.nested import NestedModelAdmin, NestedStackedInline, NestedTabularInline +from django.utils.html import format_html +from django.conf import settings -from chat.models 
import Conversation, Message, Role, Version +from chat.models import Conversation, Message, Role, Version, UploadedFile +@admin.register(UploadedFile) +class UploadedFileAdmin(admin.ModelAdmin): + list_display = ['file', 'conversation', 'uploaded_at', 'download_link'] + list_filter = ['conversation'] + search_fields = ['file', 'conversation__title'] + + def download_link(self, obj): + return format_html( + f"Download" + ) + download_link.short_description = "Download" + class RoleAdmin(NestedModelAdmin): list_display = ["id", "name"] @@ -51,7 +65,7 @@ def queryset(self, request, queryset): class ConversationAdmin(NestedModelAdmin): actions = ["undelete_selected", "soft_delete_selected"] inlines = [VersionInline] - list_display = ("title", "id", "created_at", "modified_at", "deleted_at", "version_count", "is_deleted", "user") + list_display = ("title","topic","id", "created_at", "modified_at", "deleted_at", "version_count", "is_deleted", "user") list_filter = (DeletedListFilter,) ordering = ("-modified_at",) diff --git a/backend/chat/apps.py b/backend/chat/apps.py index 5f75238d2..5a7e8e863 100644 --- a/backend/chat/apps.py +++ b/backend/chat/apps.py @@ -4,3 +4,5 @@ class ChatConfig(AppConfig): default_auto_field = "django.db.models.BigAutoField" name = "chat" + def ready(self): + import chat.signals diff --git a/backend/chat/management/commands/cleanup_conversations.py b/backend/chat/management/commands/cleanup_conversations.py new file mode 100644 index 000000000..96a9e5bba --- /dev/null +++ b/backend/chat/management/commands/cleanup_conversations.py @@ -0,0 +1,14 @@ +from django.core.management.base import BaseCommand +from django.utils import timezone +from datetime import timedelta +from chat.models import Conversation + +class Command(BaseCommand): + help = "Soft deletes conversations older than 30 days" + + def handle(self, *args, **options): + threshold_date = timezone.now() - timedelta(days=30) + old_conversations = 
Conversation.objects.filter(created_at__lt=threshold_date, deleted_at__isnull=True) + + count = old_conversations.update(deleted_at=timezone.now()) + self.stdout.write(self.style.SUCCESS(f"{count} old conversations soft-deleted.")) diff --git a/backend/chat/management/commands/populate_search_vectors.py b/backend/chat/management/commands/populate_search_vectors.py new file mode 100644 index 000000000..5c3dffc21 --- /dev/null +++ b/backend/chat/management/commands/populate_search_vectors.py @@ -0,0 +1,17 @@ +from django.core.management.base import BaseCommand +from django.contrib.postgres.search import SearchVector +from chat.models import FileChunk + +class Command(BaseCommand): + help = "Populate search_vector field in FileChunk" + + def handle(self, *args, **kwargs): + chunks = FileChunk.objects.all() + count = 0 + + for chunk in chunks: + chunk.search_vector = SearchVector('content') + chunk.save() + count += 1 + + self.stdout.write(self.style.SUCCESS(f"Updated {count} FileChunks with search_vector")) diff --git a/backend/chat/migrations/0002_conversation_topic.py b/backend/chat/migrations/0002_conversation_topic.py new file mode 100644 index 000000000..a8e468eb0 --- /dev/null +++ b/backend/chat/migrations/0002_conversation_topic.py @@ -0,0 +1,18 @@ +# Generated by Django 5.2.4 on 2025-07-03 15:20 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('chat', '0001_initial'), + ] + + operations = [ + migrations.AddField( + model_name='conversation', + name='topic', + field=models.CharField(blank=True, max_length=100, null=True), + ), + ] diff --git a/backend/chat/migrations/0003_conversation_summary.py b/backend/chat/migrations/0003_conversation_summary.py new file mode 100644 index 000000000..de7f6f968 --- /dev/null +++ b/backend/chat/migrations/0003_conversation_summary.py @@ -0,0 +1,18 @@ +# Generated by Django 5.2.4 on 2025-07-03 15:39 + +from django.db import migrations, models + + +class 
Migration(migrations.Migration): + + dependencies = [ + ('chat', '0002_conversation_topic'), + ] + + operations = [ + migrations.AddField( + model_name='conversation', + name='summary', + field=models.TextField(blank=True, null=True), + ), + ] diff --git a/backend/chat/migrations/0004_uploadedfile.py b/backend/chat/migrations/0004_uploadedfile.py new file mode 100644 index 000000000..9391c8221 --- /dev/null +++ b/backend/chat/migrations/0004_uploadedfile.py @@ -0,0 +1,21 @@ +# Generated by Django 5.2.4 on 2025-07-03 18:24 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('chat', '0003_conversation_summary'), + ] + + operations = [ + migrations.CreateModel( + name='UploadedFile', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('file', models.FileField(upload_to='uploads/')), + ('uploaded_at', models.DateTimeField(auto_now_add=True)), + ], + ), + ] diff --git a/backend/chat/migrations/0005_uploadedfile_conversation.py b/backend/chat/migrations/0005_uploadedfile_conversation.py new file mode 100644 index 000000000..fa058fc79 --- /dev/null +++ b/backend/chat/migrations/0005_uploadedfile_conversation.py @@ -0,0 +1,19 @@ +# Generated by Django 5.2.4 on 2025-07-04 06:38 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('chat', '0004_uploadedfile'), + ] + + operations = [ + migrations.AddField( + model_name='uploadedfile', + name='conversation', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='files', to='chat.conversation'), + ), + ] diff --git a/backend/chat/migrations/0006_conversation_text.py b/backend/chat/migrations/0006_conversation_text.py new file mode 100644 index 000000000..addc73084 --- /dev/null +++ b/backend/chat/migrations/0006_conversation_text.py @@ -0,0 +1,19 @@ +# 
Generated by Django 5.2.4 on 2025-07-04 14:05 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('chat', '0005_uploadedfile_conversation'), + ] + + operations = [ + migrations.AddField( + model_name='conversation', + name='text', + field=models.TextField(default='Placeholder conversation'), + preserve_default=False, + ), + ] diff --git a/backend/chat/migrations/0007_uploadedfile_file_hash.py b/backend/chat/migrations/0007_uploadedfile_file_hash.py new file mode 100644 index 000000000..d6485f23d --- /dev/null +++ b/backend/chat/migrations/0007_uploadedfile_file_hash.py @@ -0,0 +1,19 @@ +# Generated by Django 5.2.4 on 2025-07-04 18:03 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('chat', '0006_conversation_text'), + ] + + operations = [ + migrations.AddField( + model_name='uploadedfile', + name='file_hash', + field=models.CharField(default='dummy', max_length=64, unique=True), + preserve_default=False, + ), + ] diff --git a/backend/chat/migrations/0008_filechunk.py b/backend/chat/migrations/0008_filechunk.py new file mode 100644 index 000000000..325110810 --- /dev/null +++ b/backend/chat/migrations/0008_filechunk.py @@ -0,0 +1,23 @@ +# Generated by Django 5.2.4 on 2025-07-04 19:27 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('chat', '0007_uploadedfile_file_hash'), + ] + + operations = [ + migrations.CreateModel( + name='FileChunk', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('content', models.TextField()), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('file', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='chunks', to='chat.uploadedfile')), + ], + ), + ] diff --git 
a/backend/chat/migrations/0009_filechunk_chunk_index_filechunk_search_vector_and_more.py b/backend/chat/migrations/0009_filechunk_chunk_index_filechunk_search_vector_and_more.py new file mode 100644 index 000000000..4b4ba42bd --- /dev/null +++ b/backend/chat/migrations/0009_filechunk_chunk_index_filechunk_search_vector_and_more.py @@ -0,0 +1,29 @@ +# Generated by Django 5.2.4 on 2025-07-04 20:34 + +import django.contrib.postgres.indexes +import django.contrib.postgres.search +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('chat', '0008_filechunk'), + ] + + operations = [ + migrations.AddField( + model_name='filechunk', + name='chunk_index', + field=models.IntegerField(default=0), + ), + migrations.AddField( + model_name='filechunk', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(null=True), + ), + migrations.AddIndex( + model_name='filechunk', + index=django.contrib.postgres.indexes.GinIndex(fields=['search_vector'], name='chat_filech_search__76f3b8_gin'), + ), + ] diff --git a/backend/chat/migrations/0010_alter_filechunk_options_and_more.py b/backend/chat/migrations/0010_alter_filechunk_options_and_more.py new file mode 100644 index 000000000..e5dd94a76 --- /dev/null +++ b/backend/chat/migrations/0010_alter_filechunk_options_and_more.py @@ -0,0 +1,23 @@ +# Generated by Django 5.2.4 on 2025-07-05 10:06 + +import django.contrib.postgres.search +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('chat', '0009_filechunk_chunk_index_filechunk_search_vector_and_more'), + ] + + operations = [ + migrations.AlterModelOptions( + name='filechunk', + options={'ordering': ['chunk_index']}, + ), + migrations.AlterField( + model_name='filechunk', + name='search_vector', + field=django.contrib.postgres.search.SearchVectorField(blank=True, null=True), + ), + ] diff --git a/backend/chat/models.py b/backend/chat/models.py index 
242788f14..5d294b0cb 100644 --- a/backend/chat/models.py +++ b/backend/chat/models.py @@ -4,6 +4,60 @@ from authentication.models import CustomUser +from .utils.summarizer import generate_conversation_summary +from django.contrib.postgres.search import SearchVectorField +from django.contrib.postgres.indexes import GinIndex + + +from django.contrib.postgres.indexes import GinIndex +from django.contrib.postgres.search import SearchVectorField +from django.db import models +from django.core.exceptions import ValidationError + +class FileChunk(models.Model): + file = models.ForeignKey( + "UploadedFile", + on_delete=models.CASCADE, + related_name="chunks" + ) + content = models.TextField() + chunk_index = models.IntegerField(default=0) + created_at = models.DateTimeField(auto_now_add=True) + search_vector = SearchVectorField(null=True, blank=True) + + class Meta: + indexes = [ + GinIndex(fields=["search_vector"]), + ] + ordering = ["chunk_index"] + + def __str__(self): + return f"Chunk {self.chunk_index} of File ID {self.file_id}: {self.content[:60]}..." 
+ + +class UploadedFile(models.Model): + file = models.FileField(upload_to="uploads/") + uploaded_at = models.DateTimeField(auto_now_add=True) + file_hash = models.CharField(max_length=64, unique=True) + conversation = models.ForeignKey( + "Conversation", + on_delete=models.CASCADE, + related_name="files", + null=True, + blank=True + ) + def delete(self, *args, **kwargs): + if self.file: + self.file.delete(save=False) + super().delete(*args, **kwargs) + + def __str__(self): + return self.file.name + + def clean(self): + if not self.file: + raise ValidationError("File cannot be empty.") + class Role(models.Model): name = models.CharField(max_length=20, blank=False, null=False, default="user") @@ -15,6 +69,8 @@ def __str__(self): class Conversation(models.Model): id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) title = models.CharField(max_length=100, blank=False, null=False, default="Mock title") + topic = models.CharField(max_length=100, null=True, blank=True) + summary = models.TextField(null=True, blank=True) created_at = models.DateTimeField(auto_now_add=True) modified_at = models.DateTimeField(auto_now=True) active_version = models.ForeignKey( @@ -22,6 +78,13 @@ class Conversation(models.Model): ) deleted_at = models.DateTimeField(null=True, blank=True) user = models.ForeignKey(CustomUser, on_delete=models.CASCADE) + text = models.TextField() + summary = models.TextField(blank=True, null=True) + + def save(self, *args, **kwargs): + if self.text and not self.summary: + self.summary = generate_conversation_summary(self.text) + super().save(*args, **kwargs) def __str__(self): return self.title diff --git a/backend/chat/permissions.py b/backend/chat/permissions.py new file mode 100644 index 000000000..6df88ac6b --- /dev/null +++ b/backend/chat/permissions.py @@ -0,0 +1,15 @@ +from rest_framework.response import Response +from rest_framework import status +from functools import wraps + +def require_roles(*roles): + def 
decorator(view_func): + @wraps(view_func) + def _wrapped_view(request, *args, **kwargs): + if not request.user.is_authenticated: + return Response({"error": "Authentication required"}, status=status.HTTP_401_UNAUTHORIZED) + if request.user.role not in roles: + return Response({"error": "Permission denied"}, status=status.HTTP_403_FORBIDDEN) + return view_func(request, *args, **kwargs) + return _wrapped_view + return decorator diff --git a/backend/chat/serializers.py b/backend/chat/serializers.py index 0c721c061..d01366adb 100644 --- a/backend/chat/serializers.py +++ b/backend/chat/serializers.py @@ -3,6 +3,7 @@ from rest_framework import serializers from chat.models import Conversation, Message, Role, Version +from .models import UploadedFile def should_serialize(validated_data, field_name) -> bool: @@ -10,6 +11,33 @@ def should_serialize(validated_data, field_name) -> bool: return True +class UploadedFileSerializer(serializers.ModelSerializer): + + file = serializers.SerializerMethodField() + class Meta: + model = UploadedFile + fields = ['id', 'file', 'uploaded_at', 'file_hash', 'conversation'] + read_only_fields = ['file_hash'] + + # def get_file(self, obj): + # request = self.context.get('request') + # if request: + # return request.build_absolute_uri(obj.file.url) + # return obj.file.url if obj.file else None + def get_file(self, obj): + request = self.context.get('request') + if not obj.file: + return None + try: + if request: + return request.build_absolute_uri(obj.file.url) + return obj.file.url + except Exception: + return None + + + + class TitleSerializer(serializers.Serializer): title = serializers.CharField(max_length=100, required=True) @@ -110,15 +138,18 @@ def update(self, instance, validated_data): class ConversationSerializer(serializers.ModelSerializer): versions = VersionSerializer(many=True) - + files = UploadedFileSerializer(many=True, read_only=True) + class Meta: model = Conversation fields = [ "id", # DB "title", # required + 'created_at', 
"active_version", "versions", # optional "modified_at", # DB, read-only + 'files' ] def create(self, validated_data): diff --git a/backend/chat/signals.py b/backend/chat/signals.py new file mode 100644 index 000000000..225e8bfbf --- /dev/null +++ b/backend/chat/signals.py @@ -0,0 +1,66 @@ +from django.db.models.signals import post_save +from django.dispatch import receiver +from chat.models import Conversation, Message +from .models import UploadedFile, FileChunk +import mimetypes + +import os +import fitz # PyMuPDF +from docx import Document + +@receiver(post_save, sender=UploadedFile) +def process_uploaded_file(sender, instance, created, **kwargs): + + if not instance.file or not hasattr(instance.file, "path"): + return + + if not created: + return + + file_path = instance.file.path + file_ext = os.path.splitext(file_path)[1].lower() + + text = "" + + if file_ext == ".txt": + with open(file_path, "r", encoding="utf-8", errors="ignore") as f: + text = f.read() + + elif file_ext == ".pdf": + doc = fitz.open(file_path) + text = "\n".join([page.get_text() for page in doc]) + + elif file_ext == ".docx": + doc = Document(file_path) + text = "\n".join([para.text for para in doc.paragraphs]) + + else: + return # Skip unsupported formats + + # Chunk and save + chunk_size = 500 + for i in range(0, len(text), chunk_size): + FileChunk.objects.create( + file=instance, + content=text[i:i+chunk_size], + chunk_index=i // chunk_size + ) + + + +@receiver(post_save, sender=Message) +def generate_conversation_summary(sender, instance, created, **kwargs): + if not created: + return + + conversation = instance.version.conversation + + # Get all messages in the conversation + messages = Message.objects.filter(version__conversation=conversation).order_by("created_at") + + # Create a simple summary by joining the first few message contents + summary = " ".join([msg.content[:50] for msg in messages[:5]]) + + # Save the summary to the conversation + conversation.summary = summary + 
conversation.save() diff --git a/backend/chat/tests/test_views.py b/backend/chat/tests/test_views.py new file mode 100644 index 000000000..85d1b51a4 --- /dev/null +++ b/backend/chat/tests/test_views.py @@ -0,0 +1,69 @@ +# test_views.py +from django.core.files.uploadedfile import SimpleUploadedFile +from django.urls import reverse +from rest_framework.test import APITestCase +from rest_framework_simplejwt.tokens import RefreshToken +from chat.models import Conversation, UploadedFile +from authentication.models import CustomUser + +from django.test.utils import override_settings +import tempfile +import fitz +import docx + +@override_settings(MEDIA_ROOT=tempfile.gettempdir()) +class FileUploadTests(APITestCase): + def setUp(self): + self.user = CustomUser.objects.create_user( + email="user@example.com", password="testpass", role="uploader", is_active=True + ) + self.conversation = Conversation.objects.create(user=self.user, title="Test Conversation") + self.token = str(RefreshToken.for_user(self.user).access_token) + self.client.credentials(HTTP_AUTHORIZATION='Bearer ' + self.token) + + def test_file_upload(self): + url = reverse('file-upload') + file_data = SimpleUploadedFile("test.txt", b"Some sample content", content_type="text/plain") + + data = { + "file": file_data, + "conversation": str(self.conversation.id) + } + + response = self.client.post(url, data, format='multipart') + print(response.status_code) + print(response.data) + + self.assertEqual(response.status_code, 201) + self.assertTrue(UploadedFile.objects.exists()) + + def extract_text_from_file(self, uploaded_file): + try: + file_path = uploaded_file.file.path + except Exception: + # For test cases or in-memory files + return [uploaded_file.file.read().decode('utf-8', errors='ignore')] + + chunks = [] + + if file_path.endswith(".pdf"): + doc = fitz.open(file_path) + for page in doc: + text = page.get_text() + if text.strip(): + chunks.append(text.strip()) + + elif file_path.endswith(".docx"): + doc = 
docx.Document(file_path) + text = "\n".join([p.text for p in doc.paragraphs]) + if text.strip(): + chunks.append(text.strip()) + elif file_path.endswith(".txt"): + with open(file_path, "r", encoding="utf-8", errors="ignore") as f: + text = f.read() + if text.strip(): + chunks.append(text.strip()) + + + return chunks + diff --git a/backend/chat/urls.py b/backend/chat/urls.py index bd8ceadc0..5deef31bb 100644 --- a/backend/chat/urls.py +++ b/backend/chat/urls.py @@ -1,6 +1,16 @@ from django.urls import path + +from chat.views import FileListView from chat import views +from chat.views import FileUploadView +from django.conf import settings +from django.conf.urls.static import static +from chat.views import get_conversation_files +from chat.views import generate_summary_api +from chat.views import ConversationSummaryListView +from chat.views import FileDeleteView +from chat.views import rag_answer urlpatterns = [ path("", views.chat_root_view, name="chat_root_view"), @@ -19,4 +29,13 @@ ), path("conversations//delete/", views.conversation_soft_delete, name="conversation_delete"), path("versions//add_message/", views.version_add_message, name="version_add_message"), + path("files/upload/", FileUploadView.as_view(), name="file-upload"), + path("files/", FileListView.as_view(), name="file-list"), + path("conversations//files/", get_conversation_files, name="conversation-files"), + path("conversations//generate_summary/", generate_summary_api, name="generate-summary"), + path("summaries/", ConversationSummaryListView.as_view(), name="conversation-summary-list"), + path("files//delete/", FileDeleteView.as_view(), name="file-delete"), + path("rag/answer/", rag_answer, name="rag-answer"), ] + +urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) diff --git a/backend/chat/utils/file_processing.py b/backend/chat/utils/file_processing.py new file mode 100644 index 000000000..991ba1382 --- /dev/null +++ b/backend/chat/utils/file_processing.py @@ -0,0 +1,16 
def split_into_chunks(text, max_length=500):
    """Split *text* into whitespace-delimited chunks of bounded size.

    Words are never broken. Each word is budgeted as ``len(word) + 1`` (the
    word plus one separator), and a chunk is closed as soon as adding the next
    word would push that budget past ``max_length``.

    Args:
        text: The string to split; any run of whitespace separates words.
        max_length: Budget per chunk, counted as described above.

    Returns:
        A list of chunk strings with words joined by single spaces. Empty or
        whitespace-only input yields ``[]``. A single word that alone exceeds
        the budget is emitted as its own (oversized) chunk rather than being
        dropped or preceded by an empty chunk.
    """
    chunks = []
    current_chunk = []
    current_size = 0  # running budget, so the chunk is not re-summed per word

    for word in text.split():
        word_size = len(word) + 1
        # Only flush a non-empty chunk: flushing an empty one would emit "".
        if current_chunk and current_size + word_size > max_length:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_size = 0
        current_chunk.append(word)
        current_size += word_size

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    return chunks
+ +# response = openai.ChatCompletion.create( +# model="gpt-3.5-turbo", +# messages=[ +# {"role": "system", "content": "Summarize this conversation in 3-5 lines."}, +# {"role": "user", "content": conversation_text}, +# ], +# temperature=0.7, +# max_tokens=150 +# ) + +# summary = response.choices[0].message.content.strip() +# return summary diff --git a/backend/chat/views.py b/backend/chat/views.py index 0d18f7a69..2e984504f 100644 --- a/backend/chat/views.py +++ b/backend/chat/views.py @@ -1,12 +1,288 @@ from django.contrib.auth.decorators import login_required from django.utils import timezone from rest_framework import status -from rest_framework.decorators import api_view +from rest_framework.decorators import api_view, permission_classes from rest_framework.response import Response -from chat.models import Conversation, Message, Version -from chat.serializers import ConversationSerializer, MessageSerializer, TitleSerializer, VersionSerializer +from chat.models import Conversation, Message, Version, UploadedFile +from chat.serializers import ConversationSerializer, MessageSerializer, TitleSerializer, VersionSerializer,UploadedFileSerializer from chat.utils.branching import make_branched_conversation +from rest_framework.views import APIView +from rest_framework.parsers import MultiPartParser, FormParser +from rest_framework.generics import ListAPIView +from .models import UploadedFile ,FileChunk +from .serializers import UploadedFileSerializer +from chat.utils.summarizer import generate_conversation_summary +from rest_framework.filters import SearchFilter, OrderingFilter +import hashlib +from rest_framework.generics import DestroyAPIView +from django.db import IntegrityError +import os +import fitz +import docx +import google.generativeai as genai +from rest_framework.permissions import IsAuthenticated +from django.shortcuts import get_object_or_404 +# from chat.utils.rag import generate_rag_answer +from chat.permissions import require_roles + +from 
class FileUploadView(APIView):
    """Accept a multipart upload, store it, and index its text as ``FileChunk`` rows.

    Access requires both ``IsAuthenticated`` and ``IsUploaderOrAdmin``.
    """

    parser_classes = [MultiPartParser, FormParser]
    permission_classes = [IsAuthenticated, IsUploaderOrAdmin]

    def extract_text_from_file(self, uploaded_file):
        """Return the text of a saved upload as a list of chunk strings.

        Args:
            uploaded_file: Saved ``UploadedFile`` instance; its ``file`` field
                is what gets read.

        Returns:
            list[str]: one entry per non-empty PDF page, a single entry for a
            ``.docx`` document, or the ``split_into_chunks`` pieces of any
            other file read as UTF-8 text. ``[]`` when nothing could be read.
            This helper always returns a list, never an HTTP response, because
            the caller enumerates the result.
        """
        try:
            file_path = uploaded_file.file.path
            lowered_path = file_path.lower()  # match ".PDF" / ".Docx" too

            if lowered_path.endswith(".pdf"):
                # Context manager closes the document handle after reading.
                with fitz.open(file_path) as doc:
                    page_texts = [page.get_text().strip() for page in doc]
                return [page_text for page_text in page_texts if page_text]

            if lowered_path.endswith(".docx"):
                doc = docx.Document(file_path)
                text = "\n".join(p.text for p in doc.paragraphs).strip()
                return [text] if text else []

            # Default: treat as plain text. Read from the stored path so the
            # result does not depend on the field file's current read position.
            with open(file_path, "r", encoding="utf-8", errors="ignore") as handle:
                return split_into_chunks(handle.read())
        except Exception as exc:
            # Best-effort fallback (e.g. no local path available, as with
            # in-memory files in tests): decode whatever the file object yields.
            logger.warning(f"Path-based text extraction failed, falling back to raw read: {exc}")
            try:
                raw_text = uploaded_file.file.read().decode("utf-8", errors="ignore")
            except Exception as inner_e:
                logger.error(f"Failed to extract text from file: {inner_e}")
                return []
            return [raw_text] if raw_text.strip() else []

    def post(self, request, *args, **kwargs):
        """Validate, de-duplicate, store and chunk an uploaded file.

        Expects multipart fields ``file`` and ``conversation`` (a conversation
        id). Responds 201 with the serialized upload on success; 400 for a
        missing/empty/duplicate file, missing conversation id or serializer
        errors; 404 for an unknown conversation; 500 if chunking fails.
        """
        uploaded_file = request.FILES.get('file')
        if not uploaded_file:
            return Response({"error": "File is required."}, status=400)

        logger.info(f"User {request.user} uploaded file {uploaded_file.name}")
        conversation_id = request.data.get("conversation")
        if not conversation_id:
            return Response({"error": "Conversation ID is required."}, status=400)

        try:
            conversation = Conversation.objects.get(id=conversation_id)
        except Conversation.DoesNotExist:
            return Response({"error": "Conversation not found."}, status=404)

        file_content = uploaded_file.read()
        logger.info(f"File size: {len(file_content)} bytes")

        if not file_content:
            return Response({"error": "Uploaded file is empty."}, status=400)

        # Content hash is the de-duplication key; rewind so the serializer
        # saves the full file afterwards.
        file_hash = hashlib.sha256(file_content).hexdigest()
        uploaded_file.seek(0)

        if UploadedFile.objects.filter(file_hash=file_hash).exists():
            logger.warning(f"Duplicate file upload attempt by user {request.user}: {uploaded_file.name}")
            return Response({"error": "Duplicate file already exists."}, status=400)

        data = request.data.copy()
        data["file"] = uploaded_file
        data["conversation"] = conversation.id
        data["file_hash"] = file_hash

        serializer = UploadedFileSerializer(data=data, context={"request": request})
        if not serializer.is_valid():
            logger.warning(f"File upload failed for user {request.user}: {serializer.errors}")
            return Response(serializer.errors, status=400)

        try:
            file_instance = serializer.save()
        except IntegrityError:
            # Covers a concurrent upload of the same content that slipped past
            # the exists() check above.
            return Response({"error": "Duplicate file detected."}, status=400)

        try:
            chunks = self.extract_text_from_file(file_instance)
            for i, chunk in enumerate(chunks):
                FileChunk.objects.create(
                    uploaded_file=file_instance,
                    content=chunk,
                    chunk_index=i,
                )
        except Exception as e:
            # NOTE(review): the stored UploadedFile row is kept on failure, so
            # the hash check above will reject a retry of the same file --
            # confirm whether it should be cleaned up here.
            logger.error(f"Text extraction failed: {e}")
            return Response({"error": "File processing failed."}, status=500)

        logger.info(f"User {request.user} uploaded file {uploaded_file.name}")
        return Response(
            {"message": "File uploaded successfully", "data": serializer.data},
            status=201,
        )
@api_view(["POST"])
def generate_summary_api(request, pk):
    """Generate, persist and cache a summary for conversation *pk*.

    This is the definition the module ends up exporting, because it comes
    after an earlier function of the same name. It therefore carries that
    earlier version's cache handling, which was otherwise silently lost.

    Responses:
        200 ``{"message": "Summary from cache", "summary": ...}`` on a cache hit.
        200 ``{"message": "Summary generated", "summary": ...}`` after a fresh run.
        404 when the conversation does not exist.
        502 when the summarizer reports a failure; nothing is saved or cached.
    """
    cache_key = f"conversation_summary_{pk}"
    cached_summary = cache.get(cache_key)
    if cached_summary:
        return Response({"message": "Summary from cache", "summary": cached_summary})

    try:
        conversation = Conversation.objects.get(pk=pk)
    except Conversation.DoesNotExist:
        return Response({"error": "Conversation not found"}, status=404)

    # NOTE(review): the summarizer's parameter is named ``conversation_text``
    # and is interpolated into the prompt, yet a model instance is passed
    # here -- confirm whether the message text should be assembled first.
    summary = generate_conversation_summary(conversation)

    # The summarizer returns a bracketed marker string instead of raising;
    # do not store or cache that marker as if it were a real summary.
    if isinstance(summary, str) and summary.startswith("[Error generating summary"):
        return Response({"error": summary}, status=502)

    conversation.summary = summary
    conversation.save()
    cache.set(cache_key, summary, timeout=60 * 60)  # one hour

    return Response({"message": "Summary generated", "summary": summary})
serializer_class = UploadedFileSerializer @api_view(["GET"]) @@ -14,7 +290,7 @@ def chat_root_view(request): return Response({"message": "Chat works!"}, status=status.HTTP_200_OK) -@login_required +@permission_classes([IsAuthenticated]) @api_view(["GET"]) def get_conversations(request): conversations = Conversation.objects.filter(user=request.user, deleted_at__isnull=True).order_by("-modified_at") @@ -22,7 +298,7 @@ def get_conversations(request): return Response(serializer.data, status=status.HTTP_200_OK) -@login_required +@permission_classes([IsAuthenticated]) @api_view(["GET"]) def get_conversations_branched(request): conversations = Conversation.objects.filter(user=request.user, deleted_at__isnull=True).order_by("-modified_at") @@ -35,7 +311,7 @@ def get_conversations_branched(request): return Response(conversations_data, status=status.HTTP_200_OK) -@login_required +@permission_classes([IsAuthenticated]) @api_view(["GET"]) def get_conversation_branched(request, pk): try: @@ -49,35 +325,35 @@ def get_conversation_branched(request, pk): return Response(conversation_data, status=status.HTTP_200_OK) - -@login_required @api_view(["POST"]) +@permission_classes([IsAuthenticated]) def add_conversation(request): + title = request.data.get("title", "") + messages_data = request.data.get("messages", []) + try: - conversation_data = {"title": request.data.get("title", "Mock title"), "user": request.user} - conversation = Conversation.objects.create(**conversation_data) + + conversation = Conversation.objects.create(user=request.user, title=title) version = Version.objects.create(conversation=conversation) - - messages_data = request.data.get("messages", []) + for idx, message_data in enumerate(messages_data): message_serializer = MessageSerializer(data=message_data) if message_serializer.is_valid(): message_serializer.save(version=version) - if idx == 0: - version.save() else: - return Response(message_serializer.errors, status=status.HTTP_400_BAD_REQUEST) - + return 
Response(message_serializer.errors, status=400) + conversation.active_version = version conversation.save() serializer = ConversationSerializer(conversation) - return Response(serializer.data, status=status.HTTP_201_CREATED) + return Response(serializer.data, status=201) + except Exception as e: - return Response({"detail": str(e)}, status=status.HTTP_400_BAD_REQUEST) + return Response({"detail": str(e)}, status=400) -@login_required +@permission_classes([IsAuthenticated]) @api_view(["GET", "PUT", "DELETE"]) def conversation_manage(request, pk): try: @@ -86,7 +362,8 @@ def conversation_manage(request, pk): return Response(status=status.HTTP_404_NOT_FOUND) if request.method == "GET": - serializer = ConversationSerializer(conversation) + # serializer = ConversationSerializer(conversation) + serializer = ConversationSerializer(conversation, context={"request": request}) return Response(serializer.data) elif request.method == "PUT": @@ -101,7 +378,7 @@ def conversation_manage(request, pk): return Response(status=status.HTTP_204_NO_CONTENT) -@login_required +@permission_classes([IsAuthenticated]) @api_view(["PUT"]) def conversation_change_title(request, pk): try: @@ -119,7 +396,7 @@ def conversation_change_title(request, pk): return Response({"detail": "Title not provided"}, status=status.HTTP_400_BAD_REQUEST) -@login_required +@permission_classes([IsAuthenticated]) @api_view(["PUT"]) def conversation_soft_delete(request, pk): try: @@ -132,7 +409,7 @@ def conversation_soft_delete(request, pk): return Response(status=status.HTTP_204_NO_CONTENT) -@login_required +@permission_classes([IsAuthenticated]) @api_view(["POST"]) def conversation_add_message(request, pk): try: @@ -158,7 +435,7 @@ def conversation_add_message(request, pk): return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) -@login_required +@permission_classes([IsAuthenticated]) @api_view(["POST"]) def conversation_add_version(request, pk): try: @@ -194,7 +471,7 @@ def 
conversation_add_version(request, pk): return Response(serializer.data, status=status.HTTP_201_CREATED) -@login_required +@permission_classes([IsAuthenticated]) @api_view(["PUT"]) def conversation_switch_version(request, pk, version_id): try: @@ -211,7 +488,7 @@ def conversation_switch_version(request, pk, version_id): return Response(status=status.HTTP_204_NO_CONTENT) -@login_required +@permission_classes([IsAuthenticated]) @api_view(["POST"]) def version_add_message(request, pk): try: diff --git a/backend/gpt/views.py b/backend/gpt/views.py index e9c81cb2e..4aacadd9c 100644 --- a/backend/gpt/views.py +++ b/backend/gpt/views.py @@ -1,7 +1,7 @@ from django.contrib.auth.decorators import login_required from django.http import JsonResponse, StreamingHttpResponse -from rest_framework.decorators import api_view - +from rest_framework.decorators import api_view,permission_classes +from rest_framework.permissions import IsAuthenticated from src.utils.gpt import get_conversation_answer, get_gpt_title, get_simple_answer @@ -10,7 +10,7 @@ def gpt_root_view(request): return JsonResponse({"message": "GPT endpoint works!"}) -@login_required +@permission_classes([IsAuthenticated]) @api_view(["POST"]) def get_title(request): data = request.data @@ -18,14 +18,14 @@ def get_title(request): return JsonResponse({"content": title}) -@login_required +@permission_classes([IsAuthenticated]) @api_view(["POST"]) def get_answer(request): data = request.data return StreamingHttpResponse(get_simple_answer(data["user_question"], stream=True), content_type="text/html") -@login_required +@permission_classes([IsAuthenticated]) @api_view(["POST"]) def get_conversation(request): data = request.data diff --git a/backend/run_cleanup.bat b/backend/run_cleanup.bat new file mode 100644 index 000000000..331eb8567 --- /dev/null +++ b/backend/run_cleanup.bat @@ -0,0 +1,4 @@ +@echo off +cd /d "C:\Users\PAREPALLI VINOD\fullstack-assignment\backend" +call ..\venv\Scripts\activate.bat +python manage.py 
cleanup_conversations