From 8cb46cf7b0c05e1c78446c69ecf333476e7ca86e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 15 Jan 2026 15:02:30 +0000
Subject: [PATCH 01/11] Initial plan
From 57de12d08d752476ef66468d0b3505df48a471a2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 15 Jan 2026 15:09:06 +0000
Subject: [PATCH 02/11] Add copilot instructions and agent configuration files
- Create .github/copilot-instructions.md with repository overview
- Add .github/agents/celery-tasks.md for Celery task development
- Add .github/agents/testing.md for test development
- Add .github/agents/django-development.md for Django development
- Document all conventions, patterns, and best practices
Co-authored-by: danieleguido <1181642+danieleguido@users.noreply.github.com>
---
.github/agents/celery-tasks.md | 424 +++++++++++++++++
.github/agents/django-development.md | 666 +++++++++++++++++++++++++++
.github/agents/testing.md | 590 ++++++++++++++++++++++++
.github/copilot-instructions.md | 271 +++++++++++
4 files changed, 1951 insertions(+)
create mode 100644 .github/agents/celery-tasks.md
create mode 100644 .github/agents/django-development.md
create mode 100644 .github/agents/testing.md
create mode 100644 .github/copilot-instructions.md
diff --git a/.github/agents/celery-tasks.md b/.github/agents/celery-tasks.md
new file mode 100644
index 0000000..023faef
--- /dev/null
+++ b/.github/agents/celery-tasks.md
@@ -0,0 +1,424 @@
+# Agent: Celery Tasks Development
+
+This agent specializes in developing and maintaining Celery background tasks for the impresso-user-admin Django application.
+
+## Expertise
+
+- Creating new Celery tasks with proper decorators and configuration
+- Writing helper functions for task operations
+- Implementing job progress tracking
+- Integrating with Solr for search and indexing
+- Managing user permissions and access control
+- Error handling and retry logic
+- Structured logging
+
+## Task Development Guidelines
+
+### Task Definition Structure
+
+All Celery tasks should follow this pattern:
+
+```python
+from celery import shared_task
+from celery.utils.log import get_task_logger
+
+logger = get_task_logger(__name__)
+
+@shared_task(
+ bind=True,
+ autoretry_for=(Exception,),
+ retry_backoff=2,
+ retry_kwargs={"max_retries": 5},
+ retry_jitter=True,
+)
+def task_name(self, param: type) -> return_type:
+ """
+ Task description.
+
+ Args:
+ param: Description
+
+ Returns:
+ Description
+ """
+ logger.info(f"[context] Starting task with param={param}")
+ # Implementation
+```
+
+### File Organization
+
+- **Task definitions**: Place in `impresso/tasks/`
+ - Use descriptive filenames ending in `_task.py` or `_tasks.py`
+ - Import and use helper functions from utils
+
+- **Helper functions**: Place in `impresso/utils/tasks/`
+ - Reusable logic that can be called by multiple tasks
+ - Database operations, API calls, data processing
+ - Keep helpers stateless and testable
+
+### Job Progress Tracking
+
+For long-running tasks, use the Job model to track progress:
+
+```python
+from impresso.models import Job
+from impresso.utils.tasks import (
+ update_job_progress,
+ update_job_completed,
+ is_task_stopped,
+ TASKSTATE_PROGRESS,
+)
+
+def long_running_task(self, job_id: int):
+ job = Job.objects.get(pk=job_id)
+
+ # Check if user stopped the job
+ if is_task_stopped(task=self, job=job, progress=0.0, logger=logger):
+ return
+
+ # Update progress
+ update_job_progress(
+ task=self,
+ job=job,
+ progress=0.5, # 50%
+ taskstate=TASKSTATE_PROGRESS,
+ extra={"current_step": "processing"},
+ message="Processing data...",
+ logger=logger,
+ )
+
+ # Complete the job
+ update_job_completed(
+ task=self,
+ job=job,
+ extra={"results": "summary"},
+ message="Task completed successfully",
+ logger=logger,
+ )
+```
+
+### Pagination with User Limits
+
+When processing large result sets from Solr:
+
+```python
+from impresso.utils.tasks import get_pagination
+from django.conf import settings
+
+# Calculate pagination respecting user and system limits
+page, loops, progress, max_loops = get_pagination(
+ skip=skip,
+ limit=limit,
+ total=total,
+ job=job,
+ ignore_max_loops=False # Set True only for admin operations
+)
+
+logger.info(
+ f"[job:{job.pk} user:{job.creator.pk}] "
+ f"page={page} loops={loops} progress={progress * 100:.2f}%"
+)
+
+# Loop through pages
+if page < loops:
+ # More pages to process
+ skip += limit
+ # Continue processing
+else:
+ # All pages processed
+ pass
+```
+
+### Solr Integration
+
+Use the provided Solr utilities:
+
+```python
+from impresso.solr import find_all, update
+from django.conf import settings
+
+# Query Solr
+results = find_all(
+ q="content_txt_fr:*",
+ fl="id,title,date",
+ skip=0,
+ limit=100,
+ logger=logger
+)
+
+total = results["response"]["numFound"]
+docs = results["response"]["docs"]
+
+# Update Solr (requires write credentials)
+update_result = update(
+ url=settings.IMPRESSO_SOLR_URL_UPDATE,
+ todos=[
+ {
+ "id": "doc-123",
+ "ucoll_ss": {"add": ["collection-id"]},
+ "_version_": doc_version
+ }
+ ],
+ logger=logger
+)
+```
+
+### Access Control and Content Redaction
+
+Always respect user permissions:
+
+```python
+from impresso.utils.bitmask import BitMask64
+from impresso.utils.solr import (
+ mapper_doc_remove_private_collections,
+ mapper_doc_redact_contents,
+)
+
+# Get user's bitmap for access control
+user_bitmask = BitMask64(job.creator.profile.user_bitmap_key)
+
+# Check if user has special no-redaction privilege
+user_allow_no_redaction = job.creator.groups.filter(
+ name=settings.IMPRESSO_GROUP_USER_PLAN_NO_REDACTION
+).exists()
+
+# Process each document
+for doc in docs:
+ # Remove private collections from user's view
+ doc = mapper_doc_remove_private_collections(
+ doc=doc,
+ prefix=job.creator.profile.uid
+ )
+
+ # Redact content based on permissions (unless user has privilege)
+ if not user_allow_no_redaction:
+ doc = mapper_doc_redact_contents(
+ doc=doc,
+ user_bitmask=user_bitmask,
+ )
+```
+
+### Email Operations
+
+Use the email utility functions:
+
+```python
+from impresso.utils.tasks.email import send_templated_email_with_context
+from django.conf import settings
+
+success = send_templated_email_with_context(
+ template='notification_name', # Uses emails/notification_name.txt and .html
+ subject='Email Subject',
+ from_email=f"Impresso Team <{settings.DEFAULT_FROM_EMAIL}>",
+ to=[user.email],
+ cc=[settings.DEFAULT_FROM_EMAIL],
+ reply_to=[settings.DEFAULT_FROM_EMAIL],
+ context={
+ 'user': user,
+ 'custom_data': 'value',
+ },
+ logger=logger,
+ fail_silently=False,
+)
+```
+
+### Error Handling
+
+Implement proper error handling with retries:
+
+```python
+from django.db.utils import IntegrityError
+from requests.exceptions import RequestException
+
+@shared_task(
+ bind=True,
+ autoretry_for=(RequestException, IntegrityError),
+ retry_backoff=2,
+ retry_kwargs={"max_retries": 5},
+ retry_jitter=True,
+)
+def resilient_task(self, param: str):
+ try:
+ # Task logic
+ pass
+ except ValueError as e:
+ # Don't retry validation errors
+ logger.error(f"Validation error: {e}")
+ raise
+ except Exception as e:
+ # Log and let Celery handle retry
+ logger.exception(f"Unexpected error: {e}")
+ raise
+```
+
+### Logging Best Practices
+
+Use structured logging with context:
+
+```python
+# Always include relevant IDs
+logger.info(f"[job:{job.pk} user:{user.pk}] Starting operation")
+
+# Include metrics
+logger.info(
+ f"[job:{job.pk}] Processed {count} items in {qtime}ms "
+ f"(page {page}/{loops}, {progress*100:.2f}%)"
+)
+
+# Use appropriate levels
+logger.debug(f"Debug info: {data}")
+logger.info("Operation completed successfully")
+logger.warning(f"Potential issue: {warning}")
+logger.error(f"Error occurred: {error}")
+logger.exception(f"Exception with traceback: {e}") # Includes stack trace
+```
+
+## Testing Tasks
+
+Create tests in `impresso/tests/tasks/`:
+
+```python
+from django.test import TestCase, TransactionTestCase
+from django.contrib.auth.models import User
+from impresso.tasks.my_task import my_task
+from django.core import mail
+
+class TestMyTask(TransactionTestCase):
+ """
+ Test my_task functionality.
+
+ Run with:
+ ENV=dev pipenv run ./manage.py test impresso.tests.tasks.TestMyTask
+ """
+
+ def setUp(self):
+ self.user = User.objects.create_user(
+ username="testuser",
+ email="test@example.com",
+ password="password123"
+ )
+ # Create default groups
+ from impresso.signals import create_default_groups
+ create_default_groups(sender="impresso")
+
+ def test_task_execution(self):
+ # Clear mail outbox
+ mail.outbox = []
+
+ # Run task
+ result = my_task(user_id=self.user.id)
+
+ # Assertions
+ self.assertEqual(result, expected_value)
+ self.assertEqual(len(mail.outbox), 1)
+```
+
+## Common Patterns
+
+### Processing Collections
+
+```python
+def process_collection_items(
+ collection_id: str,
+ job: Job,
+ skip: int = 0,
+ limit: int = 100,
+ logger=default_logger
+) -> Tuple[int, int, float]:
+ """Process items in a collection with pagination."""
+
+ # Get collection
+ collection = Collection.objects.get(pk=collection_id)
+
+ # Query Solr for collection items
+ query = f"ucoll_ss:{collection_id}"
+ results = find_all(
+ q=query,
+ fl="id,title,date",
+ skip=skip,
+ limit=limit,
+ logger=logger
+ )
+
+ total = results["response"]["numFound"]
+ page, loops, progress, max_loops = get_pagination(
+ skip=skip, limit=limit, total=total, job=job
+ )
+
+ # Process items
+ for doc in results["response"]["docs"]:
+ # Process each item
+ pass
+
+ return page, loops, progress
+```
+
+### Export to CSV/ZIP
+
+```python
+import csv
+from zipfile import ZipFile, ZIP_DEFLATED
+
+def export_results_to_csv(job: Job, results: list, fieldnames: list, skip: int = 0):
+ """Export results to CSV and create ZIP archive."""
+
+ csv_path = job.attachment.upload.path
+
+ with open(csv_path, mode='a', encoding='utf-8-sig', newline='') as csvfile:
+ writer = csv.DictWriter(
+ csvfile,
+ delimiter=';',
+ quoting=csv.QUOTE_MINIMAL,
+ fieldnames=fieldnames,
+ )
+
+ # Write header on first page
+ if skip == 0:
+ writer.writeheader()
+
+ # Write rows
+ for row in results:
+ filtered_row = {k: v for k, v in row.items() if k in fieldnames}
+ writer.writerow(filtered_row)
+
+ # Create ZIP when done
+ zip_path = f"{csv_path}.zip"
+ with ZipFile(zip_path, 'w', ZIP_DEFLATED) as zipf:
+ zipf.write(csv_path, os.path.basename(csv_path))
+
+ # Update job attachment
+ job.attachment.upload.name = f"{job.attachment.upload.name}.zip"
+ job.attachment.save()
+
+ # Remove original CSV
+ if os.path.exists(csv_path):
+ os.remove(csv_path)
+```
+
+## Configuration Settings
+
+Key Celery and Solr settings from `settings.py`:
+
+- `CELERY_BROKER_URL` - Redis connection for Celery
+- `IMPRESSO_SOLR_URL` - Main Solr index URL
+- `IMPRESSO_SOLR_PASSAGES_URL_SELECT` - Text reuse passages query URL
+- `IMPRESSO_SOLR_PASSAGES_URL_UPDATE` - Text reuse passages update URL
+- `IMPRESSO_SOLR_EXEC_LIMIT` - Maximum rows per Solr query (default: 100)
+- `IMPRESSO_SOLR_EXEC_MAX_LOOPS` - Maximum query loops (default: 100)
+- `IMPRESSO_GROUP_USER_PLAN_*` - User plan group names
+- `DEFAULT_FROM_EMAIL` - Email sender address
+
+## Key Models
+
+- `Job` - Tracks long-running asynchronous tasks
+- `Collection` - User-created collections of content items
+- `CollectableItem` - Individual items in collections
+- `UserBitmap` - User access permissions as bitmap
+- `UserChangePlanRequest` - Plan upgrade/downgrade requests
+- `Profile` - User profile with uid and max_loops_allowed
+
+## References
+
+- Celery documentation: https://docs.celeryq.dev/
+- Django documentation: https://docs.djangoproject.com/
+- Apache Solr documentation: https://solr.apache.org/guide/
diff --git a/.github/agents/django-development.md b/.github/agents/django-development.md
new file mode 100644
index 0000000..c9653fb
--- /dev/null
+++ b/.github/agents/django-development.md
@@ -0,0 +1,666 @@
+# Agent: Django Development
+
+This agent specializes in Django application development for the impresso-user-admin project.
+
+## Expertise
+
+- Django models, views, and admin interface
+- User authentication and authorization
+- Django signals and middleware
+- URL routing and template rendering
+- Django management commands
+- Database migrations
+- Form handling and validation
+
+## Django Project Structure
+
+### Apps Organization
+
+The project is organized as a single Django app named `impresso` with the following structure:
+
+```
+impresso/
+├── __init__.py
+├── settings.py # Django settings
+├── base.py # Base settings and dotenv loading
+├── urls.py # URL routing
+├── wsgi.py # WSGI application
+├── celery.py # Celery configuration
+├── models/ # Database models
+├── views/ # View functions/classes
+├── admin/ # Admin customizations
+├── signals.py # Django signals
+├── management/
+│ └── commands/ # Custom management commands
+├── templates/ # HTML templates
+│ └── emails/ # Email templates
+├── static/ # Static files (CSS, JS, images)
+└── tests/ # Test suite
+```
+
+## Models
+
+### Model Conventions
+
+- Use `django.db.models.Model` as base class
+- Define `__str__()` method for readable representations
+- Use `Meta` class for model options
+- Add docstrings to models and complex fields
+- Use Django's built-in field types
+- Define proper relationships (ForeignKey, ManyToMany)
+
+### Key Models
+
+- **User** - Django's built-in User model (from `django.contrib.auth.models`)
+- **Profile** - User profile with `uid` and `max_loops_allowed`
+- **UserBitmap** - User access permissions as bitmap
+- **Job** - Tracks asynchronous background tasks
+- **Collection** - User-created collections of content items
+- **CollectableItem** - Items within collections
+- **UserChangePlanRequest** - Plan upgrade/downgrade requests
+- **UserSpecialMembershipRequest** - Special membership requests
+
+### Model Example
+
+```python
+from django.db import models
+from django.contrib.auth.models import User
+from django.utils import timezone
+
+class MyModel(models.Model):
+ """
+ Description of the model.
+ """
+ # Fields
+ name = models.CharField(max_length=255, help_text="Display name")
+ creator = models.ForeignKey(
+ User,
+ on_delete=models.CASCADE,
+ related_name="mymodels"
+ )
+ date_created = models.DateTimeField(default=timezone.now)
+ is_active = models.BooleanField(default=True)
+
+ class Meta:
+ ordering = ['-date_created']
+ verbose_name = "My Model"
+ verbose_name_plural = "My Models"
+ indexes = [
+ models.Index(fields=['creator', '-date_created']),
+ ]
+
+ def __str__(self):
+ return f"{self.name} (by {self.creator.username})"
+
+ def save(self, *args, **kwargs):
+ """Override save to add custom logic."""
+ # Custom logic before save
+ super().save(*args, **kwargs)
+ # Custom logic after save
+```
+
+## Django Admin
+
+### Admin Customization
+
+Customize the admin interface in `impresso/admin/`:
+
+```python
+from django.contrib import admin
+from impresso.models import MyModel
+
+@admin.register(MyModel)
+class MyModelAdmin(admin.ModelAdmin):
+ """Admin interface for MyModel."""
+
+ list_display = ('name', 'creator', 'date_created', 'is_active')
+ list_filter = ('is_active', 'date_created')
+ search_fields = ('name', 'creator__username')
+ readonly_fields = ('date_created',)
+ date_hierarchy = 'date_created'
+
+ fieldsets = (
+ ('Basic Information', {
+ 'fields': ('name', 'creator', 'is_active')
+ }),
+ ('Metadata', {
+ 'fields': ('date_created',),
+ 'classes': ('collapse',)
+ }),
+ )
+
+ def get_queryset(self, request):
+ """Optimize queryset with select_related."""
+ qs = super().get_queryset(request)
+ return qs.select_related('creator')
+```
+
+### Admin Actions
+
+```python
+@admin.register(MyModel)
+class MyModelAdmin(admin.ModelAdmin):
+ actions = ['activate_items', 'deactivate_items']
+
+ def activate_items(self, request, queryset):
+ """Activate selected items."""
+ count = queryset.update(is_active=True)
+ self.message_user(request, f"{count} items activated.")
+ activate_items.short_description = "Activate selected items"
+
+ def deactivate_items(self, request, queryset):
+ """Deactivate selected items."""
+ count = queryset.update(is_active=False)
+ self.message_user(request, f"{count} items deactivated.")
+ deactivate_items.short_description = "Deactivate selected items"
+```
+
+## Management Commands
+
+### Creating Management Commands
+
+Create custom commands in `impresso/management/commands/`:
+
+```python
+from django.core.management.base import BaseCommand, CommandError
+from django.contrib.auth.models import User
+from impresso.models import MyModel
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+ """
+ Management command description.
+
+ Usage:
+ ENV=dev pipenv run ./manage.py mycommand [options]
+ """
+ help = 'Command description'
+
+ def add_arguments(self, parser):
+ """Add command-line arguments."""
+ parser.add_argument(
+ 'user_id',
+ type=int,
+ help='User ID to process'
+ )
+ parser.add_argument(
+ '--dry-run',
+ action='store_true',
+ help='Run without making changes'
+ )
+ parser.add_argument(
+ '--verbose',
+ action='store_true',
+ help='Verbose output'
+ )
+
+ def handle(self, *args, **options):
+ """Execute command logic."""
+ user_id = options['user_id']
+ dry_run = options['dry_run']
+ verbose = options['verbose']
+
+ # Set logging level
+ if verbose:
+ logger.setLevel(logging.DEBUG)
+
+ try:
+ user = User.objects.get(pk=user_id)
+ logger.info(f"Processing user: {user.username}")
+
+ if dry_run:
+ self.stdout.write(
+ self.style.WARNING('DRY RUN - no changes made')
+ )
+ else:
+ # Do actual work
+ result = self.process_user(user)
+
+ self.stdout.write(
+ self.style.SUCCESS(f'Successfully processed: {result}')
+ )
+
+ except User.DoesNotExist:
+ raise CommandError(f'User with ID {user_id} does not exist')
+
+ except Exception as e:
+ logger.exception(f"Error processing user {user_id}")
+ raise CommandError(f'Error: {e}')
+
+ def process_user(self, user):
+ """Process user logic."""
+ # Implementation
+ return "result"
+```
+
+### Existing Commands
+
+Key management commands in the project:
+
+- `createaccount` - Create user accounts with random passwords
+- `createsuperuser` - Create admin user (built-in Django command)
+- `synccollection` - Sync a collection to Solr index
+- `exportqueryascsv` - Export Solr query results as CSV
+- `createcollection` - Create or get a collection
+- `addtocollectionfromquery` - Add query results to collection
+- `addtocollectionfromtrpassagesquery` - Add TR passages to collection
+- `stopjob` - Stop a running job
+
+## Settings Management
+
+### Environment-Based Settings
+
+Settings are loaded via dotenv files:
+
+```python
+# impresso/base.py
+import os
+from dotenv import load_dotenv
+
+# Load environment-specific .env file
+env = os.environ.get('ENV', 'dev')
+env_file = f'.{env}.env' if env != 'dev' else '.env'
+load_dotenv(env_file)
+
+# Access settings
+SECRET_KEY = os.environ.get('SECRET_KEY')
+DEBUG = os.environ.get('DEBUG', 'False') == 'True'
+```
+
+### Settings Structure
+
+- `impresso/base.py` - Base settings and dotenv loading
+- `impresso/settings.py` - Main settings file
+- `.example.env` - Template for environment variables
+- `.dev.env` - Development settings
+- `.prod.env` - Production settings
+
+### Key Settings
+
+```python
+# Database
+DATABASES = {
+ 'default': {
+ 'ENGINE': 'django.db.backends.mysql',
+ 'HOST': os.environ.get('IMPRESSO_DB_HOST'),
+ 'PORT': os.environ.get('IMPRESSO_DB_PORT'),
+ 'NAME': os.environ.get('IMPRESSO_DB_NAME'),
+ 'USER': os.environ.get('IMPRESSO_DB_USER'),
+ 'PASSWORD': os.environ.get('IMPRESSO_DB_PASSWORD'),
+ }
+}
+
+# Celery
+CELERY_BROKER_URL = os.environ.get('REDIS_HOST', 'redis://localhost:6379')
+
+# Email
+EMAIL_BACKEND = os.environ.get('EMAIL_BACKEND')
+DEFAULT_FROM_EMAIL = os.environ.get('DEFAULT_FROM_EMAIL')
+
+# Solr
+IMPRESSO_SOLR_URL = os.environ.get('IMPRESSO_SOLR_URL')
+IMPRESSO_SOLR_USER = os.environ.get('IMPRESSO_SOLR_USER')
+IMPRESSO_SOLR_PASSWORD = os.environ.get('IMPRESSO_SOLR_PASSWORD')
+
+# Custom settings
+IMPRESSO_BASE_URL = os.environ.get('IMPRESSO_BASE_URL')
+IMPRESSO_SOLR_EXEC_LIMIT = 100
+IMPRESSO_SOLR_EXEC_MAX_LOOPS = 100
+```
+
+## Django Signals
+
+### Signal Definitions
+
+Signals are defined in `impresso/signals.py`:
+
+```python
+from django.db.models.signals import post_save, pre_save
+from django.dispatch import receiver
+from django.contrib.auth.models import User
+from impresso.models import Profile, UserBitmap
+
+@receiver(post_save, sender=User)
+def create_user_profile(sender, instance, created, **kwargs):
+ """
+ Create Profile and UserBitmap when User is created.
+ """
+ if created:
+ Profile.objects.get_or_create(
+ user=instance,
+ defaults={'uid': f"user-{instance.username}"}
+ )
+ UserBitmap.objects.get_or_create(user=instance)
+
+@receiver(pre_save, sender=UserBitmap)
+def update_user_bitmap(sender, instance, **kwargs):
+ """
+ Update bitmap before saving based on user groups.
+ """
+ # Calculate bitmap value from user groups
+ instance.calculate_bitmap()
+```
+
+### Signal Registration
+
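+Signals are imported from `AppConfig.ready()`; on Django versions before 3.2 the app config must also be declared in `impresso/__init__.py` (newer versions discover it automatically):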
+
+```python
+default_app_config = 'impresso.apps.ImpressoConfig'
+```
+
+And in `impresso/apps.py`:
+
+```python
+from django.apps import AppConfig
+
+class ImpressoConfig(AppConfig):
+ name = 'impresso'
+
+ def ready(self):
+ """Import signals when app is ready."""
+ import impresso.signals
+```
+
+## User Authentication & Authorization
+
+### User Groups
+
+The project uses Django groups for user plans:
+
+- `settings.IMPRESSO_GROUP_USER_PLAN_BASIC` - Basic user plan
+- `settings.IMPRESSO_GROUP_USER_PLAN_RESEARCHER` - Researcher plan
+- `settings.IMPRESSO_GROUP_USER_PLAN_EDUCATIONAL` - Educational plan
+- `settings.IMPRESSO_GROUP_USER_PLAN_NO_REDACTION` - Special privilege
+
+### Checking User Permissions
+
+```python
+from django.conf import settings
+
+def check_user_plan(user):
+ """Check user's plan."""
+ if user.groups.filter(name=settings.IMPRESSO_GROUP_USER_PLAN_RESEARCHER).exists():
+ return 'researcher'
+ elif user.groups.filter(name=settings.IMPRESSO_GROUP_USER_PLAN_EDUCATIONAL).exists():
+ return 'educational'
+ else:
+ return 'basic'
+
+def user_has_no_redaction(user):
+ """Check if user has no-redaction privilege."""
+ return user.groups.filter(
+ name=settings.IMPRESSO_GROUP_USER_PLAN_NO_REDACTION
+ ).exists()
+```
+
+### User Profile Access
+
+```python
+def get_user_limits(user):
+ """Get user's execution limits."""
+ profile = user.profile
+ return {
+ 'max_loops': min(
+ profile.max_loops_allowed,
+ settings.IMPRESSO_SOLR_EXEC_MAX_LOOPS
+ ),
+ 'uid': profile.uid,
+ }
+```
+
+## Database Migrations
+
+### Creating Migrations
+
+```bash
+# Create migrations for changes
+ENV=dev pipenv run ./manage.py makemigrations
+
+# Create named migration
+ENV=dev pipenv run ./manage.py makemigrations --name add_field_to_model
+
+# Show SQL for migrations
+ENV=dev pipenv run ./manage.py sqlmigrate impresso 0001
+
+# Apply migrations
+ENV=dev pipenv run ./manage.py migrate
+
+# Show migration status
+ENV=dev pipenv run ./manage.py showmigrations
+```
+
+### Migration Best Practices
+
+- Keep migrations small and focused
+- Test migrations on a copy of production data
+- Never modify applied migrations
+- Use `RunPython` for data migrations
+- Add `reverse_code` for rollback support
+
+### Data Migration Example
+
+```python
+from django.db import migrations
+
+def forwards_func(apps, schema_editor):
+ """Apply data migration."""
+ MyModel = apps.get_model('impresso', 'MyModel')
+ db_alias = schema_editor.connection.alias
+
+ # Update data
+ MyModel.objects.using(db_alias).filter(
+ old_field=True
+ ).update(new_field='value')
+
+def reverse_func(apps, schema_editor):
+ """Reverse data migration."""
+ MyModel = apps.get_model('impresso', 'MyModel')
+ db_alias = schema_editor.connection.alias
+
+ # Reverse changes
+ MyModel.objects.using(db_alias).filter(
+ new_field='value'
+ ).update(old_field=True)
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ('impresso', '0001_initial'),
+ ]
+
+ operations = [
+ migrations.RunPython(forwards_func, reverse_func),
+ ]
+```
+
+## URL Configuration
+
+URLs are defined in `impresso/urls.py`:
+
+```python
+from django.urls import path, include
+from django.contrib import admin
+from impresso import views
+
+urlpatterns = [
+ path('admin/', admin.site.urls),
+ path('api/', include('impresso.api.urls')),
+ path('accounts/', include('django_registration.backends.activation.urls')),
+]
+```
+
+## Templates
+
+### Template Organization
+
+Templates are in `impresso/templates/`:
+
+```
+templates/
+├── base.html # Base template
+├── emails/ # Email templates
+│ ├── notification.txt # Plain text version
+│ └── notification.html # HTML version
+└── admin/ # Admin overrides
+```
+
+### Email Templates
+
+Email templates should have both .txt and .html versions:
+
+```html
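+<!-- emails/notification.html -->
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="utf-8">
+</head>
+<body>
+    <p>Dear {{ user.first_name }},</p>
+    <p>{{ message }}</p>
+    <p>Best regards,<br>The Impresso Team</p>
+</body>
+</html>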
+```
+
+```text
+# emails/notification.txt
+Dear {{ user.first_name }},
+
+{{ message }}
+
+Best regards,
+The Impresso Team
+```
+
+## Middleware
+
+Custom middleware can be added to `impresso/middleware.py`:
+
+```python
+class CustomMiddleware:
+ """Custom middleware description."""
+
+ def __init__(self, get_response):
+ self.get_response = get_response
+
+ def __call__(self, request):
+ # Code executed before view
+
+ response = self.get_response(request)
+
+ # Code executed after view
+
+ return response
+```
+
+Register in settings:
+
+```python
+MIDDLEWARE = [
+ # Django defaults
+ 'django.middleware.security.SecurityMiddleware',
+ 'django.contrib.sessions.middleware.SessionMiddleware',
+ # ...
+ 'impresso.middleware.CustomMiddleware', # Add custom middleware
+]
+```
+
+## Database Optimization
+
+### Query Optimization
+
+```python
+# Use select_related for ForeignKey
+users = User.objects.select_related('profile').all()
+
+# Use prefetch_related for ManyToMany
+users = User.objects.prefetch_related('groups').all()
+
+# Use only() to fetch specific fields
+users = User.objects.only('id', 'username', 'email').all()
+
+# Use defer() to exclude fields
+users = User.objects.defer('password', 'last_login').all()
+
+# Use exists() instead of count() for existence check
+if User.objects.filter(email=email).exists():
+ pass  # handle the existing user here
+
+# Use values() for dictionary results
+user_data = User.objects.values('id', 'username', 'email')
+```
+
+### Database Transactions
+
+```python
+from django.db import transaction
+
+# Atomic decorator
+@transaction.atomic
+def create_user_with_profile(username, email):
+ """Create user and profile atomically."""
+ user = User.objects.create_user(username=username, email=email)
+ Profile.objects.create(user=user, uid=f"user-{username}")
+ return user
+
+# Context manager
+def update_user_plan(user, plan):
+ """Update user plan atomically."""
+ with transaction.atomic():
+ user.groups.clear()
+ user.groups.add(plan)
+ user.profile.plan_updated = timezone.now()
+ user.profile.save()
+```
+
+## Logging
+
+Configure logging in settings:
+
+```python
+LOGGING = {
+ 'version': 1,
+ 'disable_existing_loggers': False,
+ 'formatters': {
+ 'verbose': {
+ 'format': '{levelname} {asctime} {module} {message}',
+ 'style': '{',
+ },
+ },
+ 'handlers': {
+ 'console': {
+ 'class': 'logging.StreamHandler',
+ 'formatter': 'verbose',
+ },
+ },
+ 'loggers': {
+ 'impresso': {
+ 'handlers': ['console'],
+ 'level': 'INFO',
+ },
+ },
+}
+```
+
+## Security Best Practices
+
+- Use Django's built-in security features
+- Never store plaintext passwords
+- Use CSRF protection for forms
+- Validate and sanitize all user inputs
+- Use Django's ORM to prevent SQL injection
+- Keep SECRET_KEY secret and unique
+- Use HTTPS in production
+- Regularly update dependencies
+
+## References
+
+- Django Documentation: https://docs.djangoproject.com/
+- Django Admin: https://docs.djangoproject.com/en/stable/ref/contrib/admin/
+- Django Management Commands: https://docs.djangoproject.com/en/stable/howto/custom-management-commands/
+- Django Migrations: https://docs.djangoproject.com/en/stable/topics/migrations/
+- Django Signals: https://docs.djangoproject.com/en/stable/topics/signals/
diff --git a/.github/agents/testing.md b/.github/agents/testing.md
new file mode 100644
index 0000000..e0f0882
--- /dev/null
+++ b/.github/agents/testing.md
@@ -0,0 +1,590 @@
+# Agent: Testing
+
+This agent specializes in writing and maintaining tests for the impresso-user-admin Django application.
+
+## Expertise
+
+- Writing Django unit tests and integration tests
+- Testing Celery tasks and async operations
+- Mocking external services (Solr, SMTP)
+- Testing email functionality
+- Database transaction testing
+- User permission and access control testing
+
+## Test Framework
+
+The project uses Django's built-in testing framework based on unittest.
+
+### Test Types
+
+1. **TestCase** - Standard test case with database rollback
+ - Use for most tests
+ - Database changes are rolled back after each test
+ - Faster than TransactionTestCase
+
+2. **TransactionTestCase** - Test case with transaction support
+ - Use when testing transaction behavior
+ - Use when testing signals that depend on commits
+ - Database is flushed between tests (slower)
+
+## Test Organization
+
+### Directory Structure
+
+```
+impresso/tests/
+├── __init__.py
+├── test_runner.py # Custom test runner
+├── test_solr.py # Solr integration tests
+├── models/ # Model tests
+├── tasks/ # Task tests
+│ ├── __init__.py
+│ └── test_*.py
+└── utils/
+ └── tasks/ # Task utility tests
+ ├── __init__.py
+ ├── test_account.py
+ ├── test_userBitmap.py
+ └── email.py
+```
+
+### Test File Naming
+
+- Prefix test files with `test_`: `test_account.py`
+- Mirror the structure of the code being tested
+- Group related tests in the same file
+
+### Test Class Naming
+
+```python
+class TestFeatureName(TestCase):
+ """
+ Test feature description.
+
+ Run with:
+ ENV=dev pipenv run ./manage.py test impresso.tests.path.TestFeatureName
+ """
+```
+
+## Running Tests
+
+```bash
+# Run all tests
+ENV=dev pipenv run ./manage.py test
+
+# Run specific app tests
+ENV=dev pipenv run ./manage.py test impresso
+
+# Run specific test file
+ENV=dev pipenv run ./manage.py test impresso.tests.utils.tasks.test_account
+
+# Run specific test class
+ENV=dev pipenv run ./manage.py test impresso.tests.utils.tasks.test_account.TestAccountPlanChange
+
+# Run specific test method
+ENV=dev pipenv run ./manage.py test impresso.tests.utils.tasks.test_account.TestAccountPlanChange.test_send_email_plan_change
+
+# With console email backend (to see email output)
+EMAIL_BACKEND=django.core.mail.backends.console.EmailBackend ENV=dev pipenv run ./manage.py test
+
+# With verbose output
+ENV=dev pipenv run ./manage.py test --verbosity=2
+```
+
+## Test Structure
+
+### Basic Test Template
+
+```python
+import logging
+from django.test import TestCase, TransactionTestCase
+from django.contrib.auth.models import User, Group
+from django.core import mail
+from impresso.models import ModelName
+from impresso.utils.tasks.module import function_to_test
+
+logger = logging.getLogger("console")
+
+
+class TestFeature(TestCase):
+ """
+ Test feature functionality.
+
+ ENV=dev pipenv run ./manage.py test impresso.tests.module.TestFeature
+ """
+
+ def setUp(self):
+ """Set up test fixtures before each test method."""
+ # Create test user
+ self.user = User.objects.create_user(
+ username="testuser",
+ first_name="Jane",
+ last_name="Doe",
+ password="testpass123",
+ email="test@example.com",
+ )
+
+ # Create default groups (required for many tests)
+ from impresso.signals import create_default_groups
+ create_default_groups(sender="impresso")
+
+ # Clear mail outbox
+ mail.outbox = []
+
+ def tearDown(self):
+ """Clean up after each test method."""
+ pass
+
+ def test_feature_success(self):
+ """Test successful feature execution."""
+ # Arrange
+ expected_result = "expected"
+
+ # Act
+ result = function_to_test(self.user.id)
+
+ # Assert
+ self.assertEqual(result, expected_result)
+ self.assertEqual(len(mail.outbox), 1)
+ self.assertEqual(mail.outbox[0].subject, "Expected Subject")
+```
+
+### Testing with Transactions
+
+```python
+class TestFeatureWithTransaction(TransactionTestCase):
+ """
+ Test feature requiring transaction support.
+
+ ENV=dev pipenv run ./manage.py test impresso.tests.module.TestFeatureWithTransaction
+ """
+
+ def setUp(self):
+ """Set up test fixtures."""
+ self.user = User.objects.create_user(
+ username="testuser",
+ email="test@example.com",
+ password="testpass123"
+ )
+ from impresso.signals import create_default_groups
+ create_default_groups(sender="impresso")
+
+ def test_with_commit(self):
+ """Test behavior after transaction commit."""
+ # Your test code
+ pass
+```
+
+## Testing Email Functionality
+
+### Email Testing Pattern
+
+```python
+from django.core import mail
+from django.conf import settings
+
+def test_send_email(self):
+ """Test email sending functionality."""
+ # Clear outbox before test
+ mail.outbox = []
+
+ # Call function that sends email
+ send_email_function(user_id=self.user.id)
+
+ # Check email was sent
+ self.assertEqual(len(mail.outbox), 1)
+
+ # Check email properties
+ email = mail.outbox[0]
+ self.assertEqual(email.subject, "Expected Subject")
+ self.assertEqual(email.to, [self.user.email])
+ self.assertEqual(email.from_email, f"Impresso Team <{settings.DEFAULT_FROM_EMAIL}>")
+
+ # Check email content
+ self.assertIn("Dear Jane,", email.body)
+ self.assertIn("expected text", email.body)
+
+ # Check HTML alternative exists
+ self.assertEqual(len(email.alternatives), 1)
+ html_content, content_type = email.alternatives[0]
+ self.assertEqual(content_type, "text/html")
+ self.assertIn("Dear Jane,", html_content)
+
+def test_multiple_emails(self):
+ """Test when multiple emails are sent."""
+ mail.outbox = []
+
+ # Function sends email to user and staff
+ send_emails_after_user_registration(self.user.id)
+
+ # Check both emails sent
+ self.assertEqual(len(mail.outbox), 2, "Should send email to user and staff")
+
+ # Check first email (to user)
+ self.assertEqual(mail.outbox[0].to, [self.user.email])
+
+ # Check second email (to staff)
+ self.assertEqual(mail.outbox[1].to, [settings.DEFAULT_FROM_EMAIL])
+```
+
+## Testing User Groups and Permissions
+
+### Group Setup
+
+```python
+def setUp(self):
+ """Set up user with specific plan."""
+ self.user = User.objects.create_user(
+ username="testuser",
+ email="test@example.com",
+ password="testpass123"
+ )
+
+ # Create default groups
+ from impresso.signals import create_default_groups
+ create_default_groups(sender="impresso")
+
+ # Add user to specific plan
+ group = Group.objects.get(name=settings.IMPRESSO_GROUP_USER_PLAN_RESEARCHER)
+ self.user.groups.add(group)
+ self.user.is_active = True
+ self.user.save()
+
+def test_user_permissions(self):
+ """Test user has correct permissions."""
+ # Check user is in group
+ group_names = list(self.user.groups.values_list("name", flat=True))
+ self.assertIn(settings.IMPRESSO_GROUP_USER_PLAN_RESEARCHER, group_names)
+
+ # Check user bitmap
+ from impresso.models import UserBitmap
+ user_bitmap = UserBitmap.objects.get(user=self.user)
+ self.assertEqual(
+ user_bitmap.get_bitmap_as_int(),
+ UserBitmap.USER_PLAN_RESEARCHER
+ )
+```
+
+## Testing Celery Tasks
+
+### Testing Task Execution
+
+```python
+from impresso.tasks.my_tasks import my_task
+from impresso.models import Job
+
+class TestCeleryTask(TransactionTestCase):
+ """Test Celery task functionality."""
+
+ def setUp(self):
+ self.user = User.objects.create_user(
+ username="testuser",
+ email="test@example.com"
+ )
+ from impresso.signals import create_default_groups
+ create_default_groups(sender="impresso")
+
+ def test_task_execution(self):
+ """Test task executes successfully."""
+ # Create job for tracking
+ job = Job.objects.create(
+ creator=self.user,
+ type=Job.EXP,
+ status=Job.RUN,
+ )
+
+ # Execute task (runs synchronously in tests)
+ result = my_task.apply(args=[job.id])
+
+ # Check result
+ self.assertTrue(result.successful())
+
+ # Refresh job from database
+ job.refresh_from_db()
+ self.assertEqual(job.status, Job.DONE)
+```
+
+### Testing Task Helpers
+
+```python
+from impresso.utils.tasks import get_pagination
+from impresso.models import Job, Profile
+
+def test_pagination(self):
+ """Test pagination calculation."""
+ # Create user with profile
+ profile = Profile.objects.create(
+ user=self.user,
+ uid="test-user",
+ max_loops_allowed=50
+ )
+
+ # Create job
+ job = Job.objects.create(
+ creator=self.user,
+ type=Job.EXP,
+ )
+
+ # Test pagination
+ page, loops, progress, max_loops = get_pagination(
+ skip=0,
+ limit=100,
+ total=1000,
+ job=job
+ )
+
+ self.assertEqual(page, 1)
+ self.assertEqual(loops, 10)
+ self.assertEqual(progress, 0.1)
+```
+
+## Testing Exceptions
+
+### Exception Testing Pattern
+
+```python
+def test_exception_raised(self):
+ """Test function raises appropriate exception."""
+ with self.assertRaises(ValueError, msg="Should raise ValueError"):
+ function_that_should_fail(invalid_param="bad")
+
+def test_user_not_found(self):
+ """Test handling of non-existent user."""
+ with self.assertRaises(User.DoesNotExist):
+ function_requiring_user(user_id=99999)
+
+def test_validation_error(self):
+ """Test validation error handling."""
+ from django.core.exceptions import ValidationError
+
+ with self.assertRaises(ValidationError):
+ function_with_validation(invalid_data)
+```
+
+## Mocking External Services
+
+### Mocking Solr
+
+```python
+from unittest.mock import ANY, patch, MagicMock
+
+@patch('impresso.solr.find_all')
+def test_with_mocked_solr(self, mock_find_all):
+ """Test function with mocked Solr response."""
+ # Setup mock response
+ mock_find_all.return_value = {
+ "response": {
+ "numFound": 10,
+ "docs": [
+ {"id": "doc-1", "title": "Test Document"},
+ {"id": "doc-2", "title": "Another Document"},
+ ]
+ },
+ "responseHeader": {"QTime": 5}
+ }
+
+ # Call function that uses Solr
+ result = function_using_solr(query="test")
+
+ # Verify mock was called correctly
+ mock_find_all.assert_called_once_with(
+ q="test",
+ fl="id,title",
+ skip=0,
+ logger=ANY
+ )
+
+ # Check result
+ self.assertEqual(len(result), 2)
+```
+
+### Mocking SMTP
+
+```python
+from unittest.mock import patch
+import smtplib
+
+@patch('smtplib.SMTP')
+def test_email_smtp_error(self, mock_smtp):
+ """Test handling of SMTP errors."""
+ # Setup mock to raise exception
+ mock_smtp.side_effect = smtplib.SMTPException("Connection failed")
+
+ # Call function that sends email
+ with self.assertRaises(smtplib.SMTPException):
+ send_email_function(user_id=self.user.id)
+```
+
+## Testing Database Models
+
+```python
+from impresso.models import Collection, CollectableItem
+
+def test_model_creation(self):
+ """Test model instance creation."""
+ collection = Collection.objects.create(
+ name="Test Collection",
+ creator=self.user,
+ description="Test description"
+ )
+
+ self.assertEqual(collection.name, "Test Collection")
+ self.assertEqual(collection.creator, self.user)
+ self.assertIsNotNone(collection.date_created)
+
+def test_model_relationships(self):
+ """Test model relationships."""
+ collection = Collection.objects.create(
+ name="Test Collection",
+ creator=self.user
+ )
+
+ item = CollectableItem.objects.create(
+ collection=collection,
+ content_id="test-doc-1"
+ )
+
+ # Test relationship
+ self.assertEqual(item.collection, collection)
+ self.assertEqual(collection.collectableitem_set.count(), 1)
+```
+
+## Common Assertions
+
+```python
+# Equality
+self.assertEqual(actual, expected)
+self.assertNotEqual(actual, unexpected)
+
+# Truth
+self.assertTrue(condition)
+self.assertFalse(condition)
+
+# Existence
+self.assertIsNone(value)
+self.assertIsNotNone(value)
+
+# Collections
+self.assertIn(item, collection)
+self.assertNotIn(item, collection)
+self.assertEqual(len(collection), expected_length)
+
+# Strings
+self.assertIn("substring", text)
+self.assertTrue(text.startswith("prefix"))
+
+# Exceptions
+with self.assertRaises(ExceptionType):
+ function_that_raises()
+
+# Database queries
+self.assertEqual(Model.objects.count(), expected_count)
+self.assertTrue(Model.objects.filter(field=value).exists())
+```
+
+## Test Data Best Practices
+
+### Creating Test Users
+
+```python
+def setUp(self):
+ """Create test users with different roles."""
+ # Basic user
+ self.basic_user = User.objects.create_user(
+ username="basic",
+ email="basic@example.com",
+ password="testpass123"
+ )
+
+ # Staff user
+ self.staff_user = User.objects.create_user(
+ username="staff",
+ email="staff@example.com",
+ password="testpass123",
+ is_staff=True
+ )
+
+ # Superuser
+ self.admin_user = User.objects.create_superuser(
+ username="admin",
+ email="admin@example.com",
+ password="testpass123"
+ )
+```
+
+### Creating Test Data
+
+```python
+def setUp(self):
+ """Create test data."""
+ # Create groups
+ from impresso.signals import create_default_groups
+ create_default_groups(sender="impresso")
+
+ # Create profile
+ from impresso.models import Profile
+ self.profile = Profile.objects.create(
+ user=self.user,
+ uid=f"test-{self.user.username}",
+ max_loops_allowed=100
+ )
+
+ # Create user bitmap
+ from impresso.models import UserBitmap
+ self.user_bitmap = UserBitmap.objects.create(
+ user=self.user
+ )
+```
+
+## Debugging Tests
+
+### Print Debug Information
+
+```python
+def test_with_debug_output(self):
+ """Test with debug output."""
+ result = function_to_test()
+
+ # Print to console for debugging
+ print(f"Result: {result}")
+ print(f"Mail outbox: {mail.outbox}")
+ if mail.outbox:
+ print(f"Email body: {mail.outbox[0].body}")
+
+ # Your assertions
+ self.assertEqual(result, expected)
+```
+
+### Inspecting SQL Queries
+
+Use `assertNumQueries` to check how many SQL queries run, and `connection.queries` (populated only when `DEBUG=True`) to print them:
+
+```python
+# In test method
+from django.test.utils import override_settings
+from django.db import connection
+
+@override_settings(DEBUG=True)
+def test_with_query_debugging(self):
+ """Test with SQL query debugging."""
+ with self.assertNumQueries(expected_query_count):
+ function_to_test()
+
+ # Print queries
+ for query in connection.queries:
+ print(query['sql'])
+```
+
+## Test Coverage
+
+While not currently enforced, aim for:
+- 80%+ code coverage for critical paths
+- 100% coverage for security-sensitive code
+- Test both success and failure scenarios
+- Test edge cases and boundary conditions
+
+## References
+
+- Django Testing Documentation: https://docs.djangoproject.com/en/stable/topics/testing/
+- unittest Documentation: https://docs.python.org/3/library/unittest.html
+- Django Mail Testing: https://docs.djangoproject.com/en/stable/topics/testing/tools/#email-services
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
new file mode 100644
index 0000000..fa1eff8
--- /dev/null
+++ b/.github/copilot-instructions.md
@@ -0,0 +1,271 @@
+# GitHub Copilot Instructions for impresso-user-admin
+
+## Repository Overview
+
+This is a Django application that manages user-related information for the Impresso project's Master DB. The application uses **Celery** as the background task processing system for handling asynchronous operations like email sending, data exports, and collection management.
+
+## Technology Stack
+
+- **Framework**: Django (Python 3.12+)
+- **Task Queue**: Celery with Redis as the broker
+- **Database**: MySQL (managed via pymysql)
+- **Search**: Apache Solr
+- **Dependency Management**: pipenv
+- **Type Checking**: mypy
+- **Containerization**: Docker & docker-compose
+
+## Project Structure
+
+```
+impresso-user-admin/
+├── impresso/
+│ ├── celery.py # Celery application configuration
+│ ├── settings.py # Django settings
+│ ├── models/ # Django models
+│ ├── tasks/ # Celery task definitions
+│ ├── utils/
+│ │ └── tasks/ # Task helper functions and utilities
+│ ├── tests/ # Test suite
+│ └── solr/ # Solr integration utilities
+├── .github/
+│ ├── agents/ # Agent-specific instructions
+│ └── copilot-instructions.md
+└── manage.py
+```
+
+## Celery Task Organization
+
+### Task Modules
+
+The application organizes Celery tasks into two main directories:
+
+1. **`impresso/tasks/`** - Contains Celery task decorators and task definitions
+ - `userChangePlanRequest_task.py` - Plan change request tasks
+ - `userSpecialMembershipRequest_tasks.py` - Special membership tasks
+
+2. **`impresso/utils/tasks/`** - Contains helper functions used by tasks
+ - `__init__.py` - Common utilities (pagination, job progress tracking)
+ - `account.py` - User account and email operations
+ - `collection.py` - Collection management in Solr
+ - `export.py` - Data export to CSV/ZIP
+ - `textreuse.py` - Text reuse passage operations
+ - `userBitmap.py` - User permission bitmap updates
+ - `email.py` - Email rendering and sending utilities
+ - `userSpecialMembershipRequest.py` - Special membership operations
+
+### Task Helper Functions
+
+Common task utilities are provided in `impresso/utils/tasks/__init__.py`:
+
+- `get_pagination()` - Calculate pagination for Solr queries with user limits
+- `update_job_progress()` - Update job status and progress in DB and Redis
+- `update_job_completed()` - Mark a job as completed
+- `is_task_stopped()` - Check if user has stopped a job
+
+Task states:
+- `TASKSTATE_INIT` - Task initialization
+- `TASKSTATE_PROGRESS` - Task in progress
+- `TASKSTATE_SUCCESS` - Task completed successfully
+- `TASKSTATE_STOPPED` - Task stopped by user
+
+## Coding Conventions
+
+### General Python
+
+- Use Python 3.12+ type hints for all function signatures
+- Follow PEP 8 style guidelines
+- Use descriptive variable names
+- Include docstrings for all public functions and classes
+- Use f-strings for string formatting
+
+### Django Specific
+
+- Use Django ORM for all database operations
+- Follow Django naming conventions for models, views, and managers
+- Use Django's transaction management for atomic operations
+- Settings should be accessed via `django.conf.settings`
+
+### Celery Tasks
+
+- Define tasks in `impresso/tasks/` directory
+- Place helper functions in `impresso/utils/tasks/` directory
+- Use `@shared_task` or `@app.task` decorators with appropriate configuration
+- Always bind tasks when using `self` (e.g., for updating state)
+- Include retry logic with exponential backoff for resilient tasks
+- Use structured logging with task context (job_id, user_id)
+
+Example task pattern:
+```python
+from celery import shared_task
+from celery.utils.log import get_task_logger
+
+logger = get_task_logger(__name__)
+
+@shared_task(
+ bind=True,
+ autoretry_for=(Exception,),
+ retry_backoff=2,
+ retry_kwargs={"max_retries": 5},
+ retry_jitter=True,
+)
+def my_task(self, user_id: int) -> None:
+ logger.info(f"[user:{user_id}] Starting task...")
+ # Task implementation
+```
+
+### Logging
+
+- Use structured logging with context: `logger.info(f"[job:{job.pk} user:{user.pk}] message")`
+- Include relevant IDs in log messages (job, user, collection, etc.)
+- Use appropriate log levels: DEBUG, INFO, WARNING, ERROR, CRITICAL (use `logger.exception()` inside `except` blocks to log at ERROR level with a traceback)
+- Get logger via `get_task_logger(__name__)` in task files
+- Use default_logger pattern: `default_logger = logging.getLogger(__name__)` in utility files
+
+### Error Handling
+
+- Catch specific exceptions rather than generic Exception
+- Log exceptions with appropriate context
+- Use exponential backoff for retries
+- Handle database IntegrityErrors appropriately
+- Validate user input before processing
+
+### Email Operations
+
+- Use `send_templated_email_with_context()` from `impresso/utils/tasks/email.py`
+- Email templates are in `impresso/templates/emails/` (both .txt and .html)
+- Always include both text and HTML versions
+- Handle SMTP exceptions gracefully
+- Log email sending status
+
+### Solr Integration
+
+- Use helper functions from `impresso/solr/` module
+- Respect `settings.IMPRESSO_SOLR_EXEC_LIMIT` for query limits
+- Respect `settings.IMPRESSO_SOLR_EXEC_MAX_LOOPS` for maximum iterations
+- Consider user's `max_loops_allowed` profile setting
+- Use `find_all()` for queries and `update()` for updates
+- Handle both main index and passages index (`IMPRESSO_SOLR_PASSAGES_URL_*`)
+
+### Job Management
+
+- Jobs track long-running asynchronous tasks
+- Update job progress using `update_job_progress()`
+- Check for user-initiated stops with `is_task_stopped()`
+- Store task metadata in job.extra field as JSON
+- Include pagination info in job updates
+
+## Testing
+
+### Running Tests
+
+```bash
+# Run all tests
+ENV=dev pipenv run ./manage.py test
+
+# Run specific test module
+ENV=dev pipenv run ./manage.py test impresso.tests.utils.tasks.test_account
+
+# Run with email backend visible
+EMAIL_BACKEND=django.core.mail.backends.console.EmailBackend ENV=dev pipenv run ./manage.py test
+```
+
+### Test Organization
+
+- Tests are in `impresso/tests/` directory
+- Mirror the structure of the main codebase
+- Use `TestCase` for standard tests
+- Use `TransactionTestCase` for tests requiring DB transactions
+- Clear `mail.outbox` between test cases
+- Create default groups in setUp using `create_default_groups()`
+
+### Test Conventions
+
+- Name test methods descriptively: `test_send_email_plan_change`
+- Use assertions that provide clear failure messages
+- Test both success and error cases
+- Mock external services (SMTP, Solr) when appropriate
+- Test with different user plans and permissions
+
+## Development Workflow
+
+### Setting Up Environment
+
+```bash
+# Install dependencies
+pipenv install
+
+# Start Redis and MySQL
+docker compose up -d
+
+# Run migrations
+ENV=dev pipenv run ./manage.py migrate
+
+# Create superuser
+ENV=dev pipenv run ./manage.py createsuperuser
+
+# Run development server
+ENV=dev pipenv run ./manage.py runserver
+
+# Run Celery worker (in separate terminal)
+ENV=dev pipenv run celery -A impresso worker -l info
+```
+
+### Type Checking
+
+```bash
+# Run mypy
+pipenv run mypy --config-file ./.mypy.ini impresso
+```
+
+### Common Commands
+
+```bash
+# Create accounts
+ENV=dev pipenv run ./manage.py createaccount user@example.com
+
+# Sync collection
+ENV=dev pipenv run ./manage.py synccollection
+
+# Export query as CSV
+ENV=dev pipenv run ./manage.py exportqueryascsv USER_ID "QUERY"
+
+# Stop a job
+ENV=dev pipenv run ./manage.py stopjob
+```
+
+## Security Considerations
+
+- Never commit secrets to source code
+- Use environment variables for sensitive configuration
+- Validate and sanitize user inputs
+- Use Django's built-in security features
+- Respect user permissions and bitmap access controls
+- Use `mapper_doc_remove_private_collections()` to filter user content
+- Apply `mapper_doc_redact_contents()` for content protection based on user bitmap
+
+## Configuration
+
+- Environment-specific settings via `.env` files (`.dev.env`, `.prod.env`)
+- Use `ENV` environment variable to select configuration: `ENV=dev`
+- See `.example.env` for available configuration options
+- Settings loaded via `dotenv` in `impresso/base.py`
+
+## Adding New Tasks
+
+When adding new Celery tasks:
+
+1. Create task definition in `impresso/tasks/` with proper decorators
+2. Create helper functions in `impresso/utils/tasks/` if needed
+3. Use structured logging with context
+4. Implement retry logic with exponential backoff
+5. Update job progress for long-running tasks
+6. Check for user-initiated stops in loops
+7. Handle errors gracefully
+8. Add tests in `impresso/tests/tasks/`
+9. Document the task purpose and parameters
+
+## Resources
+
+- Main repository: https://github.com/impresso/impresso-user-admin
+- Impresso project: https://impresso-project.ch
+- License: GNU Affero General Public License v3.0
From 7ef5489e5ccc85615f6419e332ca832e3cd39b51 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 15 Jan 2026 15:11:22 +0000
Subject: [PATCH 03/11] Add documentation agent instructions
- Create .github/agents/documentation.md
- Include comprehensive guidelines for writing and maintaining documentation
- Cover README structure, code docstrings, API documentation, changelog
- Add setup and deployment documentation guidelines
- Include troubleshooting and contributing guidelines
Co-authored-by: danieleguido <1181642+danieleguido@users.noreply.github.com>
---
.github/agents/documentation.md | 677 ++++++++++++++++++++++++++++++++
1 file changed, 677 insertions(+)
create mode 100644 .github/agents/documentation.md
diff --git a/.github/agents/documentation.md b/.github/agents/documentation.md
new file mode 100644
index 0000000..1acd786
--- /dev/null
+++ b/.github/agents/documentation.md
@@ -0,0 +1,677 @@
+# Agent: Documentation
+
+This agent specializes in creating and maintaining documentation for the impresso-user-admin project.
+
+## Expertise
+
+- Writing clear and comprehensive README files
+- Creating API documentation
+- Documenting code with docstrings
+- Writing setup and deployment guides
+- Creating user guides and tutorials
+- Maintaining changelog
+
+## Documentation Standards
+
+### README Structure
+
+A good README should include:
+
+1. **Project Overview** - Brief description of what the project does
+2. **Features** - Key features and capabilities
+3. **Technology Stack** - Technologies and frameworks used
+4. **Installation** - Step-by-step setup instructions
+5. **Configuration** - Environment variables and settings
+6. **Usage** - How to run and use the application
+7. **Development** - Development setup and workflow
+8. **Testing** - How to run tests
+9. **Deployment** - Production deployment instructions
+10. **Contributing** - Guidelines for contributors
+11. **License** - License information
+12. **Resources** - Links to related resources
+
+### Code Documentation
+
+#### Docstrings
+
+Follow Google-style docstrings for Python:
+
+```python
+def function_name(param1: type1, param2: type2) -> return_type:
+ """
+ Brief description of what the function does.
+
+ Longer description if needed, explaining the function's behavior,
+ edge cases, and any important implementation details.
+
+ Args:
+ param1: Description of param1
+ param2: Description of param2
+
+ Returns:
+ Description of return value
+
+ Raises:
+ ExceptionType: When this exception is raised
+
+ Example:
+ >>> result = function_name(value1, value2)
+ >>> print(result)
+ expected_output
+ """
+ # Implementation
+```
+
+#### Class Documentation
+
+```python
+class ClassName:
+ """
+ Brief description of the class.
+
+ Longer description explaining the class's purpose, relationships
+ with other classes, and usage patterns.
+
+ Attributes:
+ attribute1: Description of attribute1
+ attribute2: Description of attribute2
+
+ Example:
+ >>> obj = ClassName(param)
+ >>> obj.method()
+ expected_output
+ """
+
+ def __init__(self, param: type):
+ """
+ Initialize the class.
+
+ Args:
+ param: Description of initialization parameter
+ """
+ self.attribute1 = param
+```
+
+#### Module Documentation
+
+```python
+"""
+Module Name
+
+Brief description of what this module does.
+
+This module provides functionality for [purpose]. It includes
+classes and functions for [specific capabilities].
+
+Key Components:
+ - ClassName: Description
+ - function_name: Description
+
+Example:
+ Basic usage example:
+
+ >>> from module import ClassName
+ >>> obj = ClassName()
+ >>> result = obj.method()
+"""
+```
+
+## Django Project Documentation
+
+### Settings Documentation
+
+Document important settings in comments:
+
+```python
+# Celery Configuration
+# Redis is used as the message broker for Celery task queue
+CELERY_BROKER_URL = os.environ.get('REDIS_HOST', 'redis://localhost:6379')
+
+# Maximum number of results returned per Solr query
+# This limit prevents excessive resource usage
+IMPRESSO_SOLR_EXEC_LIMIT = 100
+
+# Maximum number of query loops allowed per job
+# This prevents infinite loops and resource exhaustion
+IMPRESSO_SOLR_EXEC_MAX_LOOPS = 100
+```
+
+### Model Documentation
+
+```python
+class Job(models.Model):
+ """
+ Tracks the execution of long-running asynchronous tasks.
+
+ Jobs are created when a user initiates a long-running operation
+ like exporting search results or creating a collection. The job
+ status is updated as the task progresses, allowing users to monitor
+ progress and cancel if needed.
+
+ Status Flow:
+ INIT -> RUN -> DONE (success)
+ INIT -> RUN -> RIP (stopped/failed)
+ """
+
+ # Status constants
+ INIT = 'init' # Job created but not started
+ RUN = 'run' # Job is running
+ DONE = 'done' # Job completed successfully
+ STOP = 'stop' # User requested stop
+ RIP = 'rip' # Job stopped or failed
+
+ STATUS_CHOICES = [
+ (INIT, 'Initialized'),
+ (RUN, 'Running'),
+ (DONE, 'Done'),
+ (STOP, 'Stop Requested'),
+ (RIP, 'Stopped'),
+ ]
+```
+
+### Management Command Documentation
+
+```python
+class Command(BaseCommand):
+ """
+ Export Solr query results to CSV file.
+
+ This command executes a Solr query and exports the results to a CSV
+ file, respecting user permissions and access controls. The export
+ is performed as an asynchronous Celery task with progress tracking.
+
+ Usage:
+ ENV=dev pipenv run ./manage.py exportqueryascsv USER_ID "QUERY"
+
+ Examples:
+ # Export French content mentioning "ministre"
+ ENV=dev pipenv run ./manage.py exportqueryascsv 1 "content_txt_fr:ministre"
+
+ # Export with specific date range
+ ENV=dev pipenv run ./manage.py exportqueryascsv 1 "content_txt_fr:* AND date_i:[1900 TO 1950]"
+
+ Output:
+ Creates a ZIP file containing the CSV export in the user's
+ upload directory.
+ """
+```
+
+## API Documentation
+
+### REST API Endpoints
+
+Document API endpoints with:
+
+- **Method** - HTTP method (GET, POST, PUT, DELETE)
+- **URL** - Endpoint URL with parameters
+- **Auth** - Authentication requirements
+- **Parameters** - Request parameters
+- **Response** - Response format and status codes
+- **Examples** - Request/response examples
+
+````markdown
+### Create Collection
+
+Create a new collection for the authenticated user.
+
+**URL**: `/api/collections/`
+
+**Method**: `POST`
+
+**Auth Required**: Yes
+
+**Permissions**: Authenticated users
+
+**Request Body**:
+```json
+{
+ "name": "My Collection",
+ "description": "Collection description"
+}
+```
+
+**Success Response**:
+- **Code**: 201 CREATED
+- **Content**:
+```json
+{
+ "id": "user-john-my-collection",
+ "name": "My Collection",
+ "description": "Collection description",
+ "date_created": "2024-01-15T10:30:00Z",
+ "creator": {
+ "id": 1,
+ "username": "john"
+ }
+}
+```
+
+**Error Responses**:
+- **Code**: 400 BAD REQUEST
+ - **Content**: `{"name": ["This field is required."]}`
+- **Code**: 401 UNAUTHORIZED
+ - **Content**: `{"detail": "Authentication credentials were not provided."}`
+
+**Example**:
+```bash
+curl -X POST https://api.example.com/api/collections/ \
+ -H "Authorization: Bearer TOKEN" \
+ -H "Content-Type: application/json" \
+ -d '{"name": "My Collection", "description": "Test collection"}'
+```
+````
+
+## Changelog
+
+Maintain a CHANGELOG.md following [Keep a Changelog](https://keepachangelog.com/) format:
+
+```markdown
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+### Added
+- New feature description
+
+### Changed
+- Changed feature description
+
+### Deprecated
+- Soon-to-be removed feature
+
+### Removed
+- Removed feature
+
+### Fixed
+- Bug fix description
+
+### Security
+- Security fix description
+
+## [1.0.0] - 2024-01-15
+
+### Added
+- Initial release with core features
+- User authentication and authorization
+- Celery task processing
+- Collection management
+- Export functionality
+
+[Unreleased]: https://github.com/impresso/impresso-user-admin/compare/v1.0.0...HEAD
+[1.0.0]: https://github.com/impresso/impresso-user-admin/releases/tag/v1.0.0
+```
+
+## Setup Documentation
+
+### Installation Guide
+
+````markdown
+## Installation
+
+### Prerequisites
+
+- Python 3.12+
+- pipenv
+- Docker and docker-compose
+- MySQL 8.0+
+- Redis 6.0+
+
+### Step 1: Clone Repository
+
+```bash
+git clone https://github.com/impresso/impresso-user-admin.git
+cd impresso-user-admin
+```
+
+### Step 2: Install Dependencies
+
+```bash
+# Install pyenv if not already installed
+curl https://pyenv.run | bash
+
+# Install Python version
+pyenv install 3.12.4
+
+# Install pipenv
+python -m pip install pipenv
+
+# Install project dependencies
+pipenv install
+```
+
+### Step 3: Configure Environment
+
+```bash
+# Copy example environment file
+cp .example.env .dev.env
+
+# Edit .dev.env with your settings
+nano .dev.env
+```
+
+### Step 4: Start Services
+
+```bash
+# Start Redis and MySQL
+docker compose up -d
+
+# Run migrations
+ENV=dev pipenv run ./manage.py migrate
+
+# Create superuser
+ENV=dev pipenv run ./manage.py createsuperuser
+```
+
+### Step 5: Run Application
+
+```bash
+# Terminal 1: Start Django server
+ENV=dev pipenv run ./manage.py runserver
+
+# Terminal 2: Start Celery worker
+ENV=dev pipenv run celery -A impresso worker -l info
+```
+
+### Step 6: Access Application
+
+- Admin interface: http://localhost:8000/admin/
+- Log in with your superuser credentials
+````
+
+## Configuration Documentation
+
+### Environment Variables
+
+Document all environment variables:
+
+```markdown
+## Environment Variables
+
+### Required Variables
+
+| Variable | Description | Example |
+|----------|-------------|---------|
+| `SECRET_KEY` | Django secret key (keep secret!) | `django-insecure-key123...` |
+| `DEBUG` | Enable debug mode (only in dev) | `True` |
+| `IMPRESSO_DB_HOST` | MySQL database host | `localhost` |
+| `IMPRESSO_DB_PORT` | MySQL database port | `3306` |
+| `IMPRESSO_DB_NAME` | Database name | `impresso` |
+| `IMPRESSO_DB_USER` | Database username | `impresso_user` |
+| `IMPRESSO_DB_PASSWORD` | Database password | `secure_password` |
+| `REDIS_HOST` | Redis connection URL | `redis://localhost:6379` |
+
+### Solr Configuration
+
+| Variable | Description | Example |
+|----------|-------------|---------|
+| `IMPRESSO_SOLR_URL` | Main Solr index URL | `http://localhost:8983/solr/impresso` |
+| `IMPRESSO_SOLR_USER` | Solr read-only user | `reader` |
+| `IMPRESSO_SOLR_PASSWORD` | Solr read-only password | `read_password` |
+| `IMPRESSO_SOLR_USER_WRITE` | Solr write user | `writer` |
+| `IMPRESSO_SOLR_PASSWORD_WRITE` | Solr write password | `write_password` |
+| `IMPRESSO_SOLR_PASSAGES_URL` | Text reuse passages index | `http://localhost:8983/solr/passages` |
+
+### Email Configuration
+
+| Variable | Description | Example |
+|----------|-------------|---------|
+| `EMAIL_BACKEND` | Django email backend | `django.core.mail.backends.smtp.EmailBackend` |
+| `EMAIL_HOST` | SMTP server host | `smtp.gmail.com` |
+| `EMAIL_PORT` | SMTP server port | `587` |
+| `EMAIL_USE_TLS` | Use TLS encryption | `True` |
+| `EMAIL_HOST_USER` | SMTP username | `user@example.com` |
+| `EMAIL_HOST_PASSWORD` | SMTP password | `app_password` |
+| `DEFAULT_FROM_EMAIL` | Default sender email | `noreply@impresso-project.ch` |
+
+### Optional Variables
+
+| Variable | Description | Default | Example |
+|----------|-------------|---------|---------|
+| `IMPRESSO_SOLR_EXEC_LIMIT` | Max rows per Solr query | `100` | `200` |
+| `IMPRESSO_SOLR_EXEC_MAX_LOOPS` | Max query loops | `100` | `200` |
+| `IMPRESSO_BASE_URL` | Base URL for links | - | `https://impresso-project.ch` |
+```
+
+## Troubleshooting Documentation
+
+```markdown
+## Troubleshooting
+
+### Common Issues
+
+#### Database Connection Errors
+
+**Problem**: `django.db.utils.OperationalError: (2003, "Can't connect to MySQL server")`
+
+**Solution**:
+1. Check MySQL is running: `docker ps`
+2. Verify connection settings in `.dev.env`
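+3. Test connection over TCP: `mysql -h 127.0.0.1 -P 3306 -u user -p` (with `-h localhost` the client uses the local Unix socket and never reaches the Docker container)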
+
+#### Redis Connection Errors
+
+**Problem**: `redis.exceptions.ConnectionError: Error connecting to Redis`
+
+**Solution**:
+1. Check Redis is running: `docker ps`
+2. Test connection: `redis-cli ping`
+3. Verify `REDIS_HOST` in `.dev.env`
+
+#### Celery Tasks Not Processing
+
+**Problem**: Tasks are queued but not executed
+
+**Solution**:
+1. Check Celery worker is running
+2. Check Redis connection
+3. Verify task is registered: `pipenv run celery -A impresso inspect registered`
+4. Check worker logs for errors
+
+#### Import Errors
+
+**Problem**: `ModuleNotFoundError: No module named 'xyz'`
+
+**Solution**:
+1. Ensure you're in pipenv shell: `pipenv shell`
+2. Install dependencies: `pipenv install`
+3. Check Python version: `python --version`
+
+### Debug Mode
+
+Enable verbose logging:
+
+```python
+# settings.py
+LOGGING = {
+ 'version': 1,
+ 'disable_existing_loggers': False,
+ 'handlers': {
+ 'console': {
+ 'class': 'logging.StreamHandler',
+ },
+ },
+ 'root': {
+ 'handlers': ['console'],
+ 'level': 'DEBUG',
+ },
+}
+```
+```
+
+## Testing Documentation
+
+Document how to run and write tests:
+
+```markdown
+## Testing
+
+### Running Tests
+
+```bash
+# Run all tests
+ENV=dev pipenv run ./manage.py test
+
+# Run specific test module
+ENV=dev pipenv run ./manage.py test impresso.tests.utils.tasks.test_account
+
+# Run with coverage
+ENV=dev pipenv run coverage run --source='impresso' manage.py test
+ENV=dev pipenv run coverage report
+ENV=dev pipenv run coverage html
+
+# Run with verbose output
+ENV=dev pipenv run ./manage.py test --verbosity=2
+```
+
+### Writing Tests
+
+See `.github/agents/testing.md` for comprehensive testing guidelines.
+
+### Test Structure
+
+Tests are organized to mirror the application structure:
+
+```
+impresso/tests/
+├── models/ # Model tests
+├── tasks/ # Task tests
+├── utils/
+│ └── tasks/ # Task utility tests
+└── views/ # View tests
+```
+```
+
+## Deployment Documentation
+
+```markdown
+## Deployment
+
+### Production Setup
+
+#### Prerequisites
+
+- Docker installed on production server
+- SSL certificate configured
+- Domain name configured
+- Firewall rules configured
+
+#### Step 1: Prepare Environment
+
+```bash
+# Create production environment file
+cp .example.env .prod.env
+
+# Edit with production values
+nano .prod.env
+
+# Important: Set DEBUG=False
+# Important: Set strong SECRET_KEY
+# Important: Configure ALLOWED_HOSTS
+```
+
+#### Step 2: Build Docker Image
+
+```bash
+# Build image
+make build BUILD_TAG=v1.0.0
+
+# Test image locally
+make run BUILD_TAG=v1.0.0
+```
+
+#### Step 3: Deploy
+
+```bash
+# Push image to registry
+docker tag impresso/impresso-user-admin:v1.0.0 registry.example.com/impresso-user-admin:v1.0.0
+docker push registry.example.com/impresso-user-admin:v1.0.0
+
+# On production server
+docker pull registry.example.com/impresso-user-admin:v1.0.0
+docker-compose up -d
+```
+
+#### Step 4: Run Migrations
+
+```bash
+docker-compose exec web python manage.py migrate
+```
+
+#### Step 5: Collect Static Files
+
+```bash
+docker-compose exec web python manage.py collectstatic --noinput
+```
+
+### Monitoring
+
+- Check logs: `docker-compose logs -f web`
+- Check Celery: `docker-compose logs -f celery`
+- Monitor Redis: `redis-cli info`
+- Monitor MySQL: Check database connections
+
+### Backup
+
+```bash
+# Backup database
+docker-compose exec db mysqldump -u user -p database > backup.sql
+
+# Backup media files
+tar -czf media_backup.tar.gz media/
+```
+```
+
+## Contributing Guidelines
+
+```markdown
+## Contributing
+
+We welcome contributions! Please follow these guidelines:
+
+### Development Workflow
+
+1. Fork the repository
+2. Create a feature branch: `git checkout -b feature/my-feature`
+3. Make your changes
+4. Write or update tests
+5. Run tests: `ENV=dev pipenv run ./manage.py test`
+6. Run type checking: `pipenv run mypy --config-file ./.mypy.ini impresso`
+7. Commit changes: `git commit -m "Add my feature"`
+8. Push to branch: `git push origin feature/my-feature`
+9. Create Pull Request
+
+### Code Style
+
+- Follow PEP 8 style guide
+- Use type hints for all functions
+- Write docstrings for all public functions/classes
+- Keep functions small and focused
+- Write descriptive commit messages
+
+### Testing
+
+- Write tests for all new features
+- Maintain test coverage above 80%
+- Test both success and error cases
+- Use meaningful test names
+
+### Documentation
+
+- Update README for new features
+- Add docstrings to new code
+- Update API documentation if applicable
+- Update CHANGELOG.md
+```
+
+## References
+
+- [Write the Docs](https://www.writethedocs.org/)
+- [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html)
+- [Keep a Changelog](https://keepachangelog.com/)
+- [Semantic Versioning](https://semver.org/)
+- [Django Documentation](https://docs.djangoproject.com/)
From e310ddf83902603c31087673f2e0fa028ede9e3c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 15 Jan 2026 15:14:38 +0000
Subject: [PATCH 04/11] Add README for .github directory
- Create .github/README.md explaining the structure
- Document purpose and usage of copilot instructions
- Provide overview of agent-specific instruction files
- Include repository overview and key concepts
- Add contributing guidelines for maintaining instructions
Co-authored-by: danieleguido <1181642+danieleguido@users.noreply.github.com>
---
.github/README.md | 103 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 103 insertions(+)
create mode 100644 .github/README.md
diff --git a/.github/README.md b/.github/README.md
new file mode 100644
index 0000000..7a6ff76
--- /dev/null
+++ b/.github/README.md
@@ -0,0 +1,103 @@
+# GitHub Copilot Agent Configuration
+
+This directory contains configuration files for GitHub Copilot and specialized AI agents.
+
+## Files Overview
+
+### Main Instructions
+- **`copilot-instructions.md`** - Main instructions for GitHub Copilot with repository overview, conventions, and guidelines
+
+### Agent-Specific Instructions (`agents/` directory)
+- **`celery-tasks.md`** - Guidelines for developing and maintaining Celery background tasks
+- **`django-development.md`** - Django application development patterns and best practices
+- **`testing.md`** - Testing framework, patterns, and conventions
+- **`documentation.md`** - Documentation standards and writing guidelines
+
+## Purpose
+
+These files provide:
+
+1. **Context for AI Assistants** - Help GitHub Copilot and other AI tools understand the codebase structure and conventions
+2. **Onboarding Documentation** - Guide new developers on project patterns and practices
+3. **Consistency** - Ensure consistent coding style and patterns across the codebase
+4. **Best Practices** - Document proven patterns for common tasks
+
+## Usage
+
+### For GitHub Copilot
+GitHub Copilot automatically reads `.github/copilot-instructions.md` to understand project conventions.
+
+### For Specialized Agents
+Agent-specific instruction files in `.github/agents/` provide detailed guidance for:
+- Celery task development with job tracking and Solr integration
+- Django models, views, admin interface, and management commands
+- Writing comprehensive tests with proper mocking and assertions
+- Creating and maintaining project documentation
+
+## Repository Overview
+
+**impresso-user-admin** is a Django application that manages user-related information for the Impresso project. Key features:
+
+- **Background Processing**: Celery with Redis for asynchronous tasks
+- **User Management**: Django authentication with custom user plans and permissions
+- **Search Integration**: Apache Solr for content search and indexing
+- **Export Functionality**: CSV/ZIP export of search results with user access controls
+- **Collection Management**: User-created collections of content items
+- **Email Notifications**: Multi-format emails (text + HTML) for user actions
+
+## Technology Stack
+
+- Python 3.12+ with type hints
+- Django web framework
+- Celery task queue with Redis
+- MySQL database
+- Apache Solr search
+- Docker for containerization
+- pipenv for dependency management
+- mypy for type checking
+
+## Key Concepts
+
+### Task Organization
+- **`impresso/tasks/`** - Celery task definitions with decorators
+- **`impresso/utils/tasks/`** - Helper functions used by tasks
+- Job progress tracking via database and Redis
+- User-based execution limits and permissions
+
+### User Permissions
+- User groups for different plans (Basic, Researcher, Educational)
+- UserBitmap for fine-grained access control
+- Content redaction based on user permissions
+- Profile with execution limits (max_loops_allowed)
+
+### Development Workflow
+```bash
+# Start services
+docker compose up -d
+
+# Run Django server
+ENV=dev pipenv run ./manage.py runserver
+
+# Run Celery worker (separate terminal)
+ENV=dev pipenv run celery -A impresso worker -l info
+
+# Run tests
+ENV=dev pipenv run ./manage.py test
+
+# Type checking
+pipenv run mypy --config-file ./.mypy.ini impresso
+```
+
+## Contributing
+
+When modifying these instruction files:
+1. Keep examples practical and based on actual code in the repository
+2. Update instructions when significant patterns or conventions change
+3. Ensure consistency across all agent instruction files
+4. Test that instructions are clear and actionable
+
+## Resources
+
+- Repository: https://github.com/impresso/impresso-user-admin
+- Impresso Project: https://impresso-project.ch
+- License: GNU Affero General Public License v3.0
From af695892ad6320e1a3f8f0796503e3392c6b576f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 15 Jan 2026 15:36:07 +0000
Subject: [PATCH 05/11] Remove collection and export task documentation from
agent files
- Remove Solr integration documentation
- Remove collection management task patterns
- Remove CSV/ZIP export examples
- Remove pagination with Solr helpers
- Update task modules list to only include active tasks
- Update management commands to only list existing ones
- Remove references to removed utility modules (collection.py, export.py, textreuse.py)
- Simplify key models list to only include used models
- Update common commands examples
Co-authored-by: danieleguido <1181642+danieleguido@users.noreply.github.com>
---
.github/README.md | 9 +-
.github/agents/celery-tasks.md | 199 +--------------------------
.github/agents/django-development.md | 23 +---
.github/agents/testing.md | 72 ++--------
.github/copilot-instructions.md | 35 ++---
5 files changed, 29 insertions(+), 309 deletions(-)
diff --git a/.github/README.md b/.github/README.md
index 7a6ff76..f0981e8 100644
--- a/.github/README.md
+++ b/.github/README.md
@@ -40,9 +40,6 @@ Agent-specific instruction files in `.github/agents/` provide detailed guidance
- **Background Processing**: Celery with Redis for asynchronous tasks
- **User Management**: Django authentication with custom user plans and permissions
-- **Search Integration**: Apache Solr for content search and indexing
-- **Export Functionality**: CSV/ZIP export of search results with user access controls
-- **Collection Management**: User-created collections of content items
- **Email Notifications**: Multi-format emails (text + HTML) for user actions
## Technology Stack
@@ -51,7 +48,6 @@ Agent-specific instruction files in `.github/agents/` provide detailed guidance
- Django web framework
- Celery task queue with Redis
- MySQL database
-- Apache Solr search
- Docker for containerization
- pipenv for dependency management
- mypy for type checking
@@ -62,13 +58,12 @@ Agent-specific instruction files in `.github/agents/` provide detailed guidance
- **`impresso/tasks/`** - Celery task definitions with decorators
- **`impresso/utils/tasks/`** - Helper functions used by tasks
- Job progress tracking via database and Redis
-- User-based execution limits and permissions
+- User-based permissions
### User Permissions
- User groups for different plans (Basic, Researcher, Educational)
- UserBitmap for fine-grained access control
-- Content redaction based on user permissions
-- Profile with execution limits (max_loops_allowed)
+- Profile with user-specific settings
### Development Workflow
```bash
diff --git a/.github/agents/celery-tasks.md b/.github/agents/celery-tasks.md
index 023faef..78624c8 100644
--- a/.github/agents/celery-tasks.md
+++ b/.github/agents/celery-tasks.md
@@ -7,7 +7,6 @@ This agent specializes in developing and maintaining Celery background tasks for
- Creating new Celery tasks with proper decorators and configuration
- Writing helper functions for task operations
- Implementing job progress tracking
-- Integrating with Solr for search and indexing
- Managing user permissions and access control
- Error handling and retry logic
- Structured logging
@@ -97,107 +96,6 @@ def long_running_task(self, job_id: int):
)
```
-### Pagination with User Limits
-
-When processing large result sets from Solr:
-
-```python
-from impresso.utils.tasks import get_pagination
-from django.conf import settings
-
-# Calculate pagination respecting user and system limits
-page, loops, progress, max_loops = get_pagination(
- skip=skip,
- limit=limit,
- total=total,
- job=job,
- ignore_max_loops=False # Set True only for admin operations
-)
-
-logger.info(
- f"[job:{job.pk} user:{job.creator.pk}] "
- f"page={page} loops={loops} progress={progress * 100:.2f}%"
-)
-
-# Loop through pages
-if page < loops:
- # More pages to process
- skip += limit
- # Continue processing
-else:
- # All pages processed
- pass
-```
-
-### Solr Integration
-
-Use the provided Solr utilities:
-
-```python
-from impresso.solr import find_all, update
-from django.conf import settings
-
-# Query Solr
-results = find_all(
- q="content_txt_fr:*",
- fl="id,title,date",
- skip=0,
- limit=100,
- logger=logger
-)
-
-total = results["response"]["numFound"]
-docs = results["response"]["docs"]
-
-# Update Solr (requires write credentials)
-update_result = update(
- url=settings.IMPRESSO_SOLR_URL_UPDATE,
- todos=[
- {
- "id": "doc-123",
- "ucoll_ss": {"add": ["collection-id"]},
- "_version_": doc_version
- }
- ],
- logger=logger
-)
-```
-
-### Access Control and Content Redaction
-
-Always respect user permissions:
-
-```python
-from impresso.utils.bitmask import BitMask64
-from impresso.utils.solr import (
- mapper_doc_remove_private_collections,
- mapper_doc_redact_contents,
-)
-
-# Get user's bitmap for access control
-user_bitmask = BitMask64(job.creator.profile.user_bitmap_key)
-
-# Check if user has special no-redaction privilege
-user_allow_no_redaction = job.creator.groups.filter(
- name=settings.IMPRESSO_GROUP_USER_PLAN_NO_REDACTION
-).exists()
-
-# Process each document
-for doc in docs:
- # Remove private collections from user's view
- doc = mapper_doc_remove_private_collections(
- doc=doc,
- prefix=job.creator.profile.uid
- )
-
- # Redact content based on permissions (unless user has privilege)
- if not user_allow_no_redaction:
- doc = mapper_doc_redact_contents(
- doc=doc,
- user_bitmask=user_bitmask,
- )
-```
-
### Email Operations
Use the email utility functions:
@@ -222,8 +120,6 @@ success = send_templated_email_with_context(
)
```
-### Error Handling
-
Implement proper error handling with retries:
```python
@@ -313,112 +209,23 @@ class TestMyTask(TransactionTestCase):
self.assertEqual(len(mail.outbox), 1)
```
-## Common Patterns
-
-### Processing Collections
-
-```python
-def process_collection_items(
- collection_id: str,
- job: Job,
- skip: int = 0,
- limit: int = 100,
- logger=default_logger
-) -> Tuple[int, int, float]:
- """Process items in a collection with pagination."""
-
- # Get collection
- collection = Collection.objects.get(pk=collection_id)
-
- # Query Solr for collection items
- query = f"ucoll_ss:{collection_id}"
- results = find_all(
- q=query,
- fl="id,title,date",
- skip=skip,
- limit=limit,
- logger=logger
- )
-
- total = results["response"]["numFound"]
- page, loops, progress, max_loops = get_pagination(
- skip=skip, limit=limit, total=total, job=job
- )
-
- # Process items
- for doc in results["response"]["docs"]:
- # Process each item
- pass
-
- return page, loops, progress
-```
-
-### Export to CSV/ZIP
-
-```python
-import csv
-from zipfile import ZipFile, ZIP_DEFLATED
-
-def export_results_to_csv(job: Job, results: list, fieldnames: list):
- """Export results to CSV and create ZIP archive."""
-
- csv_path = job.attachment.upload.path
-
- with open(csv_path, mode='a', encoding='utf-8-sig', newline='') as csvfile:
- writer = csv.DictWriter(
- csvfile,
- delimiter=';',
- quoting=csv.QUOTE_MINIMAL,
- fieldnames=fieldnames,
- )
-
- # Write header on first page
- if skip == 0:
- writer.writeheader()
-
- # Write rows
- for row in results:
- filtered_row = {k: v for k, v in row.items() if k in fieldnames}
- writer.writerow(filtered_row)
-
- # Create ZIP when done
- zip_path = f"{csv_path}.zip"
- with ZipFile(zip_path, 'w', ZIP_DEFLATED) as zipf:
- zipf.write(csv_path, basename(csv_path))
-
- # Update job attachment
- job.attachment.upload.name = f"{job.attachment.upload.name}.zip"
- job.attachment.save()
-
- # Remove original CSV
- if os.path.exists(csv_path):
- os.remove(csv_path)
-```
-
## Configuration Settings
-Key Celery and Solr settings from `settings.py`:
+Key Celery settings from `settings.py`:
- `CELERY_BROKER_URL` - Redis connection for Celery
-- `IMPRESSO_SOLR_URL` - Main Solr index URL
-- `IMPRESSO_SOLR_PASSAGES_URL_SELECT` - Text reuse passages query URL
-- `IMPRESSO_SOLR_PASSAGES_URL_UPDATE` - Text reuse passages update URL
-- `IMPRESSO_SOLR_EXEC_LIMIT` - Maximum rows per Solr query (default: 100)
-- `IMPRESSO_SOLR_EXEC_MAX_LOOPS` - Maximum query loops (default: 100)
- `IMPRESSO_GROUP_USER_PLAN_*` - User plan group names
- `DEFAULT_FROM_EMAIL` - Email sender address
## Key Models
- `Job` - Tracks long-running asynchronous tasks
-- `Collection` - User-created collections of content items
-- `CollectableItem` - Individual items in collections
- `UserBitmap` - User access permissions as bitmap
- `UserChangePlanRequest` - Plan upgrade/downgrade requests
-- `Profile` - User profile with uid and max_loops_allowed
+- `UserSpecialMembershipRequest` - Special membership requests
+- `Profile` - User profile with uid
## References
- Celery documentation: https://docs.celeryq.dev/
- Django documentation: https://docs.djangoproject.com/
-- Apache Solr documentation: https://solr.apache.org/guide/
diff --git a/.github/agents/django-development.md b/.github/agents/django-development.md
index c9653fb..14744bd 100644
--- a/.github/agents/django-development.md
+++ b/.github/agents/django-development.md
@@ -52,11 +52,9 @@ impresso/
### Key Models
- **User** - Django's built-in User model (from `django.contrib.auth.models`)
-- **Profile** - User profile with `uid` and `max_loops_allowed`
+- **Profile** - User profile with `uid`
- **UserBitmap** - User access permissions as bitmap
- **Job** - Tracks asynchronous background tasks
-- **Collection** - User-created collections of content items
-- **CollectableItem** - Items within collections
- **UserChangePlanRequest** - Plan upgrade/downgrade requests
- **UserSpecialMembershipRequest** - Special membership requests
@@ -242,12 +240,10 @@ Key management commands in the project:
- `createaccount` - Create user accounts with random passwords
- `createsuperuser` - Create admin user (built-in Django command)
-- `synccollection` - Sync a collection to Solr index
-- `exportqueryascsv` - Export Solr query results as CSV
- `createcollection` - Create or get a collection
-- `addtocollectionfromquery` - Add query results to collection
-- `addtocollectionfromtrpassagesquery` - Add TR passages to collection
- `stopjob` - Stop a running job
+- `updateuserbitmap` - Update user bitmap
+- `updatespecialmembership` - Update special membership status
## Settings Management
@@ -300,15 +296,8 @@ CELERY_BROKER_URL = os.environ.get('REDIS_HOST', 'redis://localhost:6379')
EMAIL_BACKEND = os.environ.get('EMAIL_BACKEND')
DEFAULT_FROM_EMAIL = os.environ.get('DEFAULT_FROM_EMAIL')
-# Solr
-IMPRESSO_SOLR_URL = os.environ.get('IMPRESSO_SOLR_URL')
-IMPRESSO_SOLR_USER = os.environ.get('IMPRESSO_SOLR_USER')
-IMPRESSO_SOLR_PASSWORD = os.environ.get('IMPRESSO_SOLR_PASSWORD')
-
# Custom settings
IMPRESSO_BASE_URL = os.environ.get('IMPRESSO_BASE_URL')
-IMPRESSO_SOLR_EXEC_LIMIT = 100
-IMPRESSO_SOLR_EXEC_MAX_LOOPS = 100
```
## Django Signals
@@ -401,13 +390,9 @@ def user_has_no_redaction(user):
```python
def get_user_limits(user):
- """Get user's execution limits."""
+ """Get user's profile information."""
profile = user.profile
return {
- 'max_loops': min(
- profile.max_loops_allowed,
- settings.IMPRESSO_SOLR_EXEC_MAX_LOOPS
- ),
'uid': profile.uid,
}
```
diff --git a/.github/agents/testing.md b/.github/agents/testing.md
index e0f0882..3776378 100644
--- a/.github/agents/testing.md
+++ b/.github/agents/testing.md
@@ -6,7 +6,7 @@ This agent specializes in writing and maintaining tests for the impresso-user-ad
- Writing Django unit tests and integration tests
- Testing Celery tasks and async operations
-- Mocking external services (Solr, SMTP)
+- Mocking external services (SMTP)
- Testing email functionality
- Database transaction testing
- User permission and access control testing
@@ -35,7 +35,6 @@ The project uses Django's built-in testing framework based on unittest.
impresso/tests/
├── __init__.py
├── test_runner.py # Custom test runner
-├── test_solr.py # Solr integration tests
├── models/ # Model tests
├── tasks/ # Task tests
│ ├── __init__.py
@@ -363,41 +362,6 @@ def test_validation_error(self):
## Mocking External Services
-### Mocking Solr
-
-```python
-from unittest.mock import patch, MagicMock
-
-@patch('impresso.solr.find_all')
-def test_with_mocked_solr(self, mock_find_all):
- """Test function with mocked Solr response."""
- # Setup mock response
- mock_find_all.return_value = {
- "response": {
- "numFound": 10,
- "docs": [
- {"id": "doc-1", "title": "Test Document"},
- {"id": "doc-2", "title": "Another Document"},
- ]
- },
- "responseHeader": {"QTime": 5}
- }
-
- # Call function that uses Solr
- result = function_using_solr(query="test")
-
- # Verify mock was called correctly
- mock_find_all.assert_called_once_with(
- q="test",
- fl="id,title",
- skip=0,
- logger=mock.ANY
- )
-
- # Check result
- self.assertEqual(len(result), 2)
-```
-
### Mocking SMTP
```python
@@ -418,35 +382,23 @@ def test_email_smtp_error(self, mock_smtp):
## Testing Database Models
```python
-from impresso.models import Collection, CollectableItem
+from impresso.models import UserBitmap
def test_model_creation(self):
"""Test model instance creation."""
- collection = Collection.objects.create(
- name="Test Collection",
- creator=self.user,
- description="Test description"
+ user_bitmap = UserBitmap.objects.create(
+ user=self.user
)
- self.assertEqual(collection.name, "Test Collection")
- self.assertEqual(collection.creator, self.user)
- self.assertIsNotNone(collection.date_created)
+ self.assertEqual(user_bitmap.user, self.user)
+ self.assertIsNotNone(user_bitmap.date_created)
def test_model_relationships(self):
"""Test model relationships."""
- collection = Collection.objects.create(
- name="Test Collection",
- creator=self.user
- )
-
- item = CollectableItem.objects.create(
- collection=collection,
- content_id="test-doc-1"
- )
+ user_bitmap = UserBitmap.objects.get(user=self.user)
# Test relationship
- self.assertEqual(item.collection, collection)
- self.assertEqual(collection.collectableitem_set.count(), 1)
+ self.assertEqual(user_bitmap.user, self.user)
```
## Common Assertions
@@ -464,10 +416,10 @@ self.assertFalse(condition)
self.assertIsNone(value)
self.assertIsNotNone(value)
-# Collections
-self.assertIn(item, collection)
-self.assertNotIn(item, collection)
-self.assertEqual(len(collection), expected_length)
+# Collections (lists, sets, etc.)
+self.assertIn(item, list_or_set)
+self.assertNotIn(item, list_or_set)
+self.assertEqual(len(list_or_set), expected_length)
# Strings
self.assertIn("substring", text)
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
index fa1eff8..4e6bdb8 100644
--- a/.github/copilot-instructions.md
+++ b/.github/copilot-instructions.md
@@ -2,14 +2,13 @@
## Repository Overview
-This is a Django application that manages user-related information for the Impresso project's Master DB. The application uses **Celery** as the background task processing system for handling asynchronous operations like email sending, data exports, and collection management.
+This is a Django application that manages user-related information for the Impresso project's Master DB. The application uses **Celery** as the background task processing system for handling asynchronous operations like email sending and user account management.
## Technology Stack
- **Framework**: Django (Python 3.12+)
- **Task Queue**: Celery with Redis as the broker
- **Database**: MySQL (managed via pymysql)
-- **Search**: Apache Solr
- **Dependency Management**: pipenv
- **Type Checking**: mypy
- **Containerization**: Docker & docker-compose
@@ -25,8 +24,7 @@ impresso-user-admin/
│ ├── tasks/ # Celery task definitions
│ ├── utils/
│ │ └── tasks/ # Task helper functions and utilities
-│ ├── tests/ # Test suite
-│ └── solr/ # Solr integration utilities
+│ └── tests/ # Test suite
├── .github/
│ ├── agents/ # Agent-specific instructions
│ └── copilot-instructions.md
@@ -44,11 +42,8 @@ The application organizes Celery tasks into two main directories:
- `userSpecialMembershipRequest_tasks.py` - Special membership tasks
2. **`impresso/utils/tasks/`** - Contains helper functions used by tasks
- - `__init__.py` - Common utilities (pagination, job progress tracking)
+ - `__init__.py` - Common utilities (job progress tracking)
- `account.py` - User account and email operations
- - `collection.py` - Collection management in Solr
- - `export.py` - Data export to CSV/ZIP
- - `textreuse.py` - Text reuse passage operations
- `userBitmap.py` - User permission bitmap updates
- `email.py` - Email rendering and sending utilities
- `userSpecialMembershipRequest.py` - Special membership operations
@@ -57,7 +52,6 @@ The application organizes Celery tasks into two main directories:
Common task utilities are provided in `impresso/utils/tasks/__init__.py`:
-- `get_pagination()` - Calculate pagination for Solr queries with user limits
- `update_job_progress()` - Update job status and progress in DB and Redis
- `update_job_completed()` - Mark a job as completed
- `is_task_stopped()` - Check if user has stopped a job
@@ -116,7 +110,7 @@ def my_task(self, user_id: int) -> None:
### Logging
- Use structured logging with context: `logger.info(f"[job:{job.pk} user:{user.pk}] message")`
-- Include relevant IDs in log messages (job, user, collection, etc.)
+- Include relevant IDs in log messages (job, user, etc.)
- Use appropriate log levels: DEBUG, INFO, WARNING, ERROR, EXCEPTION
- Get logger via `get_task_logger(__name__)` in task files
- Use default_logger pattern: `default_logger = logging.getLogger(__name__)` in utility files
@@ -137,14 +131,7 @@ def my_task(self, user_id: int) -> None:
- Handle SMTP exceptions gracefully
- Log email sending status
-### Solr Integration
-- Use helper functions from `impresso/solr/` module
-- Respect `settings.IMPRESSO_SOLR_EXEC_LIMIT` for query limits
-- Respect `settings.IMPRESSO_SOLR_EXEC_MAX_LOOPS` for maximum iterations
-- Consider user's `max_loops_allowed` profile setting
-- Use `find_all()` for queries and `update()` for updates
-- Handle both main index and passages index (`IMPRESSO_SOLR_PASSAGES_URL_*`)
### Job Management
@@ -152,7 +139,6 @@ def my_task(self, user_id: int) -> None:
- Update job progress using `update_job_progress()`
- Check for user-initiated stops with `is_task_stopped()`
- Store task metadata in job.extra field as JSON
-- Include pagination info in job updates
## Testing
@@ -183,7 +169,7 @@ EMAIL_BACKEND=django.core.mail.backends.console.EmailBackend ENV=dev pipenv run
- Name test methods descriptively: `test_send_email_plan_change`
- Use assertions that provide clear failure messages
- Test both success and error cases
-- Mock external services (SMTP, Solr) when appropriate
+- Mock external services (SMTP) when appropriate
- Test with different user plans and permissions
## Development Workflow
@@ -223,14 +209,11 @@ pipenv run mypy --config-file ./.mypy.ini impresso
# Create accounts
ENV=dev pipenv run ./manage.py createaccount user@example.com
-# Sync collection
-ENV=dev pipenv run ./manage.py synccollection
-
-# Export query as CSV
-ENV=dev pipenv run ./manage.py exportqueryascsv ""
-
# Stop a job
ENV=dev pipenv run ./manage.py stopjob
+
+# Update user bitmap
+ENV=dev pipenv run ./manage.py updateuserbitmap
```
## Security Considerations
@@ -240,8 +223,6 @@ ENV=dev pipenv run ./manage.py stopjob
- Validate and sanitize user inputs
- Use Django's built-in security features
- Respect user permissions and bitmap access controls
-- Use `mapper_doc_remove_private_collections()` to filter user content
-- Apply `mapper_doc_redact_contents()` for content protection based on user bitmap
## Configuration
From fbb68c06f70287b3b2e5883e1e65b733daa3a021 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 15 Jan 2026 15:40:00 +0000
Subject: [PATCH 06/11] Update documentation agent file to remove export
command examples
- Replace exportqueryascsv example with createaccount example
- Update changelog example to reflect current features
- Remove references to export and collection functionality
Co-authored-by: danieleguido <1181642+danieleguido@users.noreply.github.com>
---
.github/agents/documentation.md | 24 +++++++++++-------------
1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/.github/agents/documentation.md b/.github/agents/documentation.md
index 1acd786..2eb0395 100644
--- a/.github/agents/documentation.md
+++ b/.github/agents/documentation.md
@@ -174,25 +174,23 @@ class Job(models.Model):
```python
class Command(BaseCommand):
"""
- Export Solr query results to CSV file.
+ Create one or more user accounts with randomly generated passwords.
- This command executes a Solr query and exports the results to a CSV
- file, respecting user permissions and access controls. The export
- is performed as an asynchronous Celery task with progress tracking.
+ This command creates a new user account for each given email address, with
+ a randomly generated password, and assigns it to the default user plan (Basic).
Usage:
- ENV=dev pipenv run ./manage.py exportqueryascsv USER_ID "QUERY"
+ ENV=dev pipenv run ./manage.py createaccount EMAIL [EMAIL ...]
Examples:
- # Export French content mentioning "ministre"
- ENV=dev pipenv run ./manage.py exportqueryascsv 1 "content_txt_fr:ministre"
+ # Create a single account
+ ENV=dev pipenv run ./manage.py createaccount user@example.com
- # Export with specific date range
- ENV=dev pipenv run ./manage.py exportqueryascsv 1 "content_txt_fr:* AND date_i:[1900 TO 1950]"
+ # Create multiple accounts at once
+ ENV=dev pipenv run ./manage.py createaccount user1@example.com user2@example.com
Output:
- Creates a ZIP file containing the CSV export in the user's
- upload directory.
+ Prints the generated password for each created account.
"""
```
@@ -299,8 +297,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Initial release with core features
- User authentication and authorization
- Celery task processing
-- Collection management
-- Export functionality
+- Email notification system
+- User bitmap permissions
[Unreleased]: https://github.com/impresso/impresso-user-admin/compare/v1.0.0...HEAD
[1.0.0]: https://github.com/impresso/impresso-user-admin/releases/tag/v1.0.0
From 10b83d32b43c9b2fb348303e3600c6692d1ce5c4 Mon Sep 17 00:00:00 2001
From: Daniele Guido <1181642+danieleguido@users.noreply.github.com>
Date: Thu, 15 Jan 2026 16:41:40 +0100
Subject: [PATCH 07/11] Update README.md
---
README.md | 41 ++---------------------------------------
1 file changed, 2 insertions(+), 39 deletions(-)
diff --git a/README.md b/README.md
index 6734d90..df30c91 100644
--- a/README.md
+++ b/README.md
@@ -87,7 +87,7 @@ python -m pip install pipenv
Then run
```sh
-pipenv --python 3.6.9 install
+pipenv --python 3.12 install
```
To create and activate the virtualenv. Once in the shell, you can go back with the `exit` command and reactivate the virtualenv simply `pipenv shell`
@@ -124,36 +124,6 @@ Create multiple users at once, with randomly generated password.
ENV=dev pipenv run ./manage.py createaccount guestA@uni.lu guestB@uni.lu
```
-Index a collection stored in the db using its :
-
-```sh
-ENV=dev ./manage.py synccollection test-abcd
-```
-
-Export query as csv using (first argument being `user_id` then the solr query):
-
-```sh
-ENV=dev ./manage.py exportqueryascsv 1 "content_txt_fr:\"premier ministre portugais\""
-```
-
-Create (or get) a collection:
-
-```sh
-ENV=dev pipenv run ./manage.py createcollection "name of the collection" my-username
-```
-
-Then once you get the collection id, usually a concatenation of the creator profile uid and of the slugified version of the desired name, you can add query results to the collection:
-
-```sh
-ENV=dev pipenv run python ./manage.py addtocollectionfromquery local-user_name-of-the-collection "content_txt_fr:\"premier ministre portugais\""
-```
-
-Index a collection from a list of tr-passages ids resulting from a solr query:
-
-```sh
-ENV=dev pipenv run python ./manage.py addtocollectionfromtrpassagesquery local-dg-abcde "cluster_id_s:tr-nobp-all-v01-c8590083914"
-```
-
Stop a specific job from command line:
```sh
@@ -165,7 +135,7 @@ ENV=dev pipenv run python ./manage.py stopjob 1234
Specify the environment variable `ENV=test` to run the tests with the `console` email backend:
```sh
-EMAIL_BACKEND=django.core.mail.backends.console.EmailBackend ENV=dev pipenv run ./manage.py test
+EMAIL_BACKEND=django.core.mail.backends.console.EmailBackend ENV=test pipenv run ./manage.py test
```
## Use in production
@@ -178,13 +148,6 @@ Test image locally:
make run
```
-### Note on collection syncronisation between indices. (now deprecated, see impresso-middle-layer for more details)
-
-Collections are simple identifiers assigned to a set of newspaper articles and stored in the `search` index. However, other indices (e.g. `tr_passages`) can be linked to a collection to allow cross-indices search.
-The task of creating a collection is a long running one because it uses a solr search query to filter the `content items` and a solr update request to add the collection tag to the various indices. Every search request is limited to `settings.IMPRESSO_SOLR_EXEC_LIMIT` rows (100 by default) and the number of loops is limited to the user `max_allowed_loops` parameter in the database and in general cannot be higher of `settings.IMPRESSO_SOLR_MAX_LOOPS` (100 recommended for a total of 100\*100 rows default max). Set both parameters in the `.env` file accordingly.
-
-The task of creating a collection is delegated to the _Celery_ task manager and a `Job` instance stored in the database is assigned to the task to allow the follow-up of the task progress. The task is executed asynchronously. In the future releases, the user will be notified via email when the task is completed (still todo).
-
### Using a proxy
If the database is only accessible via a socks proxy, add the following to your `.env` file:
From 8f01cb3b8d91fef2142a5733bb77c0c7b3c04183 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 15 Jan 2026 15:42:24 +0000
Subject: [PATCH 08/11] Remove last Solr reference from README
- Remove "Solr integration" from agent capabilities description
Co-authored-by: danieleguido <1181642+danieleguido@users.noreply.github.com>
---
.github/README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/README.md b/.github/README.md
index f0981e8..de74fb6 100644
--- a/.github/README.md
+++ b/.github/README.md
@@ -29,7 +29,7 @@ GitHub Copilot automatically reads `.github/copilot-instructions.md` to understa
### For Specialized Agents
Agent-specific instruction files in `.github/agents/` provide detailed guidance for:
-- Celery task development with job tracking and Solr integration
+- Celery task development with job tracking
- Django models, views, admin interface, and management commands
- Writing comprehensive tests with proper mocking and assertions
- Creating and maintaining project documentation
From ac95d8cb6d8680726672d116c80d388828ab5674 Mon Sep 17 00:00:00 2001
From: Daniele Guido <1181642+danieleguido@users.noreply.github.com>
Date: Thu, 15 Jan 2026 16:46:15 +0100
Subject: [PATCH 09/11] Create AGENTS.md
---
AGENTS.md | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
create mode 100644 AGENTS.md
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..e005cb0
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,25 @@
+## Repository Overview
+
+This is a Django application that manages user-related information for the Impresso project's Master DB. The application uses **Celery** as the background task processing system for handling asynchronous operations like email sending and user account management.
+
+## Technology Stack
+
+- **Framework**: Django (Python 3.12+)
+- **Task Queue**: Celery with Redis as the broker
+- **Database**: MySQL (managed via pymysql)
+- **Dependency Management**: pipenv
+- **Type Checking**: mypy
+- **Containerization**: Docker & docker-compose
+
+## Project Structure, AI & Agent Instructions
+
+This repository contains AI coding instructions and architectural guidelines in:
+
+- `.github/copilot-instructions.md`
+
+Those instructions define:
+- Coding style
+- Task conventions
+- Architectural constraints
+
+All agents and contributors MUST follow those rules when adding or modifying tasks.
From 21bff92c3983647cd2f0b9ac17044465108a7ab8 Mon Sep 17 00:00:00 2001
From: Daniele Guido <1181642+danieleguido@users.noreply.github.com>
Date: Thu, 15 Jan 2026 16:49:24 +0100
Subject: [PATCH 10/11] Update docker-build-publish.yml
---
.github/workflows/docker-build-publish.yml | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/.github/workflows/docker-build-publish.yml b/.github/workflows/docker-build-publish.yml
index 785a302..14e828b 100644
--- a/.github/workflows/docker-build-publish.yml
+++ b/.github/workflows/docker-build-publish.yml
@@ -5,6 +5,13 @@ on:
push:
branches:
- develop
+ paths-ignore:
+ - 'README.md'
+ - 'LICENSE'
+ - '**/*.md'
+ - '.github/**'
+ - '.cursorrules'
+ workflow_dispatch:
jobs:
build:
runs-on: ubuntu-latest
From 2b02e1eb07c26c8532650e8187c03cae8830c2cd Mon Sep 17 00:00:00 2001
From: Daniele Guido <1181642+danieleguido@users.noreply.github.com>
Date: Thu, 15 Jan 2026 16:50:37 +0100
Subject: [PATCH 11/11] Update Dockerfile
---
Dockerfile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Dockerfile b/Dockerfile
index 851c54e..e5b2fcf 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.12.2-alpine
+FROM python:3.12.4-alpine
# RUN set -ex \
# # Create a non-root user