Merge remote-tracking branch 'origin/develop' into develop #26

Workflow file for this run

.github/workflows/incident-response.yml at 78f3076

	name: Incident Response
Check failure on line 1 in .github/workflows/incident-response.yml View workflow run for this annotation GitHub Actions / .github/workflows/incident-response.yml Invalid workflow file `(Line: 4, Col: 3): Unexpected value 'branches', (Line: 4, Col: 13): A sequence was not expected`

	on:
	branches: [ main ]
	workflow_dispatch:
	inputs:
	incident_type:
	description: 'Type of incident'
	required: true
	type: choice
	options:
	- production_down
	- performance_degradation
	- security_breach
	- data_corruption
	- deployment_failure
	- high_error_rate
	severity:
	description: 'Incident severity'
	required: true
	type: choice
	options:
	- critical
	- high
	- medium
	- low
	description:
	description: 'Incident description'
	required: true
	type: string
	affected_services:
	description: 'Affected services (comma-separated)'
	required: false
	type: string
	default: 'all'

	jobs:
	create-incident-issue:
	name: Create Incident Issue
	runs-on: ubuntu-latest

	steps:
	- uses: actions/checkout@v4

	- name: Create incident issue
	id: create_issue
	uses: actions/github-script@v7
	with:
	script: \|
	const incidentType = '${{ github.event.inputs.incident_type }}';
	const severity = '${{ github.event.inputs.severity }}';
	const description = '${{ github.event.inputs.description }}';
	const affectedServices = '${{ github.event.inputs.affected_services }}';

	const timestamp = new Date().toISOString();

	const issueBody = `
	# Incident Report

	Type: ${incidentType}
	Severity: ${severity.toUpperCase()}
	Reported: ${timestamp}
	Affected Services: ${affectedServices}

	## Description

	${description}

	## Status

	- [x] Incident detected
	- [ ] Team notified
	- [ ] Investigation started
	- [ ] Root cause identified
	- [ ] Mitigation deployed
	- [ ] Incident resolved
	- [ ] Post-mortem scheduled

	## Timeline

	- ${timestamp}: Incident detected and reported

	## Investigation Notes

	(Add investigation notes here)

	## Mitigation Steps

	(Document mitigation steps here)

	## Post-incident Actions

	- [ ] Conduct post-mortem
	- [ ] Update runbooks
	- [ ] Implement preventive measures
	- [ ] Update monitoring/alerts

	---

	Auto-generated by Incident Response workflow
	`;

	const issue = await github.rest.issues.create({
	owner: context.repo.owner,
	repo: context.repo.repo,
	title: `[INCIDENT] ${incidentType} - ${severity.toUpperCase()}`,
	body: issueBody,
	labels: ['incident', `severity-${severity}`, `type-${incidentType}`]
	});

	console.log(`Created issue #${issue.data.number}`);
	return issue.data.number;

	outputs:
	issue_number: ${{ steps.create_issue.outputs.result }}

	gather-diagnostics:
	name: Gather Diagnostic Information
	runs-on: ubuntu-latest
	needs: create-incident-issue

	steps:
	- uses: actions/checkout@v4

	- name: Gather system information
	run: \|
	cat << 'EOF' > diagnostics.md
	# Diagnostic Information

	Timestamp: $(date -u +"%Y-%m-%d %H:%M:%S UTC")
	Incident: #${{ needs.create-incident-issue.outputs.issue_number }}

	## Git Information

	- Branch: ${{ github.ref }}
	- Commit: ${{ github.sha }}
	- Last deployment: (Check deployment logs)

	## Recent Commits

	EOF

	git log --oneline -10 >> diagnostics.md

	cat << 'EOF' >> diagnostics.md

	## Recent Workflow Runs

	(Check Actions tab for recent deployments and CI runs)

	## Configuration Status

	- Django settings: (Check for DEBUG, ALLOWED_HOSTS)
	- Database: MySQL primary, PostgreSQL secondary
	- Session backend: MySQL (django.contrib.sessions.backends.db)
	- Redis usage: NONE (RNF-002)

	## Monitoring Links

	- Application logs: /var/log/iact/
	- Database monitoring: (MySQL slow query log)
	- Health endpoint: /api/health

	---

	Generated by Incident Response workflow
	EOF

	cat diagnostics.md

	- name: Upload diagnostics
	uses: actions/upload-artifact@v4
	with:
	name: incident-diagnostics
	path: diagnostics.md
	retention-days: 90

	- name: Comment diagnostics on incident issue
	uses: actions/github-script@v7
	with:
	script: \|
	const fs = require('fs');
	const diagnostics = fs.readFileSync('diagnostics.md', 'utf8');

	await github.rest.issues.createComment({
	owner: context.repo.owner,
	repo: context.repo.repo,
	issue_number: ${{ needs.create-incident-issue.outputs.issue_number }},
	body: diagnostics
	});

	execute-incident-playbook:
	name: Execute Incident Playbook
	runs-on: ubuntu-latest
	needs: [create-incident-issue, gather-diagnostics]

	steps:
	- uses: actions/checkout@v4

	- name: Execute playbook based on incident type
	run: \|
	INCIDENT_TYPE="${{ github.event.inputs.incident_type }}"

	echo "Executing playbook for: $INCIDENT_TYPE"

	case $INCIDENT_TYPE in
	production_down)
	echo "=== Production Down Playbook ==="
	echo "1. Check health endpoint"
	echo "2. Check application server status"
	echo "3. Check database connectivity"
	echo "4. Check disk space"
	echo "5. Review recent deployments"
	echo "6. Consider rollback if recent deployment"
	;;

	performance_degradation)
	echo "=== Performance Degradation Playbook ==="
	echo "1. Check database query performance"
	echo "2. Check MySQL session table size"
	echo "3. Review slow query log"
	echo "4. Check memory and CPU usage"
	echo "5. Review recent code changes"
	echo "6. Consider scaling resources"
	;;

	security_breach)
	echo "=== Security Breach Playbook ==="
	echo "1. CRITICAL: Isolate affected systems"
	echo "2. Preserve logs for forensics"
	echo "3. Reset credentials"
	echo "4. Scan for malware"
	echo "5. Review access logs"
	echo "6. Notify security team"
	;;

	data_corruption)
	echo "=== Data Corruption Playbook ==="
	echo "1. Stop writes to affected data"
	echo "2. Identify scope of corruption"
	echo "3. Restore from backup (if necessary)"
	echo "4. Identify root cause"
	echo "5. Verify data integrity"
	echo "6. Resume operations"
	;;

	deployment_failure)
	echo "=== Deployment Failure Playbook ==="
	echo "1. Execute rollback plan"
	echo "2. Restore database from backup (if needed)"
	echo "3. Verify rollback successful"
	echo "4. Review deployment logs"
	echo "5. Identify failure cause"
	echo "6. Fix and re-deploy"
	;;

	high_error_rate)
	echo "=== High Error Rate Playbook ==="
	echo "1. Check error logs"
	echo "2. Identify error patterns"
	echo "3. Check database connectivity"
	echo "4. Check external dependencies"
	echo "5. Review recent changes"
	echo "6. Apply hotfix if needed"
	;;

	*)
	echo "Unknown incident type: $INCIDENT_TYPE"
	;;
	esac

	echo ""
	echo "Playbook steps documented. Execute manually as needed."

	- name: Generate incident playbook
	run: \|
	INCIDENT_TYPE="${{ github.event.inputs.incident_type }}"

	cat << EOF > playbook-$INCIDENT_TYPE.md
	# Incident Playbook: $INCIDENT_TYPE

	Severity: ${{ github.event.inputs.severity }}
	Incident: #${{ needs.create-incident-issue.outputs.issue_number }}

	## Immediate Actions

	$(case $INCIDENT_TYPE in
	production_down)
	echo "1. Verify production is down via health endpoint"
	echo "2. Check application server: sudo systemctl status gunicorn-iact-production"
	echo "3. Check database: mysql -h \$DB_HOST -u root -p -e 'SELECT 1;'"
	echo "4. Check logs: sudo tail -f /var/log/iact/error.log"
	;;
	performance_degradation)
	echo "1. Check session table: SELECT COUNT(*) FROM django_session;"
	echo "2. Run cleanup: python manage.py clearsessions"
	echo "3. Check slow queries: SELECT * FROM mysql.slow_log LIMIT 10;"
	echo "4. Monitor CPU/memory: top, htop"
	;;
	security_breach)
	echo "1. ISOLATE affected systems immediately"
	echo "2. Preserve logs: cp /var/log/iact/* /backup/forensics/"
	echo "3. Reset all passwords"
	echo "4. Scan: sudo clamscan -r /var/www/iact"
	;;
	deployment_failure)
	echo "1. Execute rollback: bash scripts/rollback.sh"
	echo "2. Verify rollback: curl -f https://iact.example.com/api/health"
	echo "3. Check logs: sudo journalctl -u gunicorn-iact-production -n 100"
	;;
	*)
	echo "Refer to incident type specific runbook"
	;;
	esac)

	## Communication

	- Notify team via Slack/Teams
	- Update status page
	- Notify affected users (if customer-facing)

	## Investigation Checklist

	- [ ] Review logs
	- [ ] Check recent deployments
	- [ ] Check recent configuration changes
	- [ ] Identify root cause
	- [ ] Document timeline

	## Mitigation Checklist

	- [ ] Apply immediate fix
	- [ ] Verify fix deployed
	- [ ] Monitor for recurrence
	- [ ] Update incident issue

	## Resolution Checklist

	- [ ] Incident resolved
	- [ ] Service fully restored
	- [ ] Schedule post-mortem
	- [ ] Update runbooks
	- [ ] Implement preventive measures

	---

	Generated by Incident Response workflow
	EOF

	cat playbook-$INCIDENT_TYPE.md

	- name: Upload playbook
	uses: actions/upload-artifact@v4
	with:
	name: incident-playbook
	path: playbook-*.md
	retention-days: 90

	- name: Comment playbook on incident issue
	uses: actions/github-script@v7
	with:
	script: \|
	const fs = require('fs');
	const files = fs.readdirSync('.').filter(f => f.startsWith('playbook-'));

	if (files.length > 0) {
	const playbook = fs.readFileSync(files[0], 'utf8');

	await github.rest.issues.createComment({
	owner: context.repo.owner,
	repo: context.repo.repo,
	issue_number: ${{ needs.create-incident-issue.outputs.issue_number }},
	body: playbook
	});
	}

	notify-team:
	name: Notify Team
	runs-on: ubuntu-latest
	needs: [create-incident-issue, execute-incident-playbook]
	if: always()

	steps:
	- name: Prepare notification
	run: \|
	SEVERITY="${{ github.event.inputs.severity }}"
	INCIDENT_TYPE="${{ github.event.inputs.incident_type }}"
	ISSUE_NUMBER="${{ needs.create-incident-issue.outputs.issue_number }}"

	cat << EOF > notification.txt
	INCIDENT ALERT

	Severity: $SEVERITY
	Type: $INCIDENT_TYPE
	Issue: #$ISSUE_NUMBER
	Description: ${{ github.event.inputs.description }}

	Affected Services: ${{ github.event.inputs.affected_services }}

	Actions Required:
	1. Review incident issue #$ISSUE_NUMBER
	2. Follow incident playbook
	3. Update incident issue with progress

	Incident Issue: ${{ github.server_url }}/${{ github.repository }}/issues/$ISSUE_NUMBER
	EOF

	cat notification.txt

	- name: Notify via InternalMessage (IACT compliant)
	run: \|
	echo "Notification prepared (send via InternalMessage system)"
	echo "Note: NO EMAIL per IACT restrictions"
	echo "Use InternalMessage.objects.create() to notify users"

	- name: Comment notification sent
	uses: actions/github-script@v7
	with:
	script: \|
	await github.rest.issues.createComment({
	owner: context.repo.owner,
	repo: context.repo.repo,
	issue_number: ${{ needs.create-incident-issue.outputs.issue_number }},
	body: '[PASS] Team notified via InternalMessage system (IACT RNF-002 compliant - NO EMAIL)'
	});

	summary:
	name: Incident Response Summary
	runs-on: ubuntu-latest
	needs: [create-incident-issue, gather-diagnostics, execute-incident-playbook, notify-team]
	if: always()

	steps:
	- name: Generate summary
	run: \|
	echo "========================================="
	echo "INCIDENT RESPONSE SUMMARY"
	echo "========================================="
	echo "Incident Type: ${{ github.event.inputs.incident_type }}"
	echo "Severity: ${{ github.event.inputs.severity }}"
	echo "Incident Issue: #${{ needs.create-incident-issue.outputs.issue_number }}"
	echo ""
	echo "Status:"
	echo " Issue Created: ${{ needs.create-incident-issue.result }}"
	echo " Diagnostics Gathered: ${{ needs.gather-diagnostics.result }}"
	echo " Playbook Executed: ${{ needs.execute-incident-playbook.result }}"
	echo " Team Notified: ${{ needs.notify-team.result }}"
	echo ""
	echo "Next Steps:"
	echo "1. Follow playbook steps manually"
	echo "2. Update incident issue with progress"
	echo "3. Resolve incident"
	echo "4. Schedule post-mortem"
	echo "========================================="

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Merge remote-tracking branch 'origin/develop' into develop #26

Workflow file

Merge remote-tracking branch 'origin/develop' into develop #26

Uh oh!

Workflow file for this run

GitHub Actions / .github/workflows/incident-response.yml