Skip to content

[User Story] Implement Conflict Detection Algorithm #46

@prodigy

Description

@prodigy

User Story

As a Git integration service, I want to intelligently detect and categorize merge conflicts during rebase operations, so that the system can apply appropriate resolution strategies based on conflict types.

Acceptance Criteria

  • Detect all types of Git conflicts (content, rename, delete/modify)
  • Parse conflict markers accurately across different file types
  • Extract comprehensive context around conflicts (full function/class scope)
  • Categorize conflicts by complexity (simple, moderate, complex)
  • Identify conflicts in binary files and mark for manual resolution
  • Support custom conflict detection rules per file type
  • Track conflict patterns for analytics and improvement
  • Provide detailed conflict metadata for resolution
  • Handle conflicts in special files (package.json, go.mod, etc.)
  • Generate conflict reports with visualization

Technical Implementation

Conflict Detection Service

// pkg/git/conflict_detector.go
package git

import (
    "bufio"
    "context"
    "fmt"
    "regexp"

    "github.com/go-git/go-git/v5"
)

// ConflictDetector inspects a repository's worktree status and turns each
// unmerged file into a Conflict record.
type ConflictDetector struct {
    // parsers holds the registered ConflictParser implementations.
    // NOTE(review): presumably keyed by file type or extension — confirm
    // against getParser.
    parsers    map[string]ConflictParser
    // analyzer is not referenced by the methods shown in this file.
    analyzer   *ConflictAnalyzer
    // classifier assigns a ComplexityLevel to the parsed conflict blocks.
    classifier *ConflictClassifier
    // config carries the detector settings.
    config     *DetectorConfig
}

// DetectorConfig groups the tunable settings of a ConflictDetector.
//
// NOTE(review): none of these fields is read by the code shown here, so the
// per-field notes are inferred from the names and need confirming.
type DetectorConfig struct {
    // Presumably toggles extraction of the whole enclosing function/class.
    ExtractFullContext   bool
    // Presumably the number of surrounding lines to capture as context.
    ContextLines         int
    // Presumably an upper bound on the conflict size that is analyzed; unit
    // not visible here — TODO confirm.
    MaxConflictSize      int64
    // Presumably the ratio used by the binary-file heuristic — TODO confirm.
    BinaryFileThreshold  float64
}

// Conflict describes a single conflicted file as produced by
// ConflictDetector.analyzeConflict.
type Conflict struct {
    // ID is generated from the file path.
    ID            string
    // FilePath is the path of the conflicted file as reported by the
    // worktree status.
    FilePath      string
    // FileType is derived from the path by the detector.
    FileType      string
    // Type is the kind of conflict (content, binary, ...).
    Type          ConflictType
    // Complexity is the level assigned by the classifier; binary conflicts
    // are always marked complex.
    Complexity    ComplexityLevel
    // CurrentBlock is the "current" side of the parsed conflict.
    CurrentBlock  CodeBlock
    // IncomingBlock is the "incoming" side of the parsed conflict.
    IncomingBlock CodeBlock
    // Context is the surrounding code context extracted for the conflict.
    Context       ConflictContext
    // Metadata carries extra key/value details; analyzeConflict fills
    // "line_count", "function_scope" and "class_scope".
    Metadata      map[string]interface{}
}

// ConflictType names the kind of conflict stored in Conflict.Type.
type ConflictType string

// Known ConflictType values. The string on the right is the value that is
// stored and reported.
const (
    ConflictTypeContent      ConflictType = "content"
    ConflictTypeRename       ConflictType = "rename"
    ConflictTypeDeleteModify ConflictType = "delete_modify"
    ConflictTypeBinary       ConflictType = "binary"
    ConflictTypeWhitespace   ConflictType = "whitespace"
    ConflictTypeMergeHeader  ConflictType = "merge_header"
)

// ComplexityLevel is the coarse difficulty bucket assigned to a conflict.
type ComplexityLevel string

// Complexity buckets, from easiest to hardest to resolve.
const (
    ComplexitySimple   ComplexityLevel = "simple"   // Single line, no logic changes
    ComplexityModerate ComplexityLevel = "moderate" // Multiple lines, same function
    ComplexityComplex  ComplexityLevel = "complex"  // Structural changes, multiple functions
)

// DetectConflicts reads the worktree status of repo and analyzes every file
// that go-git reports as unmerged.
//
// A file whose analysis fails is logged and skipped so that one unreadable
// file does not hide the remaining conflicts. The result is ordered by
// ascending complexity score so the simplest conflicts can be resolved first;
// conflicts with the same score are ordered by file path, which keeps the
// output deterministic even though map iteration order is random.
//
// An error is returned when the worktree or its status cannot be read, or
// when ctx is cancelled while files are still being scanned.
func (d *ConflictDetector) DetectConflicts(ctx context.Context, repo *git.Repository) ([]Conflict, error) {
    w, err := repo.Worktree()
    if err != nil {
        return nil, fmt.Errorf("opening worktree: %w", err)
    }

    status, err := w.Status()
    if err != nil {
        return nil, fmt.Errorf("reading worktree status: %w", err)
    }

    var conflicts []Conflict

    for path, fileStatus := range status {
        // Stop early instead of analyzing files nobody is waiting for.
        if err := ctx.Err(); err != nil {
            return nil, err
        }

        // go-git's status code for an unmerged entry is UpdatedButUnmerged
        // ('U'); it may be reported on either side of the status pair.
        if fileStatus.Staging != git.UpdatedButUnmerged && fileStatus.Worktree != git.UpdatedButUnmerged {
            continue
        }

        conflict, err := d.analyzeConflict(ctx, path, repo)
        if err != nil {
            log.Warnf("Failed to analyze conflict in %s: %v", path, err)
            continue
        }

        conflicts = append(conflicts, conflict)
    }

    // Sort by complexity for prioritized resolution, breaking ties by path.
    sort.Slice(conflicts, func(i, j int) bool {
        si, sj := d.getComplexityScore(conflicts[i]), d.getComplexityScore(conflicts[j])
        if si != sj {
            return si < sj
        }
        return conflicts[i].FilePath < conflicts[j].FilePath
    })

    return conflicts, nil
}

// analyzeConflict builds the Conflict record for the unmerged file at path.
//
// Binary files are not parsed: they are reported as ConflictTypeBinary with
// complex complexity so they can be routed to manual resolution. For text
// files the parser selected for the path extracts the conflict blocks, the
// surrounding context is collected and the classifier assigns a complexity.
//
// An error is returned when ctx is already cancelled, when the file cannot
// be read, or when its conflict markers cannot be parsed.
func (d *ConflictDetector) analyzeConflict(ctx context.Context, path string, repo *git.Repository) (Conflict, error) {
    if err := ctx.Err(); err != nil {
        return Conflict{}, err
    }

    // Read file content
    content, err := d.readFile(repo, path)
    if err != nil {
        return Conflict{}, fmt.Errorf("reading %q: %w", path, err)
    }

    // Binary files carry the same identifying fields as text conflicts so
    // downstream consumers can key on ID and FileType uniformly.
    if d.isBinary(content) {
        return Conflict{
            ID:         generateConflictID(path),
            FilePath:   path,
            FileType:   d.getFileType(path),
            Type:       ConflictTypeBinary,
            Complexity: ComplexityComplex,
        }, nil
    }

    // Parse conflict blocks with the parser registered for this path
    blocks, err := d.getParser(path).ParseConflictBlocks(content)
    if err != nil {
        return Conflict{}, fmt.Errorf("parsing conflict markers in %q: %w", path, err)
    }

    // Named conflictCtx so the local does not shadow the context package.
    conflictCtx := d.extractContext(content, blocks, path)

    // Classify complexity
    complexity := d.classifier.ClassifyComplexity(blocks, conflictCtx)

    return Conflict{
        ID:            generateConflictID(path),
        FilePath:      path,
        FileType:      d.getFileType(path),
        Type:          d.determineConflictType(blocks),
        Complexity:    complexity,
        CurrentBlock:  blocks.Current,
        IncomingBlock: blocks.Incoming,
        Context:       conflictCtx,
        Metadata: map[string]interface{}{
            "line_count":     blocks.LineCount,
            "function_scope": conflictCtx.FunctionName,
            "class_scope":    conflictCtx.ClassName,
        },
    }, nil
}

Conflict Parsing

// pkg/git/conflict_parser.go
// ConflictParser extracts conflict information from the content of a
// conflicted file.
type ConflictParser interface {
    // ParseConflictBlocks splits content into the sides of a conflict.
    ParseConflictBlocks(content []byte) (*ConflictBlocks, error)
    // ExtractContext describes the code surrounding the parsed blocks
    // (for example the enclosing function or class).
    ExtractContext(content []byte, blocks *ConflictBlocks) ConflictContext
}

// ConflictBlocks holds the sides of one parsed conflict hunk together with
// its position in the file.
type ConflictBlocks struct {
    // Current is the text between the "<<<<<<<" marker and the next marker.
    Current   CodeBlock
    // Incoming is the text between "=======" and ">>>>>>>".
    Incoming  CodeBlock
    Base      CodeBlock // If available (diff3 style)
    // LineStart is the 1-based line number of the "<<<<<<<" marker as set by
    // GenericConflictParser; it is 0 when no conflict was found.
    LineStart int
    // LineEnd is the 1-based line number of the closing ">>>>>>>" marker.
    LineEnd   int
    // LineCount is LineEnd minus LineStart.
    LineCount int
}

// CodeBlock is one side of a conflict.
type CodeBlock struct {
    // Content is the text of this side; the generic parser terminates every
    // line with "\n".
    Content    string
    // StartLine is a 1-based line number. The generic parser fills it only
    // for the current side, using the line of the "<<<<<<<" marker.
    StartLine  int
    // EndLine is not populated by the generic parser shown here.
    EndLine    int
    // Hash is the value of hashContent applied to Content.
    Hash       string
}

// GenericConflictParser is the language-agnostic parser used for most file
// types; the language-specific parsers embed it.
type GenericConflictParser struct {
    // markerRegex is compiled by NewGenericConflictParser.
    // ParseConflictBlocks does not consult it.
    markerRegex *regexp.Regexp
}

// NewGenericConflictParser returns a parser whose marker pattern matches any
// line that begins with one of Git's seven-character conflict markers:
// "<<<<<<<", "=======", ">>>>>>>" or the diff3 base marker "|||||||".
func NewGenericConflictParser() *GenericConflictParser {
    return &GenericConflictParser{
        // Inside a raw string a single backslash is enough to escape the
        // pipe. The previous `\\|{7}` parsed as "literal backslash, OR,
        // {7}" and made MustCompile panic with "missing argument to
        // repetition operator".
        markerRegex: regexp.MustCompile(`(?m)^(<{7}|={7}|>{7}|\|{7})`),
    }
}

// ParseConflictBlocks extracts the first conflict hunk found in content.
//
// A line counts as a marker only when it is exactly the seven-character
// marker, optionally followed by a space and a label, and only in the
// position Git would write it: "<<<<<<<" opens a hunk, "|||||||" (diff3) may
// follow the current side, "=======" ends the current or base side and
// ">>>>>>>" closes the incoming side. Marker look-alikes elsewhere (for
// example a "========" heading underline or quoted ">>>>>>>" text before the
// hunk) are treated as ordinary content.
//
// Only the first hunk is returned; scanning stops at its closing marker.
// When content holds no conflict the returned blocks are empty with all line
// fields zero. When the hunk is never closed, LineEnd is set to the last
// line scanned so LineCount cannot become negative.
//
// Line numbers are 1-based. LineStart is the line of the "<<<<<<<" marker
// and LineCount is LineEnd - LineStart. An error is returned only if
// scanning the content fails.
func (p *GenericConflictParser) ParseConflictBlocks(content []byte) (*ConflictBlocks, error) {
    const (
        sectionNone = iota
        sectionCurrent
        sectionBase
        sectionIncoming
    )

    // isMarker reports whether line is the given marker, bare or followed by
    // a space-separated label.
    isMarker := func(line, marker string) bool {
        if !strings.HasPrefix(line, marker) {
            return false
        }
        return len(line) == len(marker) || line[len(marker)] == ' '
    }

    scanner := bufio.NewScanner(bytes.NewReader(content))
    // Allow a single line to be as long as the whole input; the default
    // 64 KiB token limit would otherwise stop the scan on long lines.
    scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), len(content)+1)

    var (
        currentBlock  strings.Builder
        incomingBlock strings.Builder
        baseBlock     strings.Builder
        section       = sectionNone
        lineNum       int
        startLine     int
        terminated    bool
    )

    blocks := &ConflictBlocks{}

    for !terminated && scanner.Scan() {
        lineNum++
        line := scanner.Text()

        switch section {
        case sectionNone:
            if isMarker(line, "<<<<<<<") {
                startLine = lineNum
                section = sectionCurrent
            }
        case sectionCurrent:
            switch {
            case isMarker(line, "|||||||"):
                section = sectionBase
            case isMarker(line, "======="):
                section = sectionIncoming
            default:
                currentBlock.WriteString(line)
                currentBlock.WriteByte('\n')
            }
        case sectionBase:
            if isMarker(line, "=======") {
                section = sectionIncoming
            } else {
                baseBlock.WriteString(line)
                baseBlock.WriteByte('\n')
            }
        case sectionIncoming:
            if isMarker(line, ">>>>>>>") {
                blocks.LineEnd = lineNum
                terminated = true
            } else {
                incomingBlock.WriteString(line)
                incomingBlock.WriteByte('\n')
            }
        }
    }

    if err := scanner.Err(); err != nil {
        return nil, fmt.Errorf("scanning conflict content: %w", err)
    }

    // An opened but never closed hunk ends at the last line that was read.
    if section != sectionNone && !terminated {
        blocks.LineEnd = lineNum
    }

    blocks.Current = CodeBlock{
        Content:   currentBlock.String(),
        StartLine: startLine,
        Hash:      hashContent(currentBlock.String()),
    }
    blocks.Incoming = CodeBlock{
        Content: incomingBlock.String(),
        Hash:    hashContent(incomingBlock.String()),
    }
    blocks.Base = CodeBlock{
        Content: baseBlock.String(),
        Hash:    hashContent(baseBlock.String()),
    }
    blocks.LineStart = startLine
    blocks.LineCount = blocks.LineEnd - startLine

    return blocks, nil
}

Language-Specific Parsers

// pkg/git/language_parsers.go
// PythonConflictParser extends the generic parser with AST-based context
// extraction for Python sources.
type PythonConflictParser struct {
    // Embedded so marker parsing is inherited from the generic parser.
    *GenericConflictParser
    // astParser parses file content into the tree queried by ExtractContext.
    astParser *PythonASTParser
}

// ExtractContext reports the Python function and class that enclose the
// conflict starting at blocks.LineStart.
//
// The content is parsed with the AST parser; if parsing fails the generic
// parser's context extraction is used instead. When the node at the conflict
// line is a function, its name, signature and body are recorded, and when
// that node has a parent class the class name is recorded as well.
func (p *PythonConflictParser) ExtractContext(content []byte, blocks *ConflictBlocks) ConflictContext {
    tree, parseErr := p.astParser.Parse(content)
    if parseErr != nil {
        // Without a syntax tree, fall back to the language-agnostic context.
        return p.GenericConflictParser.ExtractContext(content, blocks)
    }

    // Node that contains the first line of the conflict.
    enclosing := tree.FindNodeAtLine(blocks.LineStart)

    result := ConflictContext{
        Language: "python",
    }

    if fn, isFunc := enclosing.(*FunctionNode); isFunc {
        result.FunctionName = fn.Name
        result.FunctionSignature = fn.Signature
        result.SurroundingCode = p.extractFunctionBody(content, fn)
    }

    owner := tree.FindParentClass(enclosing)
    if owner != nil {
        result.ClassName = owner.Name
    }

    return result
}

// JavaScriptConflictParser is the JavaScript/TypeScript parser. It inherits
// marker parsing from the embedded generic parser and adds regex-based
// function/class context extraction.
type JavaScriptConflictParser struct {
    *GenericConflictParser
}

// Patterns used by JavaScriptConflictParser.ExtractContext. They are compiled
// once at package initialization instead of on every call.
var (
    // jsFunctionPattern captures the name of a function declaration (group 1)
    // or of a const/let/var bound to a parenthesized function expression
    // (group 2).
    jsFunctionPattern = regexp.MustCompile(`(?m)^(?:export\s+)?(?:async\s+)?function\s+(\w+)|(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\(`)
    // jsClassPattern captures the name of a class declaration.
    jsClassPattern = regexp.MustCompile(`(?m)^(?:export\s+)?class\s+(\w+)`)
)

// ExtractContext finds the nearest function or class definition above the
// conflict and reports it together with the surrounding lines.
//
// The search walks backwards from the line of the opening conflict marker
// and inspects at most the 49 lines that precede line blocks.LineStart+1.
// The first line that matches the function pattern or the class pattern
// wins. If nothing matches, or blocks.LineStart is 0, the generic parser's
// context is returned. A LineStart beyond the end of content is clamped to
// the last line instead of indexing out of range.
func (p *JavaScriptConflictParser) ExtractContext(content []byte, blocks *ConflictBlocks) ConflictContext {
    lines := bytes.Split(content, []byte("\n"))

    // LineStart is 1-based, so LineStart-1 is the index of the marker line.
    start := blocks.LineStart - 1
    if start >= len(lines) {
        start = len(lines) - 1
    }

    // Search backwards for function/class definition
    for i := start; i >= 0 && i > blocks.LineStart-50; i-- {
        line := string(lines[i])

        if matches := jsFunctionPattern.FindStringSubmatch(line); len(matches) > 0 {
            // Group 1 is set for declarations, group 2 for assignments.
            funcName := matches[1]
            if funcName == "" {
                funcName = matches[2]
            }

            return ConflictContext{
                Language:        "javascript",
                FunctionName:    funcName,
                SurroundingCode: p.extractSurroundingLines(lines, blocks.LineStart, 10),
            }
        }

        if matches := jsClassPattern.FindStringSubmatch(line); len(matches) > 0 {
            return ConflictContext{
                Language:        "javascript",
                ClassName:       matches[1],
                SurroundingCode: p.extractSurroundingLines(lines, blocks.LineStart, 10),
            }
        }
    }

    return p.GenericConflictParser.ExtractContext(content, blocks)
}

Conflict Classification

// pkg/git/conflict_classifier.go
// ConflictClassifier scores a conflict against a list of rules and maps the
// total score to a ComplexityLevel.
type ConflictClassifier struct {
    // rules are evaluated in order; every matching rule adds its Score.
    rules []ClassificationRule
}

// ClassificationRule is one scoring heuristic used by ConflictClassifier.
type ClassificationRule struct {
    // Name identifies the rule.
    Name      string
    // Condition reports whether the rule applies to the given conflict.
    Condition func(blocks *ConflictBlocks, context ConflictContext) bool
    // Score is added to the conflict's total when Condition returns true.
    Score     int
}

// ClassifyComplexity sums the scores of all rules whose condition holds for
// the conflict and converts the total into a level: up to 10 is simple, up
// to 30 is moderate, anything above is complex.
func (c *ConflictClassifier) ClassifyComplexity(blocks *ConflictBlocks, context ConflictContext) ComplexityLevel {
    total := 0
    for i := range c.rules {
        if matched := c.rules[i].Condition(blocks, context); matched {
            total += c.rules[i].Score
        }
    }

    // Check the highest bucket first and fall through to the lowest.
    if total > 30 {
        return ComplexityComplex
    }
    if total > 10 {
        return ComplexityModerate
    }
    return ComplexitySimple
}

// DefaultClassificationRules returns the built-in scoring heuristics.
//
// Keywords are matched as whole words, so "diff" does not count as "if" and
// "default" does not count as "def". All keyword rules look at both sides of
// the conflict, and the whitespace rule ignores every whitespace difference,
// not just leading and trailing space.
//
// NOTE(review): with the thresholds used by ClassifyComplexity (<=10 simple,
// <=30 moderate) a structural change on its own scores exactly 30 and is
// therefore classified moderate, not complex — confirm that is intended.
func DefaultClassificationRules() []ClassificationRule {
    // isWordByte reports whether c can be part of an identifier. Bytes of
    // multi-byte UTF-8 characters are treated as identifier bytes.
    isWordByte := func(c byte) bool {
        return c == '_' ||
            (c >= '0' && c <= '9') ||
            (c >= 'a' && c <= 'z') ||
            (c >= 'A' && c <= 'Z') ||
            c >= 0x80
    }

    // hasWord reports whether word occurs in text delimited by
    // non-identifier characters (or the text boundaries) on both sides.
    hasWord := func(text, word string) bool {
        for from := 0; from < len(text); {
            idx := strings.Index(text[from:], word)
            if idx < 0 {
                return false
            }
            start := from + idx
            end := start + len(word)
            leftOK := start == 0 || !isWordByte(text[start-1])
            rightOK := end == len(text) || !isWordByte(text[end])
            if leftOK && rightOK {
                return true
            }
            from = start + 1
        }
        return false
    }

    // eitherSideHas reports whether word occurs on the current or the
    // incoming side of the conflict.
    eitherSideHas := func(b *ConflictBlocks, word string) bool {
        return hasWord(b.Current.Content, word) || hasWord(b.Incoming.Content, word)
    }

    // collapse removes all whitespace differences by joining the
    // whitespace-separated fields with single spaces.
    collapse := func(s string) string {
        return strings.Join(strings.Fields(s), " ")
    }

    return []ClassificationRule{
        {
            Name: "single_line_change",
            Condition: func(b *ConflictBlocks, _ ConflictContext) bool {
                return b.LineCount <= 3
            },
            Score: 5,
        },
        {
            Name: "whitespace_only",
            Condition: func(b *ConflictBlocks, _ ConflictContext) bool {
                return collapse(b.Current.Content) == collapse(b.Incoming.Content)
            },
            Score: 1,
        },
        {
            Name: "import_conflict",
            Condition: func(b *ConflictBlocks, _ ConflictContext) bool {
                return eitherSideHas(b, "import") || eitherSideHas(b, "require")
            },
            Score: 10,
        },
        {
            Name: "structural_change",
            Condition: func(b *ConflictBlocks, _ ConflictContext) bool {
                // Check for class/function definition changes
                structuralKeywords := []string{"class", "function", "def", "interface", "struct"}
                for _, keyword := range structuralKeywords {
                    if eitherSideHas(b, keyword) {
                        return true
                    }
                }
                return false
            },
            Score: 30,
        },
        {
            Name: "logic_change",
            Condition: func(b *ConflictBlocks, _ ConflictContext) bool {
                // Fires when more than two distinct control-flow keywords
                // appear in the conflict.
                logicKeywords := []string{"if", "else", "for", "while", "switch", "case"}
                count := 0
                for _, keyword := range logicKeywords {
                    if eitherSideHas(b, keyword) {
                        count++
                    }
                }
                return count > 2
            },
            Score: 20,
        },
    }
}

Conflict Analytics

// pkg/git/conflict_analytics.go
// ConflictAnalytics records resolved conflicts and exposes the patterns
// derived from the stored records.
type ConflictAnalytics struct {
    // db stores and retrieves conflict records.
    db      *AnalyticsDB
    // metrics receives one observation per recorded conflict.
    metrics *MetricsCollector
}

// RecordConflict persists the outcome of resolving conflict and then updates
// the conflict metrics.
//
// The stored record copies the identifying fields of the conflict and the
// method, success flag and duration of the resolution, stamped with the
// current time. If storing fails the error is returned and the metrics are
// left untouched.
func (a *ConflictAnalytics) RecordConflict(conflict Conflict, resolution Resolution) error {
    var record ConflictRecord
    record.ID = generateID()
    record.ConflictID = conflict.ID
    record.FilePath = conflict.FilePath
    record.FileType = conflict.FileType
    record.Type = conflict.Type
    record.Complexity = conflict.Complexity
    record.ResolutionMethod = resolution.Method
    record.Success = resolution.Success
    record.Duration = resolution.Duration
    record.Timestamp = time.Now()

    // Persist first; the record is the input for later pattern analysis.
    storeErr := a.db.StoreConflictRecord(record)
    if storeErr != nil {
        return storeErr
    }

    // Only count conflicts that were actually stored.
    a.metrics.RecordConflict(conflict.Type, conflict.Complexity)
    return nil
}

// GetConflictPatterns loads the conflict records that fall within timeRange
// and returns the patterns derived from them. A failure to load the records
// is returned unchanged.
func (a *ConflictAnalytics) GetConflictPatterns(timeRange TimeRange) ([]ConflictPattern, error) {
    stored, loadErr := a.db.GetConflictRecords(timeRange)
    if loadErr != nil {
        return nil, loadErr
    }

    // Pattern analysis itself cannot fail, so its result is returned directly.
    return a.analyzePatterns(stored), nil
}

Architecture References

Conflict Detection in Git Service

Reference: /docs/02-system-components.md:251-264

# Quoted from the architecture reference: rebases `repo` onto `target` and, if
# Git reports conflicts, parses them and hands them to the conflict resolver.
async def rebase_with_validation(self, repo: Repository, target: str) -> RebaseResult:
    """Attempt rebase with comprehensive conflict resolution"""
    try:
        # Attempt automatic rebase
        result = await repo.git.rebase(target)
        # A rebase that raises nothing is reported as a success without
        # conflicts; the value bound to `result` is not used.
        return RebaseResult(success=True, conflicts=[])
    except GitConflictError as e:
        # Handle conflicts with validation
        conflicts = await self._parse_conflicts(repo)
        resolved_conflicts = await self.conflict_resolver.resolve_with_validation(
            conflicts, target
        )
        # NOTE(review): success is False even after the resolver has run, and
        # the caught error `e` is not inspected — confirm both are intended.
        return RebaseResult(success=False, conflicts=resolved_conflicts)

Conflict Resolution Flow

Reference: /docs/03-data-flow.md:156-162

stateDiagram-v2
    Rebasing --> ConflictDetection: Start rebase
    ConflictDetection --> ConflictResolution: Conflicts found
    ConflictDetection --> PushChanges: No conflicts
    ConflictResolution --> ContinueRebase: Resolved
Loading

Database Schema for Tracking

Reference: /docs/02-system-components.md:613-627

-- Tracking table for conflict resolutions (presumably one row per resolved
-- file conflict — confirm against the writer).
CREATE TABLE conflict_resolutions (
    -- Surrogate key, generated by the database when not supplied.
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Job the resolution belongs to; references jobs(id) and may be NULL.
    job_id UUID REFERENCES jobs(id),
    -- Path of the conflicted file; the only mandatory data column.
    file_path TEXT NOT NULL,
    -- Free-form labels limited to 50 characters; no CHECK constraint ties
    -- them to a fixed set of values.
    conflict_type VARCHAR(50),
    resolution_method VARCHAR(50),
    -- Validation output stored as a JSON document.
    validation_results JSONB,
    success BOOLEAN,
    -- Insertion time; the column has no time zone.
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

Dependencies

  • go-git/v5: Git operations
  • tree-sitter: Multi-language AST parsing
  • PostgreSQL: Conflict analytics storage
  • Prometheus: Metrics collection

Definition of Done

  • Unit tests cover all conflict types with 90%+ coverage
  • Integration tests verify detection on real merge conflicts
  • Performance tests show <100ms detection for typical conflicts
  • Language-specific parsers work for Python, JS/TS, Go, Java
  • Conflict patterns dashboard shows analytics
  • Documentation includes conflict type examples
  • Metrics track detection accuracy

Effort Estimate

13 Story Points - Complex parsing and classification logic

Labels

  • backend
  • git
  • epic-5

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions