diff --git a/.codeboarding/Analysis_Visualization.html b/.codeboarding/Analysis_Visualization.html new file mode 100644 index 0000000..37b6306 --- /dev/null +++ b/.codeboarding/Analysis_Visualization.html @@ -0,0 +1,423 @@ + + +
+ + +The `Analysis & Visualization` component serves as an umbrella for functionality related to protein metrics, visualization, and the management of their underlying external dependencies. Based on the analysis summary and the related classes and methods, it can be broken down into four fundamental sub-components, each representing a distinct and crucial aspect of the `proteinflow` library: `Data Management`, `Visualization`, `Metrics and Analysis`, and `External Dependencies and Utilities`.
+ + +This component is responsible for defining and managing the core data structures that represent protein information. It handles the loading of protein entries from various formats, such as PDB files and serialized pickle files, and provides foundational data objects for the entire system. The class hierarchy shows `SAbDabEntry` inheriting from `PDBEntry`, indicating a structured approach to handling different protein data types.
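The inheritance relationship noted above can be illustrated with a minimal, self-contained sketch. The class names mirror proteinflow's, but the attributes and methods are simplified stand-ins, not the library's real API.

```python
# Illustrative sketch only: proteinflow's real classes parse files and
# carry far more state than these stand-ins.
class PDBEntry:
    """Base container for a parsed PDB structure (simplified)."""

    def __init__(self, pdb_id):
        self.pdb_id = pdb_id

    def chains(self):
        # A real entry would return chain IDs parsed from the file.
        return []


class SAbDabEntry(PDBEntry):
    """Antibody-specific entry extending the base class (simplified)."""

    def __init__(self, pdb_id, heavy_chain, light_chain):
        super().__init__(pdb_id)
        self.heavy_chain = heavy_chain
        self.light_chain = light_chain

    def chains(self):
        # Antibody entries know their heavy- and light-chain labels.
        return [self.heavy_chain, self.light_chain]


entry = SAbDabEntry("7abc", heavy_chain="H", light_chain="L")
assert isinstance(entry, PDBEntry)  # specialization of the base entry
```

The point is structural: antibody-specific behavior layers on top of the generic PDB representation rather than duplicating it.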
+Related classes/methods: `SAbDabEntry` (1:1), `PDBEntry` (1:1)
+
+This component focuses on the graphical representation and animation of protein structures. It takes processed protein data and renders it for user viewing, offering functionality such as showing animations from PDB or pickle files and merging multiple protein structures for combined display.
+Related classes/methods: `visualize` (1:1)
+
+This component offers a comprehensive suite of computational tools for analyzing protein sequences and structures. It includes functions for calculating various biological and structural metrics (e.g., BLOSUM62 score, TM-score, language-model perplexity) and for integrating with external models for structure generation (e.g., ESMFold, IgFold).
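As a concrete illustration of substitution-matrix scoring (one of the metrics listed above), the sketch below sums pairwise scores over two pre-aligned sequences. The three-residue matrix is a toy stand-in; proteinflow's metric would use the full BLOSUM62 table.

```python
# Toy three-residue alphabet; a real implementation would use the full
# BLOSUM62 table (20 amino acids plus ambiguity codes).
TOY_MATRIX = {
    ("A", "A"): 4, ("A", "G"): 0, ("G", "G"): 6,
    ("A", "V"): 0, ("G", "V"): -3, ("V", "V"): 4,
}

def substitution_score(seq1, seq2):
    """Sum pairwise substitution scores over two pre-aligned sequences."""
    if len(seq1) != len(seq2):
        raise ValueError("sequences must be aligned to equal length")
    total = 0
    for a, b in zip(seq1, seq2):
        # The matrix is symmetric, so try either key orientation.
        total += TOY_MATRIX.get((a, b), TOY_MATRIX.get((b, a), 0))
    return total

print(substitution_score("AGV", "AGV"))  # 4 + 6 + 4 = 14
```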
+Related classes/methods: `metrics` (1:1)
+
+This component manages optional external dependencies and provides general utility functions. Its primary roles include checking for the availability of required external packages (`requires_extra`) and facilitating the acquisition of visualization views (`_get_view`). It acts as an abstraction layer, ensuring that core functionality degrades gracefully when optional integrations are absent.
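The optional-dependency guard can be sketched as a decorator. proteinflow does expose `requires_extra`, but its exact signature and behavior are not shown here, so treat this stdlib version as an illustration of the pattern rather than the library's implementation.

```python
import functools
import importlib.util

def requires_extra(module_name):
    """Decorator: defer an ImportError until the guarded call (sketch)."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Check importability lazily, so merely importing the module
            # that defines the guarded function never fails.
            if importlib.util.find_spec(module_name) is None:
                raise ImportError(
                    f"{func.__name__} needs optional package '{module_name}'"
                )
            return func(*args, **kwargs)
        return wrapper
    return decorator

@requires_extra("json")  # stdlib module, so always available
def always_works():
    return "ok"

@requires_extra("surely_not_installed_pkg")  # fails on call, not on import
def never_works():
    return "unreachable"

print(always_works())  # ok
```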
+Related classes/methods: `requires_extra` (1:1), `_get_view` (1:1)
+
+The `Core Data Management` component is fundamental to `proteinflow` because it establishes the initial pipeline for acquiring, structuring, and preparing raw protein data. It ensures that all subsequent operations, such as feature extraction and model training, have access to high-quality, standardized input. Without these foundational steps, the project would lack the data integrity and accessibility needed to function effectively.
+ + +This component is responsible for fetching raw protein data (PDB and SAbDab files) from external databases and managing their local storage. It acts as the primary entry point for data acquisition.
+Related classes/methods: `proteinflow.download` (1:1)
+
+This class serves as the foundational data structure for parsing and representing information from standard PDB or mmCIF files. It extracts atomic coordinates, sequences, and basic structural properties, including initial ligand information.
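To make the parsing role concrete, here is a minimal reader for a single fixed-width ATOM record. Real parsers (including whatever proteinflow builds on) also handle altlocs, insertion codes, HETATM records, and mmCIF; this sketch only shows where coordinates and identifiers live in the record.

```python
def parse_atom_line(line):
    """Parse one fixed-width PDB ATOM record into a small dict (sketch)."""
    return {
        "atom_name": line[12:16].strip(),   # columns 13-16
        "res_name": line[17:20].strip(),    # columns 18-20
        "chain_id": line[21],               # column 22
        "res_seq": int(line[22:26]),        # columns 23-26
        # x, y, z occupy three 8-character fields (columns 31-54).
        "xyz": (float(line[30:38]), float(line[38:46]), float(line[46:54])),
    }

record = "ATOM      1  N   MET A   1      38.198  19.582  43.096  1.00 24.46           N"
atom = parse_atom_line(record)
print(atom["chain_id"], atom["res_name"], atom["xyz"][0])  # A MET 38.198
```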
+Related classes/methods: `proteinflow.data.PDBEntry` (1:1)
+
+Extending `PDBEntry`, this specialized class handles antibody structures from the SAbDab database. It adds logic for identifying Complementarity-Determining Regions (CDRs) and managing antibody chain types, building upon the base PDB structure.
+Related classes/methods: `proteinflow.data.SAbDabEntry` (1:1)
+
+This is the central, standardized data model that aggregates and processes information from `PDBEntry` and `SAbDabEntry`. It represents the cleaned, filtered, and unified protein data, ready for feature extraction and downstream analysis.
+Related classes/methods: `proteinflow.data.ProteinEntry` (1:1)
+
+This module is dedicated to the identification, parsing, and detailed processing of ligand molecules associated with protein structures. It handles tasks such as extracting ligand data from PDB files and managing their chemical properties.
+Related classes/methods: `proteinflow.ligand` (1:1)
+
+This component orchestrates the overall data-processing pipeline. It manages the filtering, cleaning, and conversion of raw protein entries into standardized `ProteinEntry` objects, ensuring data quality and preparing the data for further use.
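The filter-then-convert flow described above can be sketched generically. The predicate names and dict-based entries below are hypothetical, not proteinflow's actual checks, which include many more quality filters (resolution cutoffs, missing residues, and so on).

```python
def process_entries(raw_entries, filters, convert):
    """Keep entries that pass every filter, then convert the survivors."""
    return [convert(e) for e in raw_entries if all(f(e) for f in filters)]

# Hypothetical toy checks standing in for real quality filters.
def min_length(entry):
    return len(entry["sequence"]) >= 3

def known_residues(entry):
    # Only the 20 standard one-letter amino-acid codes are allowed.
    return set(entry["sequence"]) <= set("ACDEFGHIKLMNPQRSTVWY")

raw = [
    {"id": "good", "sequence": "ACDEF"},
    {"id": "short", "sequence": "AC"},   # fails min_length
    {"id": "odd", "sequence": "ACX"},    # fails known_residues ('X')
]
print(process_entries(raw, [min_length, known_residues], lambda e: e["id"]))
# ['good']
```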
+Related classes/methods: `proteinflow.processing` (1:1)
+
+This subsystem focuses on organizing and partitioning processed protein data into distinct train, validation, and test sets, often employing clustering techniques to ensure diverse and representative splits. It also provides PyTorch-compatible `Dataset` and `DataLoader` classes for efficient batching and preparation of data, making it ready for machine-learning model training and evaluation.
+ + +This module orchestrates the division of the protein dataset into training, validation, and test sets. It employs advanced strategies, including sequence and structural similarity-based clustering (e.g., using MMseqs2 and Foldseek), to ensure robust data separation and prevent data leakage, crucial for unbiased model evaluation.
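The leakage-prevention idea is to assign whole similarity clusters, not individual entries, to a split. The sketch below takes precomputed clusters as input (in proteinflow they would come from MMseqs2/Foldseek clustering); the cluster and entry names are made up.

```python
import random

def split_by_cluster(clusters, valid_frac=0.1, test_frac=0.1, seed=0):
    """Assign whole clusters to train/valid/test sets.

    Because similar entries share a cluster, no near-duplicate can end up
    on both sides of a split boundary. `clusters` maps cluster name to a
    list of entry identifiers.
    """
    names = sorted(clusters)
    random.Random(seed).shuffle(names)  # deterministic shuffle
    n = len(names)
    n_test = max(1, int(n * test_frac))
    n_valid = max(1, int(n * valid_frac))
    test = {e for c in names[:n_test] for e in clusters[c]}
    valid = {e for c in names[n_test:n_test + n_valid] for e in clusters[c]}
    train = {e for c in names[n_test + n_valid:] for e in clusters[c]}
    return train, valid, test

# Hypothetical clusters of biounit identifiers.
clusters = {
    "c1": ["1abc_A", "2def_B"], "c2": ["3ghi_A"],
    "c3": ["4jkl_C", "4jkl_D"], "c4": ["5mno_A"],
}
train, valid, test = split_by_cluster(clusters)
assert not (train & valid) and not (train & test) and not (valid & test)
```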
+Related classes/methods: `proteinflow.split` (0:0), `proteinflow.split.utils` (0:0), `proteinflow.split.split_data` (0:0), `proteinflow.split._build_dataset_partition` (0:0), `proteinflow.split._split_dataset_with_graphs` (0:0), `proteinflow.split._get_split_dictionaries` (0:0)
+
+This module provides the necessary PyTorch-compatible `Dataset` and `DataLoader` classes, facilitating seamless integration of processed protein data with deep-learning models. It handles efficient data loading, batching, and preparation for training and evaluation.
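The Dataset/DataLoader protocol boils down to `__len__`, `__getitem__`, and a batching iterator. This stdlib sketch mirrors the pattern without importing PyTorch; proteinflow's `ProteinDataset`/`ProteinLoader` add real tensor featurization and padding on top of it.

```python
class ToyProteinDataset:
    """Map-style dataset: __len__ plus index-based __getitem__."""

    def __init__(self, entries):
        self._entries = entries

    def __len__(self):
        return len(self._entries)

    def __getitem__(self, index):
        return self._entries[index]


def batches(dataset, batch_size):
    """Yield consecutive fixed-size batches, like a non-shuffling loader."""
    for start in range(0, len(dataset), batch_size):
        stop = min(start + batch_size, len(dataset))
        yield [dataset[i] for i in range(start, stop)]


ds = ToyProteinDataset(["seq1", "seq2", "seq3", "seq4", "seq5"])
print([len(b) for b in batches(ds, 2)])  # [2, 2, 1]
```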
+Related classes/methods: `proteinflow.data.torch` (0:0), `proteinflow.data.torch.ProteinDataset` (242:1131), `proteinflow.data.torch.ProteinLoader` (67:239)
+
+This fundamental component defines the structure for encapsulating all relevant information about a single protein entry, including sequence, coordinates, chain IDs, and associated ligand data. It provides methods for parsing, validating, and extracting specific features, serving as the core data representation throughout the data-preparation pipeline. `proteinflow.data.SAbDabEntry` inherits from `proteinflow.data.PDBEntry`, extending the base protein data structure for antibody-specific entries.
+Related classes/methods: `proteinflow.data` (0:0), `proteinflow.data.utils` (0:0), `proteinflow.data.PDBEntry` (0:0), `proteinflow.data.SAbDabEntry` (0:0), `proteinflow.data.utils.from_pickle` (0:0), `proteinflow.data.utils.to_pdb` (0:0), `proteinflow.data.utils.get_chains` (0:0), `proteinflow.data.utils.get_sequence` (0:0), `proteinflow.data.utils.get_coordinates` (0:0), `proteinflow.data.utils.retrieve_ligands_from_pickle` (0:0)
+
+This module specializes in handling ligand-related data within protein entries. It includes functionality for loading ligand information (e.g., SMILES strings) and performing chemical-similarity-based clustering, which can be integrated into data-splitting strategies.
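Tanimoto clustering rests on a simple similarity measure: intersection over union of fingerprint bits. The sketch below computes it on plain Python sets; in practice the fingerprints would be derived from SMILES strings with a cheminformatics library, and the bit sets here are hypothetical.

```python
def tanimoto(fp_a, fp_b):
    """Tanimoto (Jaccard) similarity: intersection over union of bits."""
    if not fp_a and not fp_b:
        return 1.0  # convention: two empty fingerprints are identical
    return len(fp_a & fp_b) / len(fp_a | fp_b)

# Hypothetical "on" bits for two similar ligands' fingerprints.
ligand_a = {1, 5, 9, 12}
ligand_b = {1, 5, 9, 30}
print(tanimoto(ligand_a, ligand_b))  # 3 shared of 5 total bits -> 0.6
```

A clustering pass would then group ligands whose pairwise Tanimoto similarity exceeds a chosen threshold, so that chemically similar ligands stay in the same data split.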
+Related classes/methods: `proteinflow.ligand` (0:0), `proteinflow.ligand._load_smiles` (653:678), `proteinflow.ligand._merge_chains_ligands` (694:737), `proteinflow.ligand._run_tanimoto_clustering` (983:1001)
+
+This module provides a suite of helper functions that support the intricate logic within the `Data Splitting Module`. These utilities handle tasks such as finding correspondences between protein chains, loading PDB files, merging chains, and managing biounit information during the data-splitting process.
+Related classes/methods: `proteinflow.split.utils` (0:0), `proteinflow.split.utils._find_correspondences` (139:149), `proteinflow.split.utils._load_pdbs` (72:99), `proteinflow.split.utils._merge_chains` (25:69), `proteinflow.split.utils._biounits_in_clusters_dict` (152:164)
+
+The CLI Interface is fundamental because it is the user's gateway to the entire ProteinFlow system. Without it, users could not initiate or control any of the data-pipeline operations. It abstracts away the underlying complexity of the data-processing components, providing a simplified, unified command-line experience. Its role as an orchestrator and dispatcher is critical for coordinating data-related tasks (downloading, generating, splitting) in a structured manner. Its integration with Logging and Reporting is equally vital: it gives users the feedback loop they need to understand the status and outcomes of the processes they initiate, making the system robust and user-friendly.
+ + +The CLI Interface serves as the primary command-line entry point for users to interact with the ProteinFlow data pipeline. Its fundamental role is to orchestrate the entire data processing workflow by translating user commands into specific actions. It acts as a dispatcher, invoking the appropriate backend functions from other core components such as the Data Downloader, Data Generator, and Data Splitter. Furthermore, it integrates with the Logging and Reporting component to provide operational feedback, status updates, and error summaries to the user, ensuring transparency and aiding in debugging. This component is crucial because it provides the user-facing control mechanism, making the complex data pipeline accessible and manageable.
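The dispatcher role can be sketched with stdlib `argparse`: parse a subcommand, then route to the matching backend stub. The subcommand names and the `--tag` option are illustrative; the real proteinflow CLI may be organized differently.

```python
import argparse

def build_parser():
    # Hypothetical subcommands standing in for the real CLI's verbs.
    parser = argparse.ArgumentParser(prog="proteinflow-sketch")
    sub = parser.add_subparsers(dest="command", required=True)
    for name in ("download", "generate", "split"):
        sub.add_parser(name).add_argument("--tag", default="latest")
    return parser

def dispatch(argv):
    """Translate a parsed command into a backend call (stubbed here)."""
    args = build_parser().parse_args(argv)
    backends = {
        "download": lambda a: f"downloading dataset {a.tag}",
        "generate": lambda a: f"generating dataset {a.tag}",
        "split": lambda a: f"splitting dataset {a.tag}",
    }
    return backends[args.command](args)

print(dispatch(["download", "--tag", "test"]))  # downloading dataset test
```

In the real library, the stubbed lambdas would instead invoke the Data Downloader, Data Generator, and Data Splitter components described above.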
+Related classes/methods: `proteinflow.cli` (18:20)
+
+Data Downloader: handles the acquisition of data.
+Related classes/methods: none
+
+Data Generator: manages the synthesis or transformation of data.
+
+Related classes/methods: none
+
+Data Splitter: manages dataset partitioning and re-consolidation.
+
+Related classes/methods: none
+
+Logging and Reporting: provides operational feedback, status updates, and error summaries.
+
+Related classes/methods: none
+The `ProteinFlow` project is structured around a streamlined pipeline for acquiring, processing, organizing, and preparing protein data for machine learning tasks, complemented by analysis and visualization capabilities. The architecture is designed to facilitate efficient handling of large biological datasets.
+ + +The primary command-line interface that serves as the entry point for users to initiate and control the entire data pipeline. It orchestrates the execution of data acquisition, processing, and organization workflows.
+Related classes/methods: `proteinflow.cli` (18:20)
+
+This foundational component is responsible for acquiring raw protein data (PDB and SAbDab files), defining the core data structures for representing proteins and associated ligands, and performing the initial processing steps. This includes filtering, cleaning, and converting raw data into standardized `ProteinEntry` objects, handling quality checks, and managing ligand-specific details.
+Related classes/methods: `proteinflow.data` (1:1), `proteinflow.data.PDBEntry` (1:1), `proteinflow.data.SAbDabEntry` (1:1), `proteinflow.download` (1:1), `proteinflow.processing` (1:1), `proteinflow.ligand` (1:1)
+
+Focuses on organizing and partitioning the processed protein data into distinct train, validation, and test sets, often employing clustering techniques to ensure diverse and representative splits. It also provides PyTorch-compatible `Dataset` and `DataLoader` classes for efficient batching and preparation of data, making it ready for machine-learning model training and evaluation.
+Related classes/methods: `proteinflow.split` (1:1), `proteinflow.data.torch` (1:1)
+
+Offers a comprehensive suite of tools for calculating various protein-related metrics (e.g., sequence similarity, language-model perplexity) and for visualizing protein structures and animations. This component also manages the optional external dependencies required for its advanced functionality.
+Related classes/methods: `proteinflow.metrics` (1:1), `proteinflow.visualize` (1:1), `proteinflow.extra` (1:1)