From d5c964afc44b9dad82075cf7ae9320f435d165e3 Mon Sep 17 00:00:00 2001 From: MIS Center Date: Mon, 29 Dec 2025 21:13:24 -0600 Subject: [PATCH 1/3] Packaging Configuration --- .github/dependabot.yml | 11 +++++++++++ .gitignore | 3 +++ repoview.spec | 7 ++++--- 3 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 .github/dependabot.yml create mode 100644 .gitignore diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..9d866e3 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: "pip" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e146eb0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.pylintrc +.ruff_cache/ +.vscode/ diff --git a/repoview.spec b/repoview.spec index d65b7a2..6b38235 100644 --- a/repoview.spec +++ b/repoview.spec @@ -8,9 +8,10 @@ URL: https://github.com/sergiomb2/repoview Source0: https://github.com/sergiomb2/repoview/archive/v%{version}/%{name}-%{version}.tar.gz BuildArch: noarch -Requires: python >= 3.5 -Requires: python-genshi >= 0.6.3 -Requires: python-libcomps +Requires: python3 >= 3.5 +Requires: python3-genshi >= 0.6.3 +Requires: python3-libcomps +Requires: python3-rpm %description RepoView creates a set of static HTML pages in a yum/dnf repository for easy From 90175c7ce858f5d06e5a618eb841c466fab6d0c2 Mon Sep 17 00:00:00 2001 From: MIS Center Date: Mon, 29 Dec 2025 21:14:19 -0600 Subject: [PATCH 2/3] Documentation --- ARCHITECTURE.md | 170 +++++++++++++++++++++++++++++++++++++++++++++ CHARTS.md | 181 ++++++++++++++++++++++++++++++++++++++++++++++++ SECURITY.md | 29 ++++++++ 3 files changed, 380 insertions(+) create mode 100644 ARCHITECTURE.md create mode 100644 CHARTS.md create mode 100644 SECURITY.md diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 0000000..77fe165 --- /dev/null +++ b/ARCHITECTURE.md @@ -0,0 +1,170 @@ +# Repoview Architecture Documentation + +## Application Overview and Objectives + +**Repoview** is a static site generator designed to make YUM repositories easily browseable via a web browser. + +The primary objective of Repoview is to transform the raw metadata of a YUM repository (packages, groups, changelogs) into a set of interlinked, user-friendly HTML pages. This allows users to explore the contents of a repository without needing to use command-line tools like `yum` or `dnf`. + +Key features include: +- **Static Output**: Generates pure HTML/CSS/XML files, requiring no active server-side processing (like PHP or Python) on the hosting web server. +- **Incremental Generation**: Tracks the state of generated files to only regenerate pages for packages or groups that have changed, significantly speeding up updates for large repositories. +- **Templating Support**: Uses the Genshi templating engine, allowing for complete customization of the output look and feel. +- **RSS Feeds**: Optionally generates RSS feeds for the latest package updates. 
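+
+The incremental behavior above boils down to content checksums: the input data for every page is hashed and compared against the value recorded in `state.sqlite` during the previous run, and the file is only rewritten on a mismatch. Below is a minimal, hedged sketch of that idea using hypothetical page data; the real logic lives in `mk_checksum()` and `has_changed()` in `repoview.py`.
+
+```python
+import hashlib
+
+def mk_checksum(*dicts):
+    """Hash dict values in sorted-key order, as repoview's state tracking does."""
+    mangle = []
+    for data in dicts:
+        for key in sorted(data):
+            mangle.append(data[key])
+    return hashlib.md5(str(mangle).encode()).hexdigest()
+
+# Hypothetical inputs: if neither dict changed since the last run, the
+# checksum matches the stored value and the page write is skipped.
+repo_data = {'title': 'Repoview', 'my_version': '0.7'}
+pkg_data = {'name': 'bash', 'summary': 'The GNU Bourne Again shell'}
+print(mk_checksum(repo_data, pkg_data))
+```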
+ +## Architecture and Design Choices + +Repoview is written in **Python 3** and follows a procedural workflow encapsulated within a main controller class. + +### Execution Phases + +The `Repoview` constructor performs the entire workflow in a deterministic set of phases (mirrored by inline comments in `repoview.py`): + +1. **Repository Discovery** – validate the `repodata/repomd.xml`, locate compressed SQLite artifacts (`primary`, `other`, optional `group`), and open database handles. Compressed inputs (`.gz`, `.bz2`, `.xz`) are streamed into temporary files and scheduled for cleanup. +2. **Filesystem Preparation** – resolve the output directory (user-selectable via `-o/--output-dir`, always nested under the repo root), optionally wipe it when `--force` is set, copy the `layout/` assets from the template directory, and initialize the incremental `state.sqlite` database (optionally stored outside the repo via `--state-dir`, with repository-specific filenames hashed via MD5). +3. **Grouping & Package Rendering** – load groups either from `comps.xml`, RPM `Group` tags, or synthesized letter buckets. For each group, build package summaries, render package pages (with change detection, avoiding duplicate renders through an in-memory cache), and then render the group page if any dependency changed. +4. **Aggregate Views** – compute the latest packages list, render `index.html`, and optionally generate `latest-feed.xml` using the RSS template and package data. +5. **State Finalization** – clean up stale files left from previous runs and commit the updated checksums to `state.sqlite` so subsequent invocations stay incremental. + +### Core Components + +1. **Data Ingestion (YUM Metadata)**: + - Repoview does not parse RPM headers directly. Instead, it relies on the SQLite metadata databases (`primary.sqlite`, `other.sqlite`) generated by `createrepo`. + - **Compression Handling**: It automatically detects and decompresses metadata databases (supporting `.gz`, `.bz2`, and `.xz` formats) into temporary files for processing. + - It verifies the repository structure by parsing `repodata/repomd.xml`. + - It connects to these SQLite databases to query package details, file lists, and changelogs. + +2. **State Management (Incremental Builds)**: + - To avoid rebuilding the entire site on every run, Repoview maintains a local SQLite database (`state.sqlite`). + - **Checksumming**: For every generated page (package, group, index), a content-based checksum (MD5) is calculated. + - **Change Detection**: + - Before writing a file to disk, the calculated checksum is compared against the stored checksum in `state.sqlite`. If they match, the file write is skipped. + - Package data is memoized per name (`self.written`) so packages that appear in multiple groups are rendered once but referenced many times. + - **Stale File Cleanup**: The system tracks which files are visited during a run. Files present in the output directory but not visited are considered "stale" (e.g., deleted packages) and are removed. + +3. **Templating Engine**: + - **Genshi**: The application uses Genshi for rendering HTML. + - **Structure**: + - `index.kid`: The main entry page listing groups and latest packages. + - `group.kid`: Displays lists of packages within a specific group. + - `package.kid`: detailed view of a single package. + - `rss.kid`: XML template for the RSS feed. + - **Layout**: A `layout` directory containing static assets (CSS, images) is copied to the output directory. + +4. 
**Grouping Logic**: + - **Comps.xml**: If available, Repoview uses the `comps.xml` file to organize packages into logical groups (e.g., "Development", "System Tools"). + - **RPM Groups**: As a fallback, it can group packages based on the `Group` tag in the RPM metadata. + - **Alphabetical**: It automatically generates "Letter Groups" (Packages A, Packages B, etc.) for easier browsing. These groups share the same rendering pipeline and benefit from the package memoization cache. + +### Data Flow + +1. **Initialization**: Parse arguments, setup output directories, initialize state DB. +2. **Repo Connection**: Connect to `primary` and `other` SQLite databases. +3. **Group Processing**: + - Iterate through each defined group. + - For each package in the group, fetch details and changelogs. + - Render package page -> Checksum -> Write if changed. + - Render group page -> Checksum -> Write if changed. +4. **Index Generation**: Aggregate group lists and "latest modified" packages to render `index.html`. +5. **Finalization**: Commit state changes and delete stale files. + +### Python Environment + +Repoview is built as a self-contained, single-file Python utility (`repoview.py`) designed for ease of deployment and broad compatibility. + +#### Core Dependencies + +While the application leans heavily on the Python standard library, it requires a few key external modules to function: + +- **Genshi (`genshi.template`)**: + - *Role*: The primary templating engine used to render HTML and XML output. + - *Usage*: It processes `.kid` template files, injecting Python objects (package lists, repository metadata) into the markup. + +- **RPM Bindings (`rpm`)**: + - *Role*: Provides native RPM functionality. + - *Usage*: Specifically used for `rpm.labelCompare` to accurately sort and compare package versions (Epoch-Version-Release) and architectures. + +- **Libcomps (`libcomps`)**: + - *Role*: Library for parsing `comps.xml` files. + - *Usage*: Optional but recommended. It is used to parse group definitions when `comps.xml` is present or specified. + +- **SQLite (`sqlite3`)**: + - *Role*: Database Interface. + - *Usage*: Used to interact with the YUM metadata databases (`primary.sqlite`, `other.sqlite`) and the internal state tracking database (`state.sqlite`). + +#### Code Strategy + +- **Single-File Distribution**: The entire application logic resides in `repoview.py`, making it easy to copy and run without complex installation procedures. +- **Standard Library First**: It prioritizes standard library modules (`os`, `sys`, `shutil`, `hashlib`, `xml.etree`, `optparse`) to minimize external dependencies. +- **Compression Support**: It uses standard libraries (`gzip`, `bz2`, `lzma`) to transparently handle compressed metadata files commonly found in repositories. +- **Graceful Degradation**: The code includes try-except blocks for imports to handle different environment configurations (e.g., falling back to `cElementTree` or different `sqlite` import paths). + +## Command Line Arguments + +Repoview CLI accepts the following arguments: + +| Argument | Type | Default | Description | +| :--- | :--- | :--- | :--- | +| `repodir` | Path | (Required) | The root directory of the repository (containing the `repodata` folder). | +| `-i`, `--ignore-package` | String (Glob) | `[]` | Ignore packages matching the glob pattern (e.g., `*debuginfo*`). Can be specified multiple times. | +| `-x`, `--exclude-arch` | String | `[]` | Exclude packages for specific architectures (e.g., `src`). Can be specified multiple times. 
| +| `-k`, `--template-dir` | Path | `/usr/share/repoview/templates/default` | Path to a custom directory containing Genshi templates (`*.kid`) and layout files. | +| `-o`, `--output-dir` | Path | `repoview` | Subdirectory (within `repodir`) where HTML files will be generated. | +| `-s`, `--state-dir` | Path | `[output-dir]` | Directory to store the `state.sqlite` database. Defaults to the output directory. | +| `-t`, `--title` | String | `"Repoview"` | Title of the repository to be displayed on generated pages. | +| `-u`, `--url` | URL | `None` | Base URL of the repository. Required for generating valid RSS feed links. | +| `-f`, `--force` | Flag | `False` | Force regeneration of all pages, ignoring the state database checksums. | +| `-q`, `--quiet` | Flag | `False` | Suppress standard output status messages. Only fatal errors are printed. | +| `-c`, `--comps` | Path | `None` | Path to an alternative `comps.xml` file, overriding the one in `repomd.xml`. | +| `-V`, `--version` | Flag | - | Print version number and exit. | +| `-h`, `--help` | Flag | - | Print usage message and exit. | + +## Examples + +### Basic Usage + +Generate repoview pages for a repository located at `/var/www/html/repo`. The output will be in `/var/www/html/repo/repoview`. + +```bash +repoview /var/www/html/repo +``` + +### Custom Title and RSS Feed + +Generate pages with a specific title and enable RSS feed generation (requires URL). + +```bash +repoview -t "My Enterprise Updates" -u "http://updates.example.com/repo" /var/www/html/repo +``` + +### Excluding Debug Packages + +Skip processing for debuginfo and documentation packages to save time and space. + +```bash +repoview -i "*debuginfo*" -i "*doc*" /var/www/html/repo +``` + +### Force Regeneration + +Force a complete rebuild of the site, useful after changing templates or upgrading Repoview. + +```bash +repoview --force /var/www/html/repo +``` + +### Using Custom Templates + +Use a custom set of templates located in `~/my-templates`. + +```bash +repoview -k ~/my-templates /var/www/html/repo +``` + +### Custom Output Location + +Write the generated site to `/var/www/html/repo/docs` instead of the default `repoview` folder. + +```bash +repoview -o docs /var/www/html/repo +``` diff --git a/CHARTS.md b/CHARTS.md new file mode 100644 index 0000000..9783d9a --- /dev/null +++ b/CHARTS.md @@ -0,0 +1,181 @@ +# Repoview Data Processing and Generation Flow + +This codemap traces the complete data processing pipeline of Repoview, from repository metadata parsing through static HTML generation. Key locations include the main entry point, repository validation, package data querying, template rendering, and state management. 
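+
+Before diving into the traces, the following stub sketch compresses the whole pipeline into a few lines. Most method names follow the real ones in `repoview.py` (`render_group` is invented here for clarity, since group rendering actually happens inline in the constructor), and the bodies are illustrative stand-ins; each stage is expanded by the correspondingly numbered trace below.
+
+```python
+class RepoviewSketch:
+    """Condensed control flow; bodies are stubs, not the real implementation."""
+
+    def setup_repo(self):           # Trace 1
+        print('validate repomd.xml, open primary/other SQLite databases')
+
+    def setup_state_db(self):       # Trace 5 (load side)
+        print('load previous checksums from state.sqlite')
+
+    def do_packages(self, group):   # Trace 3
+        print(f'render changed package pages for {group}')
+
+    def render_group(self, group):  # Trace 4
+        print(f'render the {group} page via Genshi')
+
+    def remove_stale(self):         # Trace 5 (cleanup side)
+        print('delete files not visited during this run')
+
+    def run(self):
+        self.setup_repo()
+        self.setup_state_db()
+        for group in ('comps groups', 'letter groups'):  # Trace 2
+            self.do_packages(group)
+            self.render_group(group)
+        self.remove_stale()
+
+
+RepoviewSketch().run()
+```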
+ +## Trace 1: Repository Initialization and Setup +**Description**: Entry point flow from command line to repository validation and database connections + +```mermaid +graph TD + subgraph Initialization + A["main command line parser"] -->|"parse_args"| B["Repoview with opts"] + B --> C["Repoview.__init__"] + end + + subgraph Constructor + C --> D["setup_repo"] + C --> H["setup_state_db"] + C --> I["setup_outdir"] + C --> J["process groups and packages"] + end + + subgraph RepoSetup + D --> E["parse repomd.xml file"] + E --> E1["repoxml = open(repomd).read"] + D --> F["locate metadata databases"] + F --> F1["primary_db for packages"] + F --> F2["other_db for changelogs"] + D --> G["establish SQLite connections"] + G --> G1["self.pconn = sqlite.connect"] + end + + J --> K["Repository validation complete"] +``` + +### Key Locations +| ID | Title | Description | Source | Code | +|---|---|---|---|---| +| 1a | Main Entry Point | Instantiates the Repoview controller with parsed command line options | repoview.py:1048 | `Repoview(opts)` instantiates the controller | +| 1b | Repository Setup | Validates repository structure and locates metadata files | repoview.py:196 | `self.setup_repo()` kicks off repository validation | +| 1c | Metadata Parsing | Reads and parses repomd.xml to find database locations | repoview.py:379 | `repoxml = open(repomd).read()` loads metadata XML | +| 1d | Database Connections | Establishes SQLite connections to primary and other metadata databases | repoview.py:420 | `self.pconn = sqlite.connect(primary)` opens primary DB | + + +## Trace 2: Group Discovery and Organization +**Description**: How packages are organized into groups using comps.xml or RPM group tags + +```mermaid +graph TD + A["Repository setup completion"] --> B["Check for custom comps file"] + B --> C{"Use comps.xml?"} + + C -->|"Yes"| D["Parse comps file path"] + D --> E["Setup comps groups"] + E --> F["Load libcomps parser"] + E --> G["Parse XML structure"] + + C -->|"No"| H["Fallback to RPM groups"] + H --> I["Setup RPM groups"] + I --> J["Query distinct RPM group tags"] + I --> K["Group packages by RPM metadata"] +``` + +### Key Locations +| ID | Title | Description | Source | Code | +|---|---|---|---|---| +| 2a | Comps File Check | Determines whether to use custom comps.xml or repository default | repoview.py:428 | `if self.opts.comps:` honors CLI override | +| 2b | Comps Groups Setup | Parses comps.xml to extract package group definitions | repoview.py:432 | `self.setup_comps_groups(comps)` loads comps data | +| 2c | XML Parsing | Uses libcomps to parse the comps.xml file structure | repoview.py:794 | `comps.fromxml_f(compsxml)` parses comps XML | +| 2d | RPM Groups Fallback | Uses RPM group tags when comps.xml is not available | repoview.py:203 | `self.setup_rpm_groups()` builds fallback groups | + + +## Trace 3: Package Data Processing and Generation +**Description**: Core data flow from package querying to individual HTML page generation + +```mermaid +graph TD + A["do_packages entry point"] --> B["for each package in group"] + B --> C["get_package_data"] + + subgraph DataFetching + C --> D["SQL query construction"] + D --> E["pcursor.execute"] + C --> F["fetch package versions"] + C --> G["get changelog data"] + end + + B --> H["calculate checksum"] + H --> I["mk_checksum"] + + B --> J{"check if changed"} + J -->|"Yes"| K["return package tuples"] + J -->|"No"| K + + K --> L["Template rendering phase"] + L --> M["Generate HTML if changed"] +``` + +### Key Locations +| ID | Title | Description | 
Source | Code | +|---|---|---|---|---| +| 3a | Package Processing Init | Starts processing packages for each group | repoview.py:239 | `packages = self.do_packages(...)` drives group build | +| 3b | Package Data Query | Queries SQLite databases for package metadata and changelogs | repoview.py:641 | `pkg_data = self.get_package_data(pkgname)` | +| 3c | Database Query Execution | Executes SQL to fetch package versions and metadata | repoview.py:532 | `pcursor.execute(query)` runs package query | +| 3d | Change Detection | Calculates checksum to determine if regeneration is needed | repoview.py:650 | `checksum = self.mk_checksum(...)` | + + +## Trace 4: Template Rendering and HTML Generation +**Description**: How Genshi templates are processed to generate the final HTML output + +```mermaid +graph TD + A["do_packages processes group"] --> B["get_package_data"] + B --> C["Package data collected"] + A --> D["mk_checksum"] + D --> E{"has_changed?"} + + E -->|"True"| F["Template Loading Phase"] + F --> G["pkg_kid.load PKGKID"] + G --> H["Template Generation Phase"] + + subgraph Rendering + H --> I["tmpl.generate"] + I --> J["Injects group_data"] + I --> K["Injects pkg_data"] + I --> L["Injects repo_data"] + end + + H --> M["HTML Rendering Phase"] + M --> N["stream.render to XHTML"] + N --> O["f.write saves to file"] + + P["Index Page Generation"] --> Q["idx_kid.load IDXKID"] +``` + +### Key Locations +| ID | Title | Description | Source | Code | +|---|---|---|---|---| +| 4a | Template Loading | Loads the package template using Genshi TemplateLoader | repoview.py:658 | `tmpl = self.pkg_kid.load(PKGKID)` fetches template | +| 4b | Template Generation | Generates template stream with package and repository data | repoview.py:660 | `stream = tmpl.generate(...)` | +| 4c | HTML Rendering | Renders the template to XHTML and writes to file | repoview.py:666 | `handle.write(stream.render(...))` writes XHTML | +| 4d | Index Generation | Generates the main index page with group listings | repoview.py:278 | `tmpl = idx_kid.load(IDXKID)` prepares index template | + + +## Trace 5: State Management and Incremental Builds +**Description**: Efficiency mechanisms for tracking changes and avoiding unnecessary regeneration + +```mermaid +graph TD + A["Repoview.__init__"] --> B["setup_state_db"] + + subgraph Setup + B --> C["Load existing checksums"] + B --> D["Initialize state SQLite DB"] + end + + A --> E["do_packages"] + + subgraph Processing + E --> F["get_package_data"] + E --> G["mk_checksum"] + E --> H["has_changed"] + H --> I["Compare with stored checksum"] + H --> J["INSERT INTO state"] + end + + A --> K["Final cleanup phase"] + K --> L["remove_stale"] + + subgraph Cleanup + L --> M["Find orphaned files"] + L --> N["DELETE FROM state cleanup"] + end +``` + +### Key Locations +| ID | Title | Description | Source | Code | +|---|---|---|---|---| +| 5a | State Database Setup | Initializes SQLite database for tracking file checksums | repoview.py:199 | `self.setup_state_db()` prepares state tracking | +| 5b | Change Detection | Checks if file content has changed since last generation | repoview.py:651 | `if self.has_changed(...):` guards writes | +| 5c | State Tracking | Records new file checksums in the state database | repoview.py:716 | `INSERT INTO state (filename, checksum)` | +| 5d | Cleanup Process | Removes files that are no longer present in the repository | repoview.py:295 | `self.remove_stale()` prunes files | diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..1512e9e --- 
/dev/null +++ b/SECURITY.md @@ -0,0 +1,29 @@ +# Security Policy + +We are committed to ensuring the security of our application and to addressing security issues with high priority. + +## Supported Versions + +We recommend always using the latest commit from the `main` branch, as we currently do not have a formal versioning scheme with designated security support. + +## Reporting a Vulnerability + +If you discover a security vulnerability, please report it via one of the following methods: + +1. **GitHub Private Vulnerability Reporting**: If this feature is enabled for the repository, please use it to submit your report. This is the most secure and preferred method. +2. **Create a Confidential Issue**: If private vulnerability reporting is not available, please create an issue on our GitHub repository. Please provide a clear and descriptive title, such as "Security Vulnerability: [Brief Description]", and include as much detail as possible in the issue description. If you have the option to make the issue confidential, please do so. + +Please include the following information in your report: +- A clear description of the vulnerability. +- Steps to reproduce the vulnerability. +- The version of the application you are using. +- The potential impact of the vulnerability. +- Any suggested mitigations or fixes, if you have them. + +We appreciate your efforts to responsibly disclose your findings, and we will make every effort to acknowledge your contributions. +We will make our best effort to respond to your report promptly, acknowledge the issue, and keep you updated on our progress toward a fix. +We kindly ask that you do not disclose the vulnerability publicly until we have had a chance to address it. + +Please do not report security vulnerabilities through public GitHub issues or pull requests. + +Thank you for helping to keep our project secure.
From 7ec3d270aeddd6b6e793ef609d521910f46846a4 Mon Sep 17 00:00:00 2001 From: MIS Center Date: Mon, 29 Dec 2025 21:17:22 -0600 Subject: [PATCH 3/3] Fix python3 code issues, and resolved all linting errors detected by pyright, pylint and ruff --- repoview.py | 584 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 336 insertions(+), 248 deletions(-) diff --git a/repoview.py b/repoview.py index 4255229..3da1535 100755 --- a/repoview.py +++ b/repoview.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -tt +#!/usr/bin/env python3 # -*- mode: Python; indent-tabs-mode: nil; -*- """ Repoview is a small utility to generate static HTML pages for a repodata @@ -30,7 +30,6 @@ # Copyright (C) 2007 by Konstantin Ryabitsev and contributors # Author: Konstantin Ryabitsev # -#pylint: disable-msg=F0401,W0704 __revision__ = '$Id$' @@ -38,22 +37,34 @@ import shutil import sys import time -import hashlib as md5 +import hashlib import functools -import rpm +from typing import Any, Dict, List, Optional, Sequence, Tuple +from bz2 import BZ2File +from gzip import GzipFile +from lzma import LZMAFile +from tempfile import mkstemp + +try: + import rpm # type: ignore[import] +except ImportError as exc: + raise ImportError('Repoview requires the "rpm" Python bindings.') from exc from optparse import OptionParser -from genshi.template import TemplateLoader try: - from xml.etree.cElementTree import fromstring, ElementTree, TreeBuilder -except ImportError: - from cElementTree import fromstring, ElementTree, TreeBuilder + from genshi.template import TemplateLoader # type: ignore[import] +except ImportError as exc: + raise ImportError('Repoview requires the "genshi" package.') from exc + +from xml.etree.ElementTree import fromstring, ElementTree, TreeBuilder + +import sqlite3 as sqlite try: - import sqlite3 as sqlite + import libcomps # type: ignore[import] except ImportError: - import sqlite + libcomps = None # type: ignore[assignment] ## # Some hardcoded constants @@ -72,6 +83,13 @@ SUPPORTED_DB_VERSION = 10 DEFAULT_TEMPLATEDIR = '/usr/share/repoview/templates/default' +# High-level execution pipeline (mirrored by the inline "Phase" comments below): +# 1. Parse CLI arguments (main) and instantiate Repoview. +# 2. Locate repository assets, prepare output/state directories, build exclusion SQL. +# 3. Build group definitions (comps, RPM groups, letter groups) and render package pages. +# 4. Render aggregate views (group pages, index, optional RSS). +# 5. Persist incremental state and delete artifacts from previous runs. + def _mkid(text): """ Make a web-friendly filename out of group names and package names. @@ -86,7 +104,7 @@ def _mkid(text): text = text.replace(' ', '_') return text -def _humansize(bytes): +def _humansize(num_bytes): """ This will return the size in sane units (KiB or MiB). @@ -96,18 +114,19 @@ def _humansize(bytes): @return: human-readable string @rtype: str """ - if bytes < 1024: - return '%d Bytes' % bytes - bytes = int(bytes) - kbytes = bytes/1024 - if kbytes/1024 < 1: - return '%d KiB' % kbytes - else: - return '%0.1f MiB' % (float(kbytes)/1024) + if num_bytes < 1024: + return f'{num_bytes:d} Bytes' + num_bytes = int(num_bytes) + kbytes = num_bytes // 1024 + if kbytes // 1024 < 1: + return f'{kbytes:d} KiB' + return f'{float(kbytes)/1024:0.1f} MiB' def _compare_evra(one, two): """ - Just a quickie sorting helper. Yes, I'm avoiding using lambdas. + Comparison helper for sorting packages by EVR (Epoch, Version, Release). + + It adapts the tuple format for use with rpm.labelCompare. 
@param one: tuple of (e,v,r,a) @type one: tuple @@ -125,7 +144,14 @@ def _compare_evra(one, two): class Repoview: """ - The working horse class. + The main controller class for Repoview. + + This class handles the entire workflow: + 1. initializing repository connections and state database, + 2. processing groups and packages, + 3. managing incremental builds via checksums, + 4. rendering templates using Genshi, + 5. and generating the final HTML output and RSS feeds. """ def __del__(self): @@ -138,23 +164,37 @@ def __init__(self, opts): @param opts: OptionParser's opts @type opts: OptionParser """ + # The constructor orchestrates the full build pipeline up front so that + # later helper methods can assume all shared state (database handles, + # exclusion SQL, template loaders, etc.) already exists. + # The initialization order below mirrors the chronological order of a + # repoview run: collect inputs → prepare filesystem → prepare state → + # compute grouping metadata → render pages → persist state. # list of files to remove at the end of processing - self.cleanup = [] + self.cleanup: List[str] = [] self.opts = opts - self.outdir = os.path.join(opts.repodir, 'repoview') + # Honor the CLI-provided output directory name (defaults to "repoview") + # but always treat it as a subdirectory of the repository root. + self.outdir = os.path.join(opts.repodir, opts.outdir) self.exclude = '1=1' - self.state_data = {} #? - self.written = {} #? + # Dictionary storing filename -> checksum mapping from the state database (previous run). + # Used to determine if a file needs to be regenerated. + self.state_data: Dict[str, str] = {} + # Dictionary tracking packages processed in the current run to handle duplicates + # and avoid re-processing. Maps pkgname -> pkg_tuple. + self.written: Dict[str, Tuple[str, str, Optional[str]]] = {} - self.groups = [] - self.letter_groups = [] + self.groups: List[Sequence[Any]] = [] + self.letter_groups: List[Sequence[Any]] = [] - self.pconn = None # primary.sqlite - self.oconn = None # other.sqlite - self.sconn = None # state db + self.pconn: Optional[sqlite.Connection] = None # primary.sqlite + self.oconn: Optional[sqlite.Connection] = None # other.sqlite + self.sconn: Optional[sqlite.Connection] = None # state db + # Phase 1: locate repository metadata, initialize database handles. self.setup_repo() + # Phase 2: prepare filesystem targets and incremental build state. self.setup_outdir() self.setup_state_db() self.setup_excludes() @@ -169,17 +209,20 @@ def __init__(self, opts): 'letters': letters, 'my_version': VERSION } + # Template engine handles page rendering. Each TemplateLoader instance can cache + # compiled templates, so we keep dedicated loaders for packages and groups to + # avoid cross-assignment of contextual attributes. group_kid = TemplateLoader(opts.templatedir) - group_kid.assume_encoding = "utf-8" - group_kid.repo_data = repo_data self.group_kid = group_kid pkg_kid = TemplateLoader(opts.templatedir) - pkg_kid.assume_encoding = "utf-8" - pkg_kid.repo_data = repo_data self.pkg_kid = pkg_kid count = 0 + # Phase 3: Iterate through all logical groups (explicit comps groups plus + # auto-generated alphabetical "Letter" buckets). Each iteration renders + # the packages belonging to the group, wires them into group metadata, and + # produces the HTML if any of the constituent checksums changed. 
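+        # For orientation: each group_data tuple unpacks below as
+        # (grp_name, grp_filename, grp_description, pkgnames); a purely
+        # hypothetical example would be
+        # ('Development', 'development.group.html', 'Dev tools', ['gcc']).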
for group_data in self.groups + self.letter_groups: (grp_name, grp_filename, grp_description, pkgnames) = group_data pkgnames.sort() @@ -190,6 +233,9 @@ def __init__(self, opts): 'filename': grp_filename, } + # Package pages double as a cache warm-up for group pages: the call returns + # summary tuples used on the group listing while also writing/refining the + # individual package HTML files. packages = self.do_packages(repo_data, group_data, pkgnames) if not packages: @@ -204,16 +250,17 @@ def __init__(self, opts): checksum = self.mk_checksum(repo_data, group_data) if self.has_changed(grp_filename, checksum): # write group file - self.say('Writing group %s\n' % grp_filename) + self.say(f'Writing group {grp_filename}\n') self.group_kid.group_data = group_data outfile = os.path.join(self.outdir, grp_filename) - tmpl= self.group_kid.load( GRPKID ) + tmpl = self.group_kid.load(GRPKID) - stream=tmpl.generate(group_data=group_data, repo_data=repo_data) - with open( outfile, "w" ) as f: - f.write( stream.render('xhtml', doctype='xhtml-strict')) + stream = tmpl.generate(group_data=group_data, repo_data=repo_data) + with open(outfile, "w", encoding="utf-8") as handle: + handle.write(stream.render('xhtml', doctype='xhtml-strict')) + # Phase 4: Build aggregated views (latest packages list, index page, optional RSS). latest = self.get_latest_packages() repo_data['latest'] = latest repo_data['groups'] = self.groups @@ -222,39 +269,64 @@ def __init__(self, opts): if self.has_changed('index.html', checksum): # Write index.html and rss feed (if asked) self.say('Writing index.html...') - idx_tpt = os.path.join(self.opts.templatedir, IDXKID) idx_kid = TemplateLoader(self.opts.templatedir) - idx_kid.assume_encoding = "utf-8" - idx_kid.repo_data = repo_data idx_kid.url = self.opts.url idx_kid.latest = latest idx_kid.groups = self.groups outfile = os.path.join(self.outdir, 'index.html') - tmpl= idx_kid.load( IDXKID ) + tmpl = idx_kid.load(IDXKID) - stream=tmpl.generate( repo_data = repo_data, url=self.opts.url, groups = self.groups, latest = latest ) - with open( outfile, "w" ) as f: - f.write( stream.render('xhtml', doctype='xhtml-strict')) + stream = tmpl.generate( + repo_data=repo_data, + url=self.opts.url, + groups=self.groups, + latest=latest, + ) + with open(outfile, "w", encoding="utf-8") as handle: + handle.write(stream.render('xhtml', doctype='xhtml-strict')) self.say('done\n') # rss feed if self.opts.url: self.do_rss(repo_data, latest) + # Phase 5: Delete orphaned files and persist state so the next run can stay incremental. self.remove_stale() - self.sconn.commit() + self._ensure_connection(self.sconn, 'state').commit() + + def _ensure_connection( + self, conn: Optional[sqlite.Connection], label: str + ) -> sqlite.Connection: + """ + Raise a helpful error if a SQLite connection has not been initialized yet. + """ + if conn is None: + msg = f'{label} database connection is not initialized.' + raise RuntimeError(msg) + return conn + + def _cursor(self, conn: Optional[sqlite.Connection], label: str) -> sqlite.Cursor: + """ + Convenience helper for retrieving a cursor from a (possibly optional) connection. + """ + return self._ensure_connection(conn, label).cursor() def setup_state_db(self): """ - Sets up the state-tracking database. + Initializes the SQLite database used for incremental build state tracking. + + The database stores checksums of previously generated files to avoid + unnecessary writes. If a specific state directory is not provided, + it creates 'state.sqlite' in the output directory. 
@rtype: void """ self.say('Examining state db...') if self.opts.statedir: # we'll use the md5sum of the repo location to make it unique - unique = '%s.state.sqlite' % md5.md5(self.outdir).hexdigest() + # among multiple repositories sharing the same statedir. + unique = f"{hashlib.md5(self.outdir.encode()).hexdigest()}.state.sqlite" statedb = os.path.join(self.opts.statedir, unique) else: statedb = os.path.join(self.outdir, 'state.sqlite') @@ -268,16 +340,16 @@ def setup_state_db(self): self.opts.force = True self.sconn = sqlite.connect(statedb) - scursor = self.sconn.cursor() + scursor = self._cursor(self.sconn, 'state') - query = """CREATE TABLE IF NOT EXISTS state ( + scursor.execute( + """CREATE TABLE IF NOT EXISTS state ( filename TEXT UNIQUE, checksum TEXT)""" - scursor.execute(query) + ) # read all state data into memory to track orphaned files - query = """SELECT filename, checksum FROM state""" - scursor.execute(query) + scursor.execute("SELECT filename, checksum FROM state") while True: row = scursor.fetchone() if row is None: @@ -287,8 +359,11 @@ def setup_state_db(self): def setup_repo(self): """ - Examines the repository, makes sure that it's valid and supported, - and then opens the necessary databases. + Validates the repository structure and initializes database connections. + + It parses 'repodata/repomd.xml' to locate the 'primary' (packages) and + 'other' (changelogs) SQLite databases, as well as the 'group' (comps) file. + It also checks for schema version compatibility. @rtype: void """ @@ -296,11 +371,12 @@ def setup_repo(self): repomd = os.path.join(self.opts.repodir, 'repodata', 'repomd.xml') if not os.access(repomd, os.R_OK): - sys.stderr.write('Not found: %s\n' % repomd) + sys.stderr.write(f'Not found: {repomd}\n') sys.stderr.write('Does not look like a repository. Exiting.\n') sys.exit(1) - repoxml = open(repomd).read() + with open(repomd, encoding='utf-8') as repomd_fp: + repoxml = repomd_fp.read() xml = fromstring(repoxml) #IGNORE:E1101 # look for primary_db, other_db, and optionally group @@ -309,10 +385,18 @@ def setup_repo(self): xmlns = 'http://linux.duke.edu/metadata/repo' for datanode in xml.findall('{%s}data' % xmlns): - href = datanode.find('{%s}location' % xmlns).attrib['href'] - if datanode.attrib['type'] == 'primary_db': + location_node = datanode.find('{%s}location' % xmlns) + if location_node is None: + continue + href = location_node.attrib.get('href') + if href is None: + continue + dtype = datanode.attrib.get('type') + if dtype == 'primary_db': primary = os.path.join(self.opts.repodir, href) - dbversion = datanode.find('{%s}database_version' % xmlns).text + version_node = datanode.find('{%s}database_version' % xmlns) + if version_node is not None and version_node.text is not None: + dbversion = version_node.text elif datanode.attrib['type'] == 'other_db': other = os.path.join(self.opts.repodir, href) elif datanode.attrib['type'] == 'group': @@ -324,10 +408,9 @@ def setup_repo(self): sys.exit(1) if int(dbversion) > SUPPORTED_DB_VERSION: - self.say('Sorry, the db_version in the repository is %s, but ' - 'repoview only supports versions up to %s. Please check ' - 'for a newer repoview version.\n' % (dbversion, - SUPPORTED_DB_VERSION)) + self.say(f'Sorry, the db_version in the repository is {dbversion}, but ' + f'repoview only supports versions up to {SUPPORTED_DB_VERSION}. 
' + 'Please check for a newer repoview version.\n') sys.exit(1) self.say('done\n') @@ -362,30 +445,32 @@ def say(self, text): def setup_excludes(self): """ - Formulates an SQL exclusion rule that we use throughout in order - to respect the ignores passed on the command line. + Constructs the 'self.exclude' SQL clause to filter packages based on + command-line ignore patterns and architecture exclusions. @rtype: void """ # Formulate exclusion rule xarches = [] for xarch in self.opts.xarch: - xarch = xarch.replace("'", "''") - xarches.append("arch != '%s'" % xarch) + safe_xarch = xarch.replace("'", "''") + xarches.append(f"arch != '{safe_xarch}'") if xarches: self.exclude += ' AND ' + ' AND '.join(xarches) pkgs = [] for pkg in self.opts.ignore: - pkg = pkg.replace("'", "''") - pkg = pkg.replace("*", "%") - pkgs.append("name NOT LIKE '%s'" % pkg) + safe_pkg = pkg.replace("'", "''").replace("*", "%") + pkgs.append(f"name NOT LIKE '{safe_pkg}'") if pkgs: self.exclude += ' AND ' + ' AND '.join(pkgs) def setup_outdir(self): """ - Sets up the output directory. + Prepares the output directory for generating the static site. + + It handles cleaning up if force mode is active, ensures correct permissions (755), + and copies static layout assets (CSS, images) from the template directory. @rtype: void """ @@ -407,9 +492,12 @@ def setup_outdir(self): def get_package_data(self, pkgname): """ - Queries the packages and changelog databases and returns package data - in a dict: + Queries the packages and changelog databases to construct a detailed package record. + + It aggregates all available versions/architectures of the package into a single + dictionary structure. + Returns a dictionary with the following structure: pkg_data = { 'name': str, 'filename': str, @@ -419,40 +507,29 @@ def get_package_data(self, pkgname): 'rpm_license': str, 'rpm_sourcerpm': str, 'vendor': str, - 'rpms': [] + 'rpms': [] # List of version tuples } - the "rpms" key is a list of tuples with the following members: + The "rpms" key list contains tuples: (epoch, version, release, arch, time_build, size, location_href, author, changelog, time_added) - @param pkgname: the name of the package to look up @type pkgname: str - @return: a REALLY hairy dict of values - @rtype: list + @return: A dictionary containing the package details and version history. + @rtype: dict """ # fetch versions - query = """SELECT pkgKey, - epoch, - version, - release, - arch, - summary, - description, - url, - time_build, - rpm_license, - rpm_sourcerpm, - size_package, - location_href, - rpm_vendor - FROM packages - WHERE name='%s' AND %s - ORDER BY arch ASC""" % (pkgname, self.exclude) - pcursor = self.pconn.cursor() - pcursor.execute(query) + query = ( + "SELECT pkgKey, epoch, version, release, arch, summary, " + "description, url, time_build, rpm_license, rpm_sourcerpm, " + "size_package, location_href, rpm_vendor " + "FROM packages WHERE name=? AND " + f"{self.exclude} ORDER BY arch ASC" + ) + pcursor = self._cursor(self.pconn, 'primary') + pcursor.execute(query, (pkgname,)) rows = pcursor.fetchall() @@ -491,6 +568,9 @@ def get_package_data(self, pkgname): 'rpms': [] } + # Build a human-readable payload for the template system. The first row + # encountered becomes the canonical metadata, while every row contributes + # an RPM tuple (epoch, version, release, arch, etc.) for the download table. 
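+        # Note the mixed types within each "rpms" tuple built below: size
+        # is already humanized via _humansize(), while time_build stays a
+        # raw build timestamp (do_rss later passes it to time.gmtime()).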
for row in versions: (pkg_key, epoch, version, release, arch, summary, description, url, time_build, rpm_license, rpm_sourcerpm, @@ -506,11 +586,13 @@ def get_package_data(self, pkgname): size = _humansize(size_package) # Get latest changelog entry for each version - query = '''SELECT author, date, changelog - FROM changelog WHERE pkgKey=%d - ORDER BY date DESC LIMIT 1''' % pkg_key - ocursor = self.oconn.cursor() - ocursor.execute(query) + query = ( + "SELECT author, date, changelog " + "FROM changelog WHERE pkgKey=? " + "ORDER BY date DESC LIMIT 1" + ) + ocursor = self._cursor(self.oconn, 'other') + ocursor.execute(query, (pkg_key,)) orow = ocursor.fetchone() if not orow: author = time_added = changelog = None @@ -544,13 +626,15 @@ def do_packages(self, repo_data, group_data, pkgnames): (pkg_name, pkg_filename, pkg_summary) @rtype: list """ - # this is what we return for the group object + # Each group page needs a compact listing with (name, filename, summary). + # pkg_tuples doubles as that listing and as an in-memory cache so we do + # not re-render the same package when it appears in multiple groups. pkg_tuples = [] for pkgname in pkgnames: pkg_filename = _mkid(PKGFILE % pkgname) - if pkgname in self.written.keys(): + if pkgname in self.written: pkg_tuples.append(self.written[pkgname]) continue @@ -565,17 +649,21 @@ def do_packages(self, repo_data, group_data, pkgnames): checksum = self.mk_checksum(repo_data, group_data, pkg_data) if self.has_changed(pkg_filename, checksum): - self.say('Writing package %s\n' % pkg_filename) + self.say(f'Writing package {pkg_filename}\n') self.pkg_kid.group_data = group_data self.pkg_kid.pkg_data = pkg_data outfile = os.path.join(self.outdir, pkg_filename) self.pkg_kid = TemplateLoader(self.opts.templatedir) - tmpl= self.pkg_kid.load( PKGKID ) + tmpl = self.pkg_kid.load(PKGKID) - stream=tmpl.generate(group_data=group_data, pkg_data=pkg_data, repo_data=repo_data) - with open( outfile, "w" ) as f: - f.write( stream.render('xhtml', doctype='xhtml-strict')) + stream = tmpl.generate( + group_data=group_data, + pkg_data=pkg_data, + repo_data=repo_data, + ) + with open(outfile, "w", encoding="utf-8") as handle: + handle.write(stream.render('xhtml', doctype='xhtml-strict')) self.written[pkgname] = pkg_tuple else: self.written[pkgname] = pkg_tuple @@ -584,13 +672,15 @@ def do_packages(self, repo_data, group_data, pkgnames): def mk_checksum(self, *args): """ - A fairly dirty function used for state tracking. This is how we know - if the contents of the page have changed or not. - - @param *args: dicts - @rtype *args: dicts - - @return: an md5 checksum of the dicts passed + Calculates a deterministic MD5 checksum for the provided data dictionaries. + + This checksum is used for state tracking to detect if the content of a page + would change based on the data. It sorts dictionary keys to ensure consistency + before hashing. + + @param *args: One or more dictionaries containing data to be hashed. + + @return: An MD5 checksum string of the serialized data. 
@rtype: str """ mangle = [] @@ -602,7 +692,7 @@ def mk_checksum(self, *args): for key in keys: mangle.append(data[key]) - return md5.md5((str(mangle)).encode()).hexdigest() + return hashlib.md5((str(mangle)).encode()).hexdigest() def has_changed(self, filename, checksum): """ @@ -618,19 +708,19 @@ def has_changed(self, filename, checksum): @rtype: bool """ # calculate checksum - scursor = self.sconn.cursor() - if filename not in self.state_data.keys(): + scursor = self._cursor(self.sconn, 'state') + if filename not in self.state_data: # totally new entry query = '''INSERT INTO state (filename, checksum) - VALUES ('%s', '%s')''' % (filename, checksum) - scursor.execute(query) + VALUES (?, ?)''' + scursor.execute(query, (filename, checksum)) return True if self.state_data[filename] != checksum: # old entry, but changed query = """UPDATE state - SET checksum='%s' - WHERE filename='%s'""" % (checksum, filename) - scursor.execute(query) + SET checksum=? + WHERE filename=?""" + scursor.execute(query, (checksum, filename)) # remove it from state_data tracking, so we know we've seen it del self.state_data[filename] @@ -646,54 +736,44 @@ def remove_stale(self): @rtype void """ - scursor = self.sconn.cursor() - for filename in self.state_data.keys(): - self.say('Removing stale file %s\n' % filename) + scursor = self._cursor(self.sconn, 'state') + for filename in self.state_data: + self.say(f'Removing stale file {filename}\n') fullpath = os.path.join(self.outdir, filename) if os.access(fullpath, os.W_OK): os.unlink(fullpath) - query = """DELETE FROM state WHERE filename='%s'""" % filename - scursor.execute(query) + scursor.execute("DELETE FROM state WHERE filename=?", (filename,)) def z_handler(self, dbfile): """ If the database file is compressed, uncompresses it and returns the filename of the uncompressed file. 
- + @param dbfile: the name of the file @type dbfile: str - + @return: the name of the uncompressed file @rtype: str """ - (junk, ext) = os.path.splitext(dbfile) - - if ext == '.bz2': - from bz2 import BZ2File - zfd = BZ2File(dbfile) - elif ext == '.gz': - from gzip import GzipFile - zfd = GzipFile(dbfile) - elif ext == '.xz': - from lzma import LZMAFile - zfd = LZMAFile(dbfile) - else: + (_, ext) = os.path.splitext(dbfile) + opener = { + '.bz2': BZ2File, + '.gz': GzipFile, + '.xz': LZMAFile, + }.get(ext) + if opener is None: # not compressed (or something odd) return dbfile - import tempfile - (unzfd, unzname) = tempfile.mkstemp('.repoview') + fd, unzname = mkstemp('.repoview') self.cleanup.append(unzname) - unzfd = open(unzname, 'wb') - - while True: - data = zfd.read(16384) - if not data: - break - unzfd.write(data) - zfd.close() - unzfd.close() + with opener(dbfile) as zfd, os.fdopen(fd, 'wb') as unzfd: + while True: + data = zfd.read(16384) + if not data: + break + unzfd.write(data) return unzname @@ -706,48 +786,51 @@ def setup_comps_groups(self, compsxml): @rtype: void """ - import libcomps + if libcomps is None: + raise ImportError('Repoview requires the "libcomps" package to parse comps.xml.') self.say('Parsing comps.xml...') comps = libcomps.Comps() comps.fromxml_f(compsxml) for group in comps.groups: - #if not group.uservisible: - #continue + # if not group.uservisible: + # continue if not group.packages: - continue + continue group_filename = _mkid(GRPFILE % group.id) pkg_names = [pkg.name for pkg in group.packages] - self.groups.append([ group.name, group_filename, group.desc, pkg_names ]) + self.groups.append([group.name, group_filename, group.desc, pkg_names]) self.say('done\n') def setup_rpm_groups(self): """ - When comps is not around, we use the (useless) RPM groups. + Fallback method to group packages using their RPM 'Group' tag + when a valid comps.xml is not available. @rtype: void """ self.say('Collecting group information...') - query = """SELECT DISTINCT lower(rpm_group) AS rpm_group - FROM packages - ORDER BY rpm_group ASC""" - pcursor = self.pconn.cursor() + query = ( + "SELECT DISTINCT lower(rpm_group) AS rpm_group " + "FROM packages ORDER BY rpm_group ASC" + ) + pcursor = self._cursor(self.pconn, 'primary') pcursor.execute(query) for (rpmgroup,) in pcursor.fetchall(): - qgroup = rpmgroup.replace("'", "''") - query = """SELECT DISTINCT name - FROM packages - WHERE lower(rpm_group)='%s' - AND %s - ORDER BY name""" % (qgroup, self.exclude) - pcursor.execute(query) - pkgnames = [] - for (pkgname,) in pcursor.fetchall(): - pkgnames.append(pkgname) - + pcursor.execute( + ( + "SELECT DISTINCT name " + "FROM packages " + "WHERE lower(rpm_group)=? 
" + f" AND {self.exclude} " + "ORDER BY name" + ), + (rpmgroup,), + ) + pkgnames = [pkgname for (pkgname,) in pcursor.fetchall()] group_filename = _mkid(GRPFILE % rpmgroup) self.groups.append([rpmgroup, group_filename, None, pkgnames]) self.say('done\n') @@ -764,23 +847,27 @@ def get_latest_packages(self, limit=30): @rtype: list """ self.say('Collecting latest packages...') - query = """SELECT name - FROM packages - WHERE %s - GROUP BY name - ORDER BY MAX(time_build) DESC LIMIT %s""" % (self.exclude, limit) - pcursor = self.pconn.cursor() + query = ( + "SELECT name " + "FROM packages " + f"WHERE {self.exclude} " + "GROUP BY name " + f"ORDER BY MAX(time_build) DESC LIMIT {limit}" + ) + pcursor = self._cursor(self.pconn, 'primary') pcursor.execute(query) latest = [] - query = """SELECT version, release, time_build - FROM packages - WHERE name = '%s' - ORDER BY time_build DESC LIMIT 1""" + query = ( + "SELECT version, release, time_build " + "FROM packages " + "WHERE name = ? " + "ORDER BY time_build DESC LIMIT 1" + ) for (pkgname,) in pcursor.fetchall(): filename = _mkid(PKGFILE % pkgname.replace("'", "''")) - pcursor.execute(query % pkgname) + pcursor.execute(query, (pkgname,)) (version, release, built) = pcursor.fetchone() latest.append((pkgname, filename, version, release, built)) @@ -796,25 +883,29 @@ def setup_letter_groups(self): @rtype: str """ self.say('Collecting letters...') - query = """SELECT DISTINCT substr(upper(name), 1, 1) AS letter - FROM packages - WHERE %s - ORDER BY letter""" % self.exclude - pcursor = self.pconn.cursor() + query = ( + "SELECT DISTINCT substr(upper(name), 1, 1) AS letter " + "FROM packages " + f"WHERE {self.exclude} " + "ORDER BY letter" + ) + pcursor = self._cursor(self.pconn, 'primary') pcursor.execute(query) letters = '' for (letter,) in pcursor.fetchall(): letters += letter - rpmgroup = 'Letter %s' % letter - description = 'Packages beginning with letter "%s".' % letter + rpmgroup = f'Letter {letter}' + description = f'Packages beginning with letter "{letter}".' pkgnames = [] - query = """SELECT DISTINCT name - FROM packages - WHERE name LIKE '%s%%' - AND %s""" % (letter, self.exclude) - pcursor.execute(query) + query = ( + "SELECT DISTINCT name " + "FROM packages " + "WHERE name LIKE ? 
" + f" AND {self.exclude}" + ) + pcursor.execute(query, (f'{letter}%',)) for (pkgname,) in pcursor.fetchall(): pkgnames.append(pkgname) @@ -839,56 +930,20 @@ def do_rss(self, repo_data, latest): etb = TreeBuilder() out = os.path.join(self.outdir, RSSFILE) etb.start('rss', {'version': '2.0'}) - etb.start('channel') - etb.start('title') - etb.data(repo_data['title']) - etb.end('title') - etb.start('link') - etb.data('%s/repoview/%s' % (self.opts.url, RSSFILE)) - etb.end('link') - etb.start('description') - etb.data('Latest packages for %s' % repo_data['title']) - etb.end('description') - etb.start('lastBuildDate') - etb.data(time.strftime(ISOFORMAT)) - etb.end('lastBuildDate') - etb.start('generator') - etb.data('Repoview-%s' % repo_data['my_version']) - etb.end('generator') - - rss_tpt = os.path.join(self.opts.templatedir, RSSKID) - rss_kid = Template(file=rss_tpt) - rss_kid.assume_encoding = "utf-8" - rss_kid.repo_data = repo_data - rss_kid.url = self.opts.url - + etb.start('channel', {}) + self._rss_add_text(etb, 'title', repo_data['title']) + self._rss_add_text(etb, 'link', f'{self.opts.url}/repoview/{RSSFILE}') + self._rss_add_text(etb, 'description', f"Latest packages for {repo_data['title']}") + self._rss_add_text(etb, 'lastBuildDate', time.strftime(ISOFORMAT)) + self._rss_add_text(etb, 'generator', f"Repoview-{repo_data['my_version']}") + + rss_kid = self.pkg_kid.load(RSSKID) for row in latest: pkg_data = self.get_package_data(row[0]) + if pkg_data is None: + continue - rpm = pkg_data['rpms'][0] - (epoch, version, release, arch, built) = rpm[:5] - etb.start('item') - etb.start('guid') - etb.data('%s/repoview/%s+%s:%s-%s.%s' % (self.opts.url, - pkg_data['filename'], - epoch, version, release, - arch)) - etb.end('guid') - etb.start('link') - etb.data('%s/repoview/%s' % (self.opts.url, pkg_data['filename'])) - etb.end('link') - etb.start('pubDate') - etb.data(time.strftime(ISOFORMAT, time.gmtime(int(built)))) - etb.end('pubDate') - etb.start('title') - etb.data('Update: %s-%s-%s' % (pkg_data['name'], version, release)) - etb.end('title') - rss_kid.pkg_data = pkg_data - description = rss_kid.serialize() - etb.start('description') - etb.data(description.decode('utf-8')) - etb.end('description') - etb.end('item') + self._rss_add_item(etb, rss_kid, repo_data, pkg_data) etb.end('channel') etb.end('rss') @@ -899,6 +954,40 @@ def do_rss(self, repo_data, latest): etree.write(out, 'utf-8') self.say('done\n') + def _rss_add_item(self, builder, rss_kid, repo_data, pkg_data): + """ + Append a single package entry to the RSS feed builder. 
+ """ + rpm_entry = pkg_data['rpms'][0] + epoch, version, release, arch, built = rpm_entry[:5] + + builder.start('item', {}) + + pkg_url = f"{self.opts.url}/repoview/{pkg_data['filename']}" + guid = ( + f"{pkg_url}+{epoch}:{version}-" + f"{release}.{arch}" + ) + self._rss_add_text(builder, 'guid', guid) + self._rss_add_text(builder, 'link', pkg_url) + pub_date = time.strftime(ISOFORMAT, time.gmtime(int(built))) + self._rss_add_text(builder, 'pubDate', pub_date) + title = f"Update: {pkg_data['name']}-{version}-{release}" + self._rss_add_text(builder, 'title', title) + description = rss_kid.generate( + pkg_data=pkg_data, repo_data=repo_data, url=self.opts.url + ).render() + self._rss_add_text(builder, 'description', description) + + builder.end('item') + + @staticmethod + def _rss_add_text(builder, tag, text): + """Helper for rss field generation.""" + builder.start(tag, {}) + builder.data(text) + builder.end(tag) + def main(): """ @@ -960,4 +1049,3 @@ def main(): if __name__ == '__main__': main() -#!/usr/bin/python3 -tt