Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,7 @@ fruitbot-*.tar
/tmp/

.env

# Markov model files and backups
coach_model
model_backups/
72 changes: 72 additions & 0 deletions MARKOV_BACKUP.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Markov Model Backup System

This document describes the backup system that protects fruitbot's Markov chain model from corruption.

## Overview

The backup system protects the `coach_model` file from corruption by:
- Creating automatic backups before model operations
- Performing periodic backups every 30 minutes
- Maintaining a rotation of the last 10 backups
- Providing automatic recovery on model operation failures
- Offering manual restore functionality

## Components

### Fruitbot.MarkovBackup Module

Located in `lib/fruitbot/markov_backup.ex`, this module provides:

- `create_backup()` - Creates a timestamped backup of the current model
- `restore_latest_backup()` - Restores the most recent backup
- `safe_model_operation(function)` - Wraps model operations with backup/recovery
- `get_latest_backup()` - Gets path to the most recent backup

### Backup Storage

- Backups are stored in `./model_backups/` directory
- Each backup is named `coach_model_<timestamp>`, where `<timestamp>` is the UTC Unix time (in seconds) at which the backup was created
- Directory is automatically created if it doesn't exist
- Old backups are automatically cleaned up (keeps last 10)

## Usage

### Automatic Protection

All markov operations are automatically protected:
- `!advice` command generation
- Model training on unrecognized commands

### Manual Recovery

If the model becomes corrupted, use:
```
!restore-model
```

This command will restore the most recent backup.

## Configuration

- Backup interval: 30 minutes (`@backup_interval` in `lib/fruitbot/worker.ex`)
- Max backups kept: 10 (`@max_backups` in `lib/fruitbot/markov_backup.ex`)
- Model path: `./coach_model`
- Backup directory: `./model_backups/`

## Files Excluded from Git

The following are added to `.gitignore`:
- `coach_model` - The main model file
- `model_backups/` - All backup files

This prevents large model files from being committed to the repository.

## Error Handling

If any model operation fails:
1. The error is logged
2. The system attempts to restore from the latest backup
3. The original error is re-raised for proper error propagation
4. Operators are notified via logs

The failed operation itself still raises, but later operations run against the restored model, so the bot remains functional even if model corruption occurs.
33 changes: 28 additions & 5 deletions lib/fruitbot/commands.ex
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,14 @@ defmodule Fruitbot.Commands do
end

# Handler for the advice command: generates a line of text from the Markov
# model stored at ./coach_model.
#
# The whole load/configure/generate sequence runs inside
# Fruitbot.MarkovBackup.safe_model_operation/1, which backs the model up first
# and restores the latest backup (then re-raises) if anything in here raises.
#
# Returns {:ok, msg} on success. The query argument is ignored.
def say_advice(_query) do
  Fruitbot.MarkovBackup.safe_model_operation(fn ->
    {:ok, model} = Markov.load("./coach_model", sanitize_tokens: true, store_log: [:train])

    try do
      :ok = Markov.configure(model, shift_probabilities: true)
      {:ok, msg} = Markov.generate_text(model)
      {:ok, msg}
    after
      # Always release the loaded model, even when configure/generate fails,
      # so a failed match does not leave it loaded while the backup is restored.
      Markov.unload(model)
    end
  end)
end

def say_next(_query) do
Expand Down Expand Up @@ -119,6 +121,25 @@ defmodule Fruitbot.Commands do
message = "all fruits must abide by the code of conduct https://datafruits.fm/coc"
{ :ok, message }
end

# Handler for the hydrate command: replies with a fixed self-care reminder.
# The query argument is ignored.
def say_hydrate(_query),
  do:
    {:ok,
     "hey everyone make sure youre drinking enough water and taking time for yourselves and eating some nutritious food and taking breaks from social media coz i care abt you and i want you to be happy n healthy"}

# Handler for the restore-model command: asks Fruitbot.MarkovBackup to restore
# the most recent backup and reports the outcome as a chat message.
#
# Always returns {:ok, message}; failures are reported in the message text
# rather than as an error tuple. The query argument is ignored.
def say_restore_model(_query) do
  reply =
    case Fruitbot.MarkovBackup.restore_latest_backup() do
      {:ok, backup_path} -> "Model restored from backup: #{backup_path}"
      {:error, :no_backups} -> "No model backups available to restore from"
      {:error, reason} -> "Failed to restore model: #{reason}"
    end

  {:ok, reply}
end
end

@commands [
Expand All @@ -139,6 +160,8 @@ defmodule Fruitbot.Commands do
%Fruitbot.Command{aliases: ["help"], handler: &Handlers.say_help/1},
%Fruitbot.Command{aliases: ["label", "bandcamp"], handler: &Handlers.say_label/1},
%Fruitbot.Command{aliases: ["coc", "conduct"], handler: &Handlers.say_coc/1},
%Fruitbot.Command{aliases: ["hydrate"], handler: &Handlers.say_hydrate/1},
%Fruitbot.Command{aliases: ["restore-model"], handler: &Handlers.say_restore_model/1},
]

# Returns the compile-time list of registered commands held in @commands.
def all_commands do
  @commands
end
Expand Down
114 changes: 114 additions & 0 deletions lib/fruitbot/markov_backup.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
defmodule Fruitbot.MarkovBackup do
  @moduledoc """
  Backs up and restores the markov model stored at `./coach_model`.

  Backups are recursive copies of the model placed under `./model_backups`,
  each named `coach_model_<timestamp>` where the timestamp is the UTC Unix
  time in seconds. After every successful backup only the newest
  #{10} backups are kept; older ones are deleted.

  All paths are relative to the current working directory.
  """

  require Logger

  @model_path "./coach_model"
  @backup_dir "./model_backups"
  @backup_prefix "coach_model_"
  @max_backups 10

  @doc """
  Creates the backup directory (including parents) if it does not exist.

  Returns whatever `File.mkdir_p/1` returns: `:ok` or `{:error, posix}`.
  """
  def ensure_backup_dir do
    File.mkdir_p(@backup_dir)
  end

  @doc """
  Copies the current model into a new timestamped backup.

  Returns `{:ok, backup_path}` on success, `{:error, :no_model_file}` when
  there is no model to back up, or `{:error, reason}` when the backup
  directory cannot be created or the copy fails. Old backups are pruned
  after a successful copy.
  """
  def create_backup do
    # Attempt to create the directory first (as before), but remember the
    # outcome instead of discarding it.
    dir_result = ensure_backup_dir()

    cond do
      not File.exists?(@model_path) ->
        Logger.warning("No model file found at #{@model_path} to backup")
        {:error, :no_model_file}

      dir_result != :ok ->
        {:error, reason} = dir_result
        Logger.error("Failed to create markov model backup: #{reason}")
        {:error, reason}

      true ->
        copy_model_to_new_backup()
    end
  end

  @doc """
  Copies the most recent backup over the model path.

  Returns `{:ok, backup_path}` on success, `{:error, :no_backups}` when the
  backup directory holds no backups, or `{:error, reason}` when listing the
  directory or copying fails.
  """
  def restore_latest_backup do
    # Best effort: with the directory present, an empty backup set is reported
    # as :no_backups rather than :enoent. A failure here resurfaces from the
    # directory listing below.
    _ = ensure_backup_dir()

    case get_latest_backup() do
      {:ok, backup_path} ->
        case File.cp_r(backup_path, @model_path) do
          {:ok, _copied} ->
            Logger.info("Restored markov model from backup at #{backup_path}")
            {:ok, backup_path}

          # File.cp_r/3 reports failure as {:error, reason, file}, not a 2-tuple.
          {:error, reason, _file} ->
            Logger.error("Failed to restore markov model from backup: #{reason}")
            {:error, reason}
        end

      {:error, reason} ->
        Logger.error("No backup available to restore: #{reason}")
        {:error, reason}
    end
  end

  @doc """
  Returns `{:ok, path}` for the newest backup, `{:error, :no_backups}` when
  none exist, or `{:error, posix}` when the backup directory cannot be listed.
  """
  def get_latest_backup do
    case list_backups() do
      {:ok, [latest | _older]} -> {:ok, Path.join(@backup_dir, latest)}
      {:ok, []} -> {:error, :no_backups}
      {:error, reason} -> {:error, reason}
    end
  end

  @doc """
  Runs the zero-arity `operation` with backup protection.

  A backup is attempted first (its result is not checked, so the operation
  still runs when there is nothing to back up). If `operation` raises, the
  error is logged, the latest backup is restored, and the original exception
  is re-raised with its stacktrace. Otherwise the operation's return value is
  returned unchanged.
  """
  def safe_model_operation(operation) do
    # Create backup before any model operation
    create_backup()

    try do
      operation.()
    rescue
      error ->
        Logger.error("Model operation failed: #{inspect(error)}")
        Logger.info("Attempting to restore from backup...")
        restore_latest_backup()
        reraise error, __STACKTRACE__
    end
  end

  # Copies the model to a fresh timestamped path and prunes old backups.
  defp copy_model_to_new_backup do
    timestamp = DateTime.utc_now() |> DateTime.to_unix() |> to_string()
    backup_path = Path.join(@backup_dir, @backup_prefix <> timestamp)

    case File.cp_r(@model_path, backup_path) do
      {:ok, _copied} ->
        Logger.info("Created markov model backup at #{backup_path}")
        cleanup_old_backups()
        {:ok, backup_path}

      # File.cp_r/3 reports failure as {:error, reason, file}, not a 2-tuple.
      {:error, reason, _file} ->
        Logger.error("Failed to create markov model backup: #{reason}")
        {:error, reason}
    end
  end

  # Lists backup entry names, newest first. Ordering is lexical on the name,
  # which matches chronological order while the timestamps have equal length.
  defp list_backups do
    with {:ok, entries} <- File.ls(@backup_dir) do
      backups =
        entries
        |> Enum.filter(&String.starts_with?(&1, @backup_prefix))
        |> Enum.sort(:desc)

      {:ok, backups}
    end
  end

  # Deletes every backup beyond the newest @max_backups.
  defp cleanup_old_backups do
    case list_backups() do
      {:ok, backups} ->
        backups
        |> Enum.drop(@max_backups)
        |> Enum.each(&delete_backup/1)

      {:error, reason} ->
        Logger.error("Failed to list backup directory: #{reason}")
    end
  end

  # Removes a single backup entry, logging the outcome.
  defp delete_backup(name) do
    case File.rm_rf(Path.join(@backup_dir, name)) do
      {:ok, _removed} ->
        Logger.info("Cleaned up old backup: #{name}")

      # File.rm_rf/1 also reports failure as a 3-tuple.
      {:error, reason, _file} ->
        Logger.error("Failed to clean up old backup #{name}: #{reason}")
    end
  end
end
22 changes: 19 additions & 3 deletions lib/fruitbot/worker.ex
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ defmodule Fruitbot.Worker do
@topic "rooms:lobby"

@interval 120 * 60 * 1000
@backup_interval 30 * 60 * 1000 # Backup every 30 minutes

def start_link(args) do
Slipstream.start_link(__MODULE__, args, name: __MODULE__)
Expand All @@ -15,6 +16,7 @@ defmodule Fruitbot.Worker do
# Slipstream init callback: arms both recurring timers (the periodic chat
# message and the markov model backup), then connects using the given config.
@impl Slipstream
def init(config) do
  timers = [send_periodic_message: @interval, backup_model: @backup_interval]

  Enum.each(timers, fn {message, delay_ms} ->
    Process.send_after(self(), message, delay_ms)
  end)

  {:ok, connect!(config)}
end

Expand Down Expand Up @@ -56,6 +58,17 @@ defmodule Fruitbot.Worker do
{:noreply, socket}
end

# Timer callback for :backup_model. Takes a backup of the markov model, then
# re-arms the timer so the next backup is scheduled @backup_interval
# milliseconds after this one finishes. The socket is passed through unchanged.
@impl true
def handle_info(:backup_model, socket) do
  IO.puts("Creating periodic backup of markov model...")

  # The backup result is deliberately not inspected here.
  _backup_result = Fruitbot.MarkovBackup.create_backup()

  _timer_ref = Process.send_after(self(), :backup_model, @backup_interval)

  {:noreply, socket}
end


@impl true
def handle_cast({:send_discord_msg, msg}, socket) do
Expand Down Expand Up @@ -114,9 +127,12 @@ defmodule Fruitbot.Worker do
send_message(socket, message)

{:error, :bad_command} ->
{:ok, model} = Markov.load("./coach_model", sanitize_tokens: true, store_log: [:train])
:ok = Markov.train(model, message["body"])
Markov.unload(model)
Fruitbot.MarkovBackup.safe_model_operation(fn ->
{:ok, model} = Markov.load("./coach_model", sanitize_tokens: true, store_log: [:train])
:ok = Markov.train(model, message["body"])
Markov.unload(model)
:ok
end)
# noop
IO.puts("Coach doesn't understand this command. Try another!")
:ignore
Expand Down