diff --git a/.gitignore b/.gitignore index 5adb15e..45ac63a 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,7 @@ fruitbot-*.tar /tmp/ .env + +# Markov model files and backups +coach_model +model_backups/ diff --git a/MARKOV_BACKUP.md b/MARKOV_BACKUP.md new file mode 100644 index 0000000..0c0562f --- /dev/null +++ b/MARKOV_BACKUP.md @@ -0,0 +1,72 @@ +# Markov Model Backup System + +This document describes the model corruption protection system implemented for the fruitbot's markov chain functionality. + +## Overview + +The backup system protects the `coach_model` file from corruption by: +- Creating automatic backups before model operations +- Performing periodic backups every 30 minutes +- Maintaining a rotation of the last 10 backups +- Providing automatic recovery on model operation failures +- Offering manual restore functionality + +## Components + +### Fruitbot.MarkovBackup Module + +Located in `lib/fruitbot/markov_backup.ex`, this module provides: + +- `create_backup()` - Creates a timestamped backup of the current model +- `restore_latest_backup()` - Restores the most recent backup +- `safe_model_operation(function)` - Wraps model operations with backup/recovery +- `get_latest_backup()` - Gets path to the most recent backup + +### Backup Storage + +- Backups are stored in `./model_backups/` directory +- Each backup is named `coach_model_` +- Directory is automatically created if it doesn't exist +- Old backups are automatically cleaned up (keeps last 10) + +## Usage + +### Automatic Protection + +All markov operations are automatically protected: +- `!advice` command generation +- Model training on unrecognized commands + +### Manual Recovery + +If the model becomes corrupted, use: +``` +!restore-model +``` + +This command will restore the most recent backup. + +## Configuration + +- Backup interval: 30 minutes (configurable in worker.ex) +- Max backups kept: 10 (configurable in markov_backup.ex) +- Model path: `./coach_model` +- Backup directory: `./model_backups/` + +## Files Excluded from Git + +The following are added to `.gitignore`: +- `coach_model` - The main model file +- `model_backups/` - All backup files + +This prevents large model files from being committed to the repository. + +## Error Handling + +If any model operation fails: +1. The error is logged +2. The system attempts to restore from the latest backup +3. The original error is re-raised for proper error propagation +4. Operators are notified via logs + +This ensures the bot remains functional even if model corruption occurs. \ No newline at end of file diff --git a/lib/fruitbot/commands.ex b/lib/fruitbot/commands.ex index e31ecf5..8f9982b 100644 --- a/lib/fruitbot/commands.ex +++ b/lib/fruitbot/commands.ex @@ -34,12 +34,14 @@ defmodule Fruitbot.Commands do end def say_advice(_query) do - {:ok, model} = Markov.load("./coach_model", sanitize_tokens: true, store_log: [:train]) - :ok = Markov.configure(model, shift_probabilities: true) + Fruitbot.MarkovBackup.safe_model_operation(fn -> + {:ok, model} = Markov.load("./coach_model", sanitize_tokens: true, store_log: [:train]) + :ok = Markov.configure(model, shift_probabilities: true) - {:ok, msg} = Markov.generate_text(model) - Markov.unload(model) - {:ok, msg} + {:ok, msg} = Markov.generate_text(model) + Markov.unload(model) + {:ok, msg} + end) end def say_next(_query) do @@ -119,6 +121,25 @@ defmodule Fruitbot.Commands do message = "all fruits must abide by the code of conduct https://datafruits.fm/coc" { :ok, message } end + + def say_hydrate(_query) do + message = "hey everyone make sure youre drinking enough water and taking time for yourselves and eating some nutritious food and taking breaks from social media coz i care abt you and i want you to be happy n healthy" + { :ok, message } + end + + def say_restore_model(_query) do + case Fruitbot.MarkovBackup.restore_latest_backup() do + {:ok, backup_path} -> + message = "Model restored from backup: #{backup_path}" + {:ok, message} + {:error, :no_backups} -> + message = "No model backups available to restore from" + {:ok, message} + {:error, reason} -> + message = "Failed to restore model: #{reason}" + {:ok, message} + end + end end @commands [ @@ -139,6 +160,8 @@ defmodule Fruitbot.Commands do %Fruitbot.Command{aliases: ["help"], handler: &Handlers.say_help/1}, %Fruitbot.Command{aliases: ["label", "bandcamp"], handler: &Handlers.say_label/1}, %Fruitbot.Command{aliases: ["coc", "conduct"], handler: &Handlers.say_coc/1}, + %Fruitbot.Command{aliases: ["hydrate"], handler: &Handlers.say_hydrate/1}, + %Fruitbot.Command{aliases: ["restore-model"], handler: &Handlers.say_restore_model/1}, ] def all_commands(), do: @commands diff --git a/lib/fruitbot/markov_backup.ex b/lib/fruitbot/markov_backup.ex new file mode 100644 index 0000000..d999515 --- /dev/null +++ b/lib/fruitbot/markov_backup.ex @@ -0,0 +1,114 @@ +defmodule Fruitbot.MarkovBackup do + @moduledoc """ + Module to handle backing up and restoring markov model files to prevent corruption. + """ + + require Logger + + @model_path "./coach_model" + @backup_dir "./model_backups" + @max_backups 10 + + def ensure_backup_dir do + File.mkdir_p(@backup_dir) + end + + def create_backup do + ensure_backup_dir() + + if File.exists?(@model_path) do + timestamp = DateTime.utc_now() |> DateTime.to_unix() |> to_string() + backup_path = Path.join(@backup_dir, "coach_model_#{timestamp}") + + case File.cp_r(@model_path, backup_path) do + {:ok, _} -> + Logger.info("Created markov model backup at #{backup_path}") + cleanup_old_backups() + {:ok, backup_path} + {:error, reason} -> + Logger.error("Failed to create markov model backup: #{reason}") + {:error, reason} + end + else + Logger.warn("No model file found at #{@model_path} to backup") + {:error, :no_model_file} + end + end + + def restore_latest_backup do + ensure_backup_dir() + + case get_latest_backup() do + {:ok, backup_path} -> + case File.cp_r(backup_path, @model_path) do + {:ok, _} -> + Logger.info("Restored markov model from backup at #{backup_path}") + {:ok, backup_path} + {:error, reason} -> + Logger.error("Failed to restore markov model from backup: #{reason}") + {:error, reason} + end + {:error, reason} -> + Logger.error("No backup available to restore: #{reason}") + {:error, reason} + end + end + + def get_latest_backup do + case File.ls(@backup_dir) do + {:ok, files} -> + backup_files = files + |> Enum.filter(fn f -> String.starts_with?(f, "coach_model_") end) + |> Enum.sort(:desc) + + case backup_files do + [latest | _] -> + {:ok, Path.join(@backup_dir, latest)} + [] -> + {:error, :no_backups} + end + {:error, reason} -> + {:error, reason} + end + end + + defp cleanup_old_backups do + case File.ls(@backup_dir) do + {:ok, files} -> + backup_files = files + |> Enum.filter(fn f -> String.starts_with?(f, "coach_model_") end) + |> Enum.sort(:desc) + + if length(backup_files) > @max_backups do + files_to_delete = Enum.drop(backup_files, @max_backups) + Enum.each(files_to_delete, fn file -> + file_path = Path.join(@backup_dir, file) + case File.rm_rf(file_path) do + {:ok, _} -> + Logger.info("Cleaned up old backup: #{file}") + {:error, reason} -> + Logger.error("Failed to clean up old backup #{file}: #{reason}") + end + end) + end + {:error, reason} -> + Logger.error("Failed to list backup directory: #{reason}") + end + end + + def safe_model_operation(operation) do + # Create backup before any model operation + create_backup() + + try do + result = operation.() + result + rescue + error -> + Logger.error("Model operation failed: #{inspect(error)}") + Logger.info("Attempting to restore from backup...") + restore_latest_backup() + reraise error, __STACKTRACE__ + end + end +end \ No newline at end of file diff --git a/lib/fruitbot/worker.ex b/lib/fruitbot/worker.ex index eace8de..ef85981 100644 --- a/lib/fruitbot/worker.ex +++ b/lib/fruitbot/worker.ex @@ -7,6 +7,7 @@ defmodule Fruitbot.Worker do @topic "rooms:lobby" @interval 120 * 60 * 1000 + @backup_interval 30 * 60 * 1000 # Backup every 30 minutes def start_link(args) do Slipstream.start_link(__MODULE__, args, name: __MODULE__) @@ -15,6 +16,7 @@ defmodule Fruitbot.Worker do @impl Slipstream def init(config) do Process.send_after(self(), :send_periodic_message, @interval) + Process.send_after(self(), :backup_model, @backup_interval) {:ok, connect!(config)} end @@ -56,6 +58,17 @@ defmodule Fruitbot.Worker do {:noreply, socket} end + @impl true + def handle_info(:backup_model, socket) do + IO.puts("Creating periodic backup of markov model...") + Fruitbot.MarkovBackup.create_backup() + + # Schedule the next backup + Process.send_after(self(), :backup_model, @backup_interval) + + {:noreply, socket} + end + @impl true def handle_cast({:send_discord_msg, msg}, socket) do @@ -114,9 +127,12 @@ defmodule Fruitbot.Worker do send_message(socket, message) {:error, :bad_command} -> - {:ok, model} = Markov.load("./coach_model", sanitize_tokens: true, store_log: [:train]) - :ok = Markov.train(model, message["body"]) - Markov.unload(model) + Fruitbot.MarkovBackup.safe_model_operation(fn -> + {:ok, model} = Markov.load("./coach_model", sanitize_tokens: true, store_log: [:train]) + :ok = Markov.train(model, message["body"]) + Markov.unload(model) + :ok + end) # noop IO.puts("Coach doesn't understand this command. Try another!") :ignore