From 67cac30a0210ae23020866afccf3bf923a32131a Mon Sep 17 00:00:00 2001
From: Rory Gianni
Date: Wed, 28 Jan 2026 13:45:58 +0000
Subject: [PATCH] Adds sensemaker job model and specs

---
 app/models/sensemaker/job.rb                  | 221 +++++++
 .../20251113104941_create_sensemaker_jobs.rb  |  23 +
 db/schema.rb                                  |  26 +-
 spec/factories/sensemaker/jobs.rb             |  21 +
 spec/models/sensemaker/job_spec.rb            | 560 ++++++++++++++++++
 5 files changed, 850 insertions(+), 1 deletion(-)
 create mode 100644 app/models/sensemaker/job.rb
 create mode 100644 db/migrate/20251113104941_create_sensemaker_jobs.rb
 create mode 100644 spec/factories/sensemaker/jobs.rb
 create mode 100644 spec/models/sensemaker/job_spec.rb

diff --git a/app/models/sensemaker/job.rb b/app/models/sensemaker/job.rb
new file mode 100644
index 00000000000..4f194cb4a84
--- /dev/null
+++ b/app/models/sensemaker/job.rb
@@ -0,0 +1,221 @@
+module Sensemaker
+  # One run of a sensemaker script, plus the input/output files it owns on disk.
+  class Job < ApplicationRecord
+    self.table_name = "sensemaker_jobs"
+
+    ANALYSABLE_TYPES = [
+      "Debate",
+      "Proposal",
+      "Poll",
+      "Topic",
+      "Legislation::Question",
+      "Legislation::Proposal",
+      "Legislation::QuestionOption",
+      "Budget",
+      "Budget::Group"
+    ].freeze
+
+    # Value written to `error` by #cancel! and recognised by #cancelled?.
+    CANCELLED_ERROR = "Cancelled".freeze
+
+    # Suffixes appended to the output base path by scripts that write several
+    # files. Any script not listed here writes a single file at the base path.
+    OUTPUT_SUFFIXES = {
+      "advanced_runner.ts" => [
+        "-summary.json",
+        "-topic-stats.json",
+        "-comments-with-scores.json"
+      ].freeze,
+      "runner.ts" => [
+        "-summary.json",
+        "-summary.html",
+        "-summary.md",
+        "-summaryAndSource.csv"
+      ].freeze
+    }.freeze
+
+    belongs_to :user, optional: false
+    belongs_to :parent_job, class_name: "Sensemaker::Job", optional: true
+    belongs_to :analysable, polymorphic: true, optional: true
+    has_many :children, class_name: "Sensemaker::Job", foreign_key: :parent_job_id, inverse_of: :parent_job,
+                        dependent: :nullify
+
+    validates :analysable_type, presence: true, inclusion: { in: ANALYSABLE_TYPES }
+    validates :analysable_id, presence: true, unless: -> { analysable_type == "Proposal" }
+
+    before_save :set_persisted_output_if_successful
+    after_destroy :cleanup_associated_files
+
+    scope :published, -> { where(published: true) }
+    # The column is nullable, so a NULL flag also counts as unpublished.
+    scope :unpublished, -> { where(published: [false, nil]) }
+
+    def started?
+      started_at.present?
+    end
+
+    def finished?
+      finished_at.present?
+    end
+
+    def errored?
+      error.present?
+    end
+
+    def cancelled?
+      finished? && error == CANCELLED_ERROR
+    end
+
+    def running?
+      started? && !finished?
+    end
+
+    # Human-readable state, resolved from the most specific condition down.
+    def status
+      if cancelled?
+        "Cancelled"
+      elsif errored?
+        "Failed"
+      elsif finished?
+        "Completed"
+      elsif started?
+        "Running"
+      else
+        "Unstarted"
+      end
+    end
+
+    def self.unstarted
+      where(started_at: nil).where(finished_at: nil)
+    end
+
+    def self.running
+      where.not(started_at: nil).where(finished_at: nil)
+    end
+
+    def self.successful
+      where(error: nil).where.not(finished_at: nil)
+    end
+
+    # Finished jobs with any error recorded, which includes cancelled jobs.
+    def self.failed
+      where.not(error: nil).where.not(finished_at: nil)
+    end
+
+    def cancel!
+      update!(finished_at: Time.current, error: CANCELLED_ERROR)
+    end
+
+    # File name (or, for multi-output scripts, base name) the script writes to.
+    def output_file_name
+      case script
+      when "health_check_runner.ts"
+        "health-check-#{id}.txt"
+      when "advanced_runner.ts", "runner.ts"
+        "output-#{id}"
+      when "categorization_runner.ts"
+        "categorization-output-#{id}.csv"
+      when "single-html-build.js"
+        "report-#{id}.html"
+      else
+        "output-#{id}.csv"
+      end
+    end
+
+    def has_multiple_outputs?
+      OUTPUT_SUFFIXES.key?(script)
+    end
+
+    def default_output_path
+      File.join(Sensemaker::Paths.sensemaker_data_folder, output_file_name)
+    end
+
+    # Every file the job is expected to produce, rooted at persisted_output
+    # when it is set and at default_output_path otherwise.
+    def output_artifact_paths
+      artifact_paths_for(persisted_output.presence || default_output_path)
+    end
+
+    def has_outputs?
+      output_artifact_paths.all? { |path| File.exist?(path) }
+    end
+
+    def self.for_budget(budget)
+      group_subquery = budget.groups.select(:id)
+      published.where(analysable_type: "Budget", analysable_id: budget.id).or(
+        published.where(analysable_type: "Budget::Group", analysable_id: group_subquery)
+      )
+    end
+
+    def self.for_process(process)
+      proposals_subquery = process.proposals.select(:id)
+      questions_subquery = process.questions.select(:id)
+      question_options_subquery = Legislation::QuestionOption
+                                  .where(legislation_question_id: questions_subquery)
+                                  .select(:id)
+
+      published
+        .where(analysable_type: "Legislation::Proposal", analysable_id: proposals_subquery)
+        .or(published.where(analysable_type: "Legislation::Question", analysable_id: questions_subquery))
+        .or(published.where(analysable_type: "Legislation::QuestionOption",
+                            analysable_id: question_options_subquery))
+    end
+
+    private
+
+    # Expands a base path into the concrete files the current script writes.
+    def artifact_paths_for(base_path)
+      suffixes = OUTPUT_SUFFIXES[script]
+      return [base_path] if suffixes.nil?
+
+      suffixes.map { |suffix| "#{base_path}#{suffix}" }
+    end
+
+    # Stores default_output_path in persisted_output the first time a job is
+    # saved as finished without an error and all of its output files exist.
+    def set_persisted_output_if_successful
+      return unless finished? && error.nil?
+      return if persisted_output.present?
+
+      self.persisted_output = default_output_path if has_outputs?
+    end
+
+    # Best-effort removal of the job's files after destroy. Failures are
+    # logged and swallowed so that they never abort the destroy itself.
+    # Returns the flattened FileUtils results, or nil when cleanup raised.
+    def cleanup_associated_files
+      data_folder = Sensemaker::Paths.sensemaker_data_folder
+      result = [
+        cleanup_input_files(data_folder),
+        cleanup_output_files(data_folder),
+        cleanup_persisted_output
+      ].flatten.compact
+
+      Rails.logger.info("Cleaned up files for job #{id}: #{result.inspect}")
+      result
+    rescue => e
+      Rails.logger.warn("Failed to cleanup files for job #{id}: #{e.message}")
+      nil
+    end
+
+    def cleanup_input_files(data_folder)
+      input_file = "#{data_folder}/input-#{id}.csv"
+      [input_file, "#{input_file}.unfiltered"].map { |path| FileUtils.rm_f(path) }
+    end
+
+    def cleanup_output_files(data_folder)
+      artifact_paths_for("#{data_folder}/#{output_file_name}").map { |path| FileUtils.rm_f(path) }
+    end
+
+    # For multi-output scripts persisted_output is only a base path, so every
+    # derived artifact is checked instead of the base path itself.
+    def cleanup_persisted_output
+      return if persisted_output.blank?
+
+      leftovers = output_artifact_paths.select { |path| File.exist?(path) }
+      return if leftovers.empty?
+
+      leftovers.map { |path| FileUtils.rm_f(path) }
+    end
+  end
+end
diff --git a/db/migrate/20251113104941_create_sensemaker_jobs.rb b/db/migrate/20251113104941_create_sensemaker_jobs.rb
new file mode 100644
index 00000000000..36deeff7daf
--- /dev/null
+++ b/db/migrate/20251113104941_create_sensemaker_jobs.rb
@@ -0,0 +1,23 @@
+class CreateSensemakerJobs < ActiveRecord::Migration[7.1]
+  def change
+    create_table :sensemaker_jobs do |t|
+      t.datetime :started_at
+      t.datetime :finished_at
+      t.string :script
+      t.integer :pid
+      t.text :error
+      t.references :user, null: false, foreign_key: true
+      t.string :analysable_type, null: false
+      t.integer :analysable_id
+      t.timestamps
+      t.text :additional_context
+      t.references :parent_job, foreign_key: { to_table: :sensemaker_jobs }
+      t.string :input_file
+      t.string :persisted_output
+      t.boolean :published, default: false
+      t.integer :comments_analysed, default: 0
+    end
+
+    add_index :sensemaker_jobs, [:analysable_type, :analysable_id]
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index c5077c98346..b4a03be32e0 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema[7.2].define(version: 2025_10_09_085528) do
+ActiveRecord::Schema[7.2].define(version: 2025_11_13_104941) do
   # These are extensions that must be enabled in order to support this database
   enable_extension "pg_trgm"
   enable_extension "plpgsql"
@@ -1440,6 +1440,28 @@
     t.index ["goal_id"], name: "index_sdg_targets_on_goal_id"
   end
 
+  create_table "sensemaker_jobs", force: :cascade do |t|
+    t.datetime "started_at"
+    t.datetime "finished_at"
+    t.string "script"
+    t.integer "pid"
+    t.text "error"
+    t.bigint "user_id", null: false
+    t.string "analysable_type", null: false
+    t.integer "analysable_id"
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+    t.text "additional_context"
+    t.bigint "parent_job_id"
+    t.string "input_file"
+    t.string "persisted_output"
+    t.boolean "published", default: false
+    t.integer "comments_analysed", default: 0
+    t.index ["analysable_type", "analysable_id"], name: "index_sensemaker_jobs_on_analysable_type_and_analysable_id"
+    t.index ["parent_job_id"], name: "index_sensemaker_jobs_on_parent_job_id"
+    t.index ["user_id"], name: "index_sensemaker_jobs_on_user_id"
+  end
+
   create_table "settings", id: :serial, force: :cascade do |t|
     t.string "key"
     t.string "value"
@@ -1818,6 +1840,8 @@
   add_foreign_key "related_content_scores", "related_contents"
   add_foreign_key "related_content_scores", "users"
   add_foreign_key "sdg_managers", "users"
+  add_foreign_key "sensemaker_jobs", "sensemaker_jobs", column: "parent_job_id"
+  add_foreign_key "sensemaker_jobs", "users"
   add_foreign_key "users", "geozones"
   add_foreign_key "valuators", "users"
 end
diff --git a/spec/factories/sensemaker/jobs.rb b/spec/factories/sensemaker/jobs.rb
new file mode 100644
index 00000000000..b6979a77134
--- /dev/null
+++ b/spec/factories/sensemaker/jobs.rb
@@ -0,0 +1,21 @@
+FactoryBot.define do
+  factory :sensemaker_job, class: "Sensemaker::Job" do
+    user
+    script { "categorization_runner.ts" }
+    started_at { Time.current }
+    finished_at { nil }
+    error { nil }
+    analysable_type { "Debate" }
+    analysable_id { create(:debate).id }
+    additional_context { "Test context" }
+    published { true }
+
+    trait :unpublished do
+      published { false }
+    end
+
+    trait :published do
+      published { true }
+    end
+  end
+end
diff --git a/spec/models/sensemaker/job_spec.rb b/spec/models/sensemaker/job_spec.rb
new file mode 100644
index 00000000000..c7f75514017
--- /dev/null
+++ b/spec/models/sensemaker/job_spec.rb
@@ -0,0 +1,560 @@
+require "rails_helper"
+
+describe Sensemaker::Job do
+  let(:user) { create(:user) }
+  let(:debate) { create(:debate) }
+  let(:job) do
+    create(:sensemaker_job,
+           analysable_type: "Debate",
+           analysable_id: debate.id,
+           script: "categorization_runner.ts",
+           user: user,
+           started_at: Time.current,
+           additional_context: "Test context")
+  end
+
+  describe "validations" do
+    it "is valid with valid attributes" do
+      expect(job).to be_valid
+    end
+
+    it "requires analysable_type" do
+      job.analysable_type = nil
+      expect(job).not_to be_valid
+    end
+
+    it "requires analysable_id for non-Proposal types" do
+      job.analysable_id = nil
+      expect(job).not_to be_valid
+    end
+
+    it "allows nil analysable_id for Proposal type" do
+      job.analysable_type = "Proposal"
+      job.analysable_id = nil
+      expect(job).to be_valid
+    end
+  end
+
+  describe "associations" do
+    it "belongs to a user" do
+      expect(job.user).to eq(user)
+    end
+  end
+
+  describe "instance methods" do
+    describe "#has_multiple_outputs?" do
+      it "returns true for advanced_runner.ts and runner.ts" do
+        job.script = "advanced_runner.ts"
+        expect(job.has_multiple_outputs?).to be true
+        job.script = "runner.ts"
+        expect(job.has_multiple_outputs?).to be true
+      end
+
+      it "returns false for single output scripts" do
+        job.script = "categorization_runner.ts"
+        expect(job.has_multiple_outputs?).to be false
+        job.script = "health_check_runner.ts"
+        expect(job.has_multiple_outputs?).to be false
+        job.script = "single-html-build.js"
+        expect(job.has_multiple_outputs?).to be false
+      end
+    end
+
+    describe "#output_file_name" do
+      it "returns the correct output file name for each script" do
+        job.script = "categorization_runner.ts"
+        expect(job.output_file_name).to eq("categorization-output-#{job.id}.csv")
+
+        job.script = "advanced_runner.ts"
+        expect(job.output_file_name).to eq("output-#{job.id}")
+
+        job.script = "runner.ts"
+        expect(job.output_file_name).to eq("output-#{job.id}")
+
+        job.script = "health_check_runner.ts"
+        expect(job.output_file_name).to eq("health-check-#{job.id}.txt")
+
+        job.script = "single-html-build.js"
+        expect(job.output_file_name).to eq("report-#{job.id}.html")
+      end
+    end
+
+    describe "#started?" do
+      it "returns true when started_at is present" do
+        expect(job.started?).to be true
+      end
+
+      it "returns false when started_at is nil" do
+        job.started_at = nil
+        expect(job.started?).to be false
+      end
+    end
+
+    describe "#finished?" do
+      it "returns true when finished_at is present" do
+        job.finished_at = Time.current
+        expect(job.finished?).to be true
+      end
+
+      it "returns false when finished_at is nil" do
+        expect(job.finished?).to be false
+      end
+    end
+
+    describe "#cancelled?" do
+      it "returns true when finished_at is present and error is 'Cancelled'" do
+        job.finished_at = Time.current
+        job.error = "Cancelled"
+        expect(job.cancelled?).to be true
+      end
+    end
+
+    describe "#cancel!" do
+      it "updates the job with finished_at and error 'Cancelled'" do
+        job.cancel!
+        expect(job.finished_at).to be_present
+        expect(job.error).to eq("Cancelled")
+      end
+    end
+
+    describe "#errored?" do
+      it "returns true when error is present" do
+        job.error = "Some error occurred"
+        expect(job.errored?).to be true
+      end
+
+      it "returns false when error is nil" do
+        expect(job.errored?).to be false
+      end
+    end
+
+    describe "#default_output_path" do
+      let(:data_folder) { "/tmp/sensemaker_test_folder/data" }
+
+      before do
+        allow(Sensemaker::Paths).to receive(:sensemaker_data_folder).and_return(data_folder)
+      end
+
+      it "returns the correct path for categorization_runner.ts" do
+        job.script = "categorization_runner.ts"
+        expect(job.default_output_path).to eq("#{data_folder}/categorization-output-#{job.id}.csv")
+      end
+
+      it "returns the correct path for advanced_runner.ts" do
+        job.script = "advanced_runner.ts"
+        expect(job.default_output_path).to eq("#{data_folder}/output-#{job.id}")
+      end
+
+      it "returns the correct path for runner.ts" do
+        job.script = "runner.ts"
+        expect(job.default_output_path).to eq("#{data_folder}/output-#{job.id}")
+      end
+    end
+
+    describe "#output_artifact_paths" do
+      let(:data_folder) { "/tmp/sensemaker_test_folder/data" }
+      let(:base_path) { "#{data_folder}/output-#{job.id}" }
+
+      before do
+        allow(Sensemaker::Paths).to receive(:sensemaker_data_folder).and_return(data_folder)
+      end
+
+      context "when persisted_output is not set" do
+        it "uses default_output_path for single output scripts" do
+          job.script = "categorization_runner.ts"
+          expected_path = "#{data_folder}/categorization-output-#{job.id}.csv"
+          expect(job.output_artifact_paths).to eq([expected_path])
+        end
+
+        it "uses default_output_path for advanced_runner.ts" do
+          job.script = "advanced_runner.ts"
+          expect(job.output_artifact_paths).to eq([
+            "#{base_path}-summary.json",
+            "#{base_path}-topic-stats.json",
+            "#{base_path}-comments-with-scores.json"
+          ])
+        end
+
+        it "uses default_output_path for runner.ts" do
+          job.script = "runner.ts"
+          expect(job.output_artifact_paths).to eq([
+            "#{base_path}-summary.json",
+            "#{base_path}-summary.html",
+            "#{base_path}-summary.md",
+            "#{base_path}-summaryAndSource.csv"
+          ])
+        end
+      end
+
+      context "when persisted_output is set" do
+        let(:persisted_path) { "/historical/path/output-#{job.id}" }
+
+        before do
+          job.persisted_output = persisted_path
+        end
+
+        it "uses persisted_output for single output scripts" do
+          job.script = "categorization_runner.ts"
+          expect(job.output_artifact_paths).to eq([persisted_path])
+        end
+
+        it "uses persisted_output for advanced_runner.ts" do
+          job.script = "advanced_runner.ts"
+          expect(job.output_artifact_paths).to eq([
+            "#{persisted_path}-summary.json",
+            "#{persisted_path}-topic-stats.json",
+            "#{persisted_path}-comments-with-scores.json"
+          ])
+        end
+
+        it "uses persisted_output for runner.ts" do
+          job.script = "runner.ts"
+          expect(job.output_artifact_paths).to eq([
+            "#{persisted_path}-summary.json",
+            "#{persisted_path}-summary.html",
+            "#{persisted_path}-summary.md",
+            "#{persisted_path}-summaryAndSource.csv"
+          ])
+        end
+      end
+    end
+
+    describe "#has_outputs?" do
+      let(:data_folder) { "/tmp/sensemaker_test_folder/data" }
+
+      before do
+        allow(Sensemaker::Paths).to receive(:sensemaker_data_folder).and_return(data_folder)
+        allow(File).to receive(:exist?).and_return(false)
+      end
+
+      context "when script has single output" do
+        before do
+          job.script = "categorization_runner.ts"
+        end
+
+        it "returns true when the output file exists" do
+          output_path = "#{data_folder}/categorization-output-#{job.id}.csv"
+          allow(File).to receive(:exist?).with(output_path).and_return(true)
+          expect(job.has_outputs?).to be true
+        end
+
+        it "returns false when the output file does not exist" do
+          expect(job.has_outputs?).to be false
+        end
+      end
+
+      context "when script has multiple outputs (advanced_runner.ts)" do
+        before do
+          job.script = "advanced_runner.ts"
+        end
+
+        it "returns true when all output files exist" do
+          base_path = "#{data_folder}/output-#{job.id}"
+          allow(File).to receive(:exist?).with("#{base_path}-summary.json").and_return(true)
+          allow(File).to receive(:exist?).with("#{base_path}-topic-stats.json").and_return(true)
+          allow(File).to receive(:exist?).with("#{base_path}-comments-with-scores.json").and_return(true)
+          expect(job.has_outputs?).to be true
+        end
+
+        it "returns false when not all output files exist" do
+          base_path = "#{data_folder}/output-#{job.id}"
+          allow(File).to receive(:exist?).with("#{base_path}-summary.json").and_return(true)
+          allow(File).to receive(:exist?).with("#{base_path}-topic-stats.json").and_return(true)
+          allow(File).to receive(:exist?).with("#{base_path}-comments-with-scores.json").and_return(false)
+          expect(job.has_outputs?).to be false
+        end
+      end
+
+      context "when script has multiple outputs (runner.ts)" do
+        before do
+          job.script = "runner.ts"
+        end
+
+        it "returns true when all output files exist" do
+          base_path = "#{data_folder}/output-#{job.id}"
+          allow(File).to receive(:exist?).with("#{base_path}-summary.json").and_return(true)
+          allow(File).to receive(:exist?).with("#{base_path}-summary.html").and_return(true)
+          allow(File).to receive(:exist?).with("#{base_path}-summary.md").and_return(true)
+          allow(File).to receive(:exist?).with("#{base_path}-summaryAndSource.csv").and_return(true)
+          expect(job.has_outputs?).to be true
+        end
+
+        it "returns false when not all output files exist" do
+          base_path = "#{data_folder}/output-#{job.id}"
+          allow(File).to receive(:exist?).with("#{base_path}-summary.json").and_return(true)
+          allow(File).to receive(:exist?).with("#{base_path}-summary.html").and_return(true)
+          allow(File).to receive(:exist?).with("#{base_path}-summary.md").and_return(true)
+          allow(File).to receive(:exist?).with("#{base_path}-summaryAndSource.csv").and_return(false)
+          expect(job.has_outputs?).to be false
+        end
+      end
+    end
+
+    describe "#cleanup_associated_files" do
+      let(:data_folder) { "/tmp/sensemaker_test_folder/data" }
+
+      before do
+        allow(Sensemaker::Paths).to receive(:sensemaker_data_folder).and_return(data_folder)
+        allow(FileUtils).to receive(:rm_f).and_return(true)
+        allow(Rails.logger).to receive(:info)
+        allow(Rails.logger).to receive(:warn)
+      end
+
+      it "cleans up input files" do
+        expect(FileUtils).to receive(:rm_f).with("#{data_folder}/input-#{job.id}.csv")
+        expect(FileUtils).to receive(:rm_f).with("#{data_folder}/input-#{job.id}.csv.unfiltered")
+
+        job.send(:cleanup_input_files, data_folder)
+      end
+
+      context "when script is health_check_runner.ts" do
+        let(:job) { create(:sensemaker_job, script: "health_check_runner.ts") }
+
+        it "cleans up health check output file" do
+          expect(FileUtils).to receive(:rm_f).with("#{data_folder}/health-check-#{job.id}.txt")
+
+          job.send(:cleanup_output_files, data_folder)
+        end
+      end
+
+      context "when script is advanced_runner.ts" do
+        let(:job) { create(:sensemaker_job, script: "advanced_runner.ts") }
+
+        it "cleans up all advanced runner output files" do
+          expect(FileUtils).to receive(:rm_f).with("#{data_folder}/output-#{job.id}-summary.json")
+          expect(FileUtils).to receive(:rm_f).with("#{data_folder}/output-#{job.id}-topic-stats.json")
+          expect(FileUtils).to receive(:rm_f).with(
+            "#{data_folder}/output-#{job.id}-comments-with-scores.json"
+          )
+
+          job.send(:cleanup_output_files, data_folder)
+        end
+      end
+
+      context "when script is categorization_runner.ts" do
+        let(:job) { create(:sensemaker_job, script: "categorization_runner.ts") }
+
+        it "cleans up categorization output file" do
+          expect(FileUtils).to receive(:rm_f).with("#{data_folder}/categorization-output-#{job.id}.csv")
+
+          job.send(:cleanup_output_files, data_folder)
+        end
+      end
+
+      context "when script is single-html-build.js" do
+        let(:job) { create(:sensemaker_job, script: "single-html-build.js") }
+
+        it "cleans up HTML report file" do
+          expect(FileUtils).to receive(:rm_f).with("#{data_folder}/report-#{job.id}.html")
+
+          job.send(:cleanup_output_files, data_folder)
+        end
+      end
+
+      context "when script is runner.ts" do
+        let(:job) { create(:sensemaker_job, script: "runner.ts") }
+
+        it "cleans up all runner summary output files" do
+          expect(FileUtils).to receive(:rm_f).with("#{data_folder}/output-#{job.id}-summary.json")
+          expect(FileUtils).to receive(:rm_f).with("#{data_folder}/output-#{job.id}-summary.html")
+          expect(FileUtils).to receive(:rm_f).with("#{data_folder}/output-#{job.id}-summary.md")
+          expect(FileUtils).to receive(:rm_f).with("#{data_folder}/output-#{job.id}-summaryAndSource.csv")
+
+          job.send(:cleanup_output_files, data_folder)
+        end
+      end
+
+      describe "#cleanup_persisted_output" do
+        context "when persisted_output is present and file exists" do
+          before do
+            job.persisted_output = "/path/to/output.txt"
+            allow(File).to receive(:exist?).and_return(false)
+            allow(File).to receive(:exist?).with("/path/to/output.txt").and_return(true)
+          end
+
+          it "removes the persisted output file" do
+            expect(FileUtils).to receive(:rm_f).with("/path/to/output.txt")
+
+            job.send(:cleanup_persisted_output)
+          end
+        end
+
+        context "when persisted_output is nil" do
+          before do
+            job.persisted_output = nil
+          end
+
+          it "does not attempt to remove any file" do
+            expect(FileUtils).not_to receive(:rm_f)
+
+            job.send(:cleanup_persisted_output)
+          end
+        end
+
+        context "when persisted_output is present but file does not exist" do
+          before do
+            job.persisted_output = "/path/to/nonexistent.txt"
+            allow(File).to receive(:exist?).and_return(false)
+            allow(File).to receive(:exist?).with("/path/to/nonexistent.txt").and_return(false)
+          end
+
+          it "does not attempt to remove the file" do
+            expect(FileUtils).not_to receive(:rm_f)
+
+            job.send(:cleanup_persisted_output)
+          end
+        end
+      end
+
+      it "logs cleanup results" do
+        allow(FileUtils).to receive(:rm_f).and_return(true)
+
+        expect(Rails.logger).to receive(:info).with(/Cleaned up files for job #{job.id}/)
+
+        job.send(:cleanup_associated_files)
+      end
+
+      it "handles errors gracefully" do
+        allow(FileUtils).to receive(:rm_f).and_raise(StandardError.new("File system error"))
+
+        expect(Rails.logger).to receive(:warn).with(/Failed to cleanup files for job #{job.id}/)
+
+        result = job.send(:cleanup_associated_files)
+        expect(result).to be(nil)
+      end
+    end
+  end
+
+  describe "callbacks" do
+    describe "before_save :set_persisted_output_if_successful" do
+      let(:data_folder) { "/tmp/sensemaker_test_folder/data" }
+
+      before do
+        allow(Sensemaker::Paths).to receive(:sensemaker_data_folder).and_return(data_folder)
+        allow(File).to receive(:exist?).and_return(false)
+      end
+
+      context "when job is successful (finished_at present, no error)" do
+        context "when persisted_output is not set" do
+          context "when all output files exist" do
+            it "sets persisted_output to default_output_path" do
+              job.script = "categorization_runner.ts"
+              output_path = "#{data_folder}/categorization-output-#{job.id}.csv"
+              allow(File).to receive(:exist?).with(output_path).and_return(true)
+
+              job.finished_at = Time.current
+              job.error = nil
+              job.save!
+
+              expect(job.persisted_output).to eq(job.default_output_path)
+            end
+
+            it "sets persisted_output for advanced_runner.ts when all files exist" do
+              job.script = "advanced_runner.ts"
+              base_path = "#{data_folder}/output-#{job.id}"
+              allow(File).to receive(:exist?).with("#{base_path}-summary.json").and_return(true)
+              allow(File).to receive(:exist?).with("#{base_path}-topic-stats.json").and_return(true)
+              allow(File).to receive(:exist?).with("#{base_path}-comments-with-scores.json").and_return(true)
+
+              job.finished_at = Time.current
+              job.error = nil
+              job.save!
+
+              expect(job.persisted_output).to eq(job.default_output_path)
+            end
+
+            it "sets persisted_output for runner.ts when all files exist" do
+              job.script = "runner.ts"
+              base_path = "#{data_folder}/output-#{job.id}"
+              allow(File).to receive(:exist?).with("#{base_path}-summary.json").and_return(true)
+              allow(File).to receive(:exist?).with("#{base_path}-summary.html").and_return(true)
+              allow(File).to receive(:exist?).with("#{base_path}-summary.md").and_return(true)
+              allow(File).to receive(:exist?).with("#{base_path}-summaryAndSource.csv").and_return(true)
+
+              job.finished_at = Time.current
+              job.error = nil
+              job.save!
+
+              expect(job.persisted_output).to eq(job.default_output_path)
+            end
+          end
+
+          context "when not all output files exist" do
+            it "does not set persisted_output" do
+              job.script = "advanced_runner.ts"
+              base_path = "#{data_folder}/output-#{job.id}"
+              allow(File).to receive(:exist?).with("#{base_path}-summary.json").and_return(true)
+              allow(File).to receive(:exist?).with("#{base_path}-topic-stats.json").and_return(true)
+              allow(File).to receive(:exist?).with("#{base_path}-comments-with-scores.json").and_return(false)
+
+              job.finished_at = Time.current
+              job.error = nil
+              job.save!
+
+              expect(job.persisted_output).to be(nil)
+            end
+          end
+        end
+
+        context "when persisted_output is already set" do
+          it "does not overwrite existing persisted_output" do
+            existing_path = "/existing/path/output-#{job.id}"
+            job.persisted_output = existing_path
+
+            job.finished_at = Time.current
+            job.error = nil
+            job.save!
+
+            expect(job.persisted_output).to eq(existing_path)
+          end
+        end
+      end
+
+      context "when job is not finished" do
+        it "does not set persisted_output" do
+          job.finished_at = nil
+          job.error = nil
+          job.save!
+
+          expect(job.persisted_output).to be(nil)
+        end
+      end
+
+      context "when job has an error" do
+        it "does not set persisted_output" do
+          job.finished_at = Time.current
+          job.error = "Some error occurred"
+          job.save!
+
+          expect(job.persisted_output).to be(nil)
+        end
+      end
+    end
+
+    describe "after_destroy" do
+      let(:data_folder) { "/tmp/sensemaker_test_folder/data" }
+
+      before do
+        allow(Sensemaker::Paths).to receive(:sensemaker_data_folder).and_return(data_folder)
+        allow(FileUtils).to receive(:rm_f).and_return(true)
+        allow(Rails.logger).to receive(:info)
+        allow(Rails.logger).to receive(:warn)
+      end
+
+      it "calls cleanup_associated_files when job is destroyed" do
+        expect(job).to receive(:cleanup_associated_files)
+        job.destroy!
+      end
+
+      it "continues with destruction even if cleanup fails" do
+        expect(job).to receive(:cleanup_input_files)
+
+        # One expectation both verifies the call and makes it raise.
+        expect(job).to receive(:cleanup_output_files).and_raise(StandardError.new("Bork"))
+
+        expect { job.destroy }.not_to raise_error
+        expect(Sensemaker::Job.find_by(id: job.id)).to be(nil)
+      end
+    end
+  end
+end