From 8efadd12a05a4777d0f169eae909eeb0af4bbb15 Mon Sep 17 00:00:00 2001 From: RoBGlaBe Date: Thu, 30 Nov 2023 17:00:26 +0100 Subject: [PATCH 01/14] idle detector can be about to arm --- dispatcher/DAQController.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/dispatcher/DAQController.py b/dispatcher/DAQController.py index a43ee2b4..59f7aa75 100644 --- a/dispatcher/DAQController.py +++ b/dispatcher/DAQController.py @@ -15,7 +15,7 @@ class DAQController(): D. Masson, 06 Apr 2020 S. di Pede, 17 Mar 2021 - Brief: This code handles the logic of what the dispatcher does when. It takes in + Brief: This code handles the logic of what the dispatcher does when. It takes in aggregated status updates and commands from the mongo connector and decides if any action needs to be taken to get the DAQ into the target state. It also handles the resetting of runs (the ~hourly stop/start) during normal operations. @@ -74,12 +74,12 @@ def solve_problem(self, latest_status, goal_state): therefore unavailable. The frontend should prevent many of these cases though. The way that works is this: - A) the detector should be INACTIVE (i.e., IDLE), we stop the detector + A) the detector should be INACTIVE (i.e., IDLE), we stop the detector if the status is in one of the active states - B) the detector should be ACTIVE (i.e, RUNNING), we issue the necessary + B) the detector should be ACTIVE (i.e, RUNNING), we issue the necessary commands to put the system in the RUNNING status - C) we deal separately with the ERROR and TIMEOUT statuses, as in the - first time we need to promptly stop the detector, and in the second + C) we deal separately with the ERROR and TIMEOUT statuses, as in the + first time we need to promptly stop the detector, and in the second case we need to handle the timeouts. 
""" # cache these so other functions can see them @@ -91,6 +91,8 @@ def solve_problem(self, latest_status, goal_state): if latest_status[det]['status'] == DAQ_STATUS.IDLE: self.can_force_stop[det] = True self.error_stop_count[det] = 0 + if (now() - self.last_command['arm'][detector]).total_seconds() < self.time_between_commands): + self.one_detector_arming = True if latest_status[det]['status'] in [DAQ_STATUS.ARMING, DAQ_STATUS.ARMED]: self.one_detector_arming = True @@ -225,7 +227,7 @@ def control_detector(self, command, detector, force=False): readers, cc = self.mongo.get_hosts_for_mode(ls[detector]['mode']) hosts = (readers, cc) delay = self.start_cmd_delay - #Reset arming timeout counter + #Reset arming timeout counter self.missed_arm_cycles[detector]=0 else: # stop readers, cc = self.mongo.get_hosts_for_mode(ls[detector]['mode'], detector) @@ -255,9 +257,9 @@ def control_detector(self, command, detector, force=False): return 0 def check_timeouts(self, detector, command=None): - """ + """ This one is invoked if we think we need to change states. Either a stop command needs - to be sent, or we've detected an anomaly and want to decide what to do. + to be sent, or we've detected an anomaly and want to decide what to do. 
Basically this function decides: - We are not in any timeouts: send the normal stop command - We are waiting for something: do nothing From a67866ae578f398d858d001df46eec6551e3ffaa Mon Sep 17 00:00:00 2001 From: RoBGlaBe Date: Thu, 30 Nov 2023 17:04:59 +0100 Subject: [PATCH 02/14] typo --- dispatcher/DAQController.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dispatcher/DAQController.py b/dispatcher/DAQController.py index 59f7aa75..d0bbebed 100644 --- a/dispatcher/DAQController.py +++ b/dispatcher/DAQController.py @@ -91,7 +91,7 @@ def solve_problem(self, latest_status, goal_state): if latest_status[det]['status'] == DAQ_STATUS.IDLE: self.can_force_stop[det] = True self.error_stop_count[det] = 0 - if (now() - self.last_command['arm'][detector]).total_seconds() < self.time_between_commands): + if (now() - self.last_command['arm'][det]).total_seconds() < self.time_between_commands): self.one_detector_arming = True if latest_status[det]['status'] in [DAQ_STATUS.ARMING, DAQ_STATUS.ARMED]: self.one_detector_arming = True From 444a00573963bd3e46600a320a0e34874cc9e15e Mon Sep 17 00:00:00 2001 From: RoBGlaBe Date: Fri, 1 Dec 2023 16:31:54 +0100 Subject: [PATCH 03/14] only one condition to change 'one_detector_arming' --- dispatcher/DAQController.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/dispatcher/DAQController.py b/dispatcher/DAQController.py index d0bbebed..1ab2049d 100644 --- a/dispatcher/DAQController.py +++ b/dispatcher/DAQController.py @@ -88,13 +88,15 @@ def solve_problem(self, latest_status, goal_state): self.one_detector_arming = False for det in latest_status.keys(): - if latest_status[det]['status'] == DAQ_STATUS.IDLE: + if ( + (latest_status[det]['status'] in [DAQ_STATUS.ARMING, DAQ_STATUS.ARMED]) or + ((latest_status[det]['status'] == DAQ_STATUS.IDLE) and # Arming, but detector still reports IDLE + (now() - self.last_command['arm'][det]).total_seconds() < self.time_between_commands) + ): + 
self.one_detector_arming = True + elif latest_status[det]['status'] == DAQ_STATUS.IDLE: self.can_force_stop[det] = True self.error_stop_count[det] = 0 - if (now() - self.last_command['arm'][det]).total_seconds() < self.time_between_commands): - self.one_detector_arming = True - if latest_status[det]['status'] in [DAQ_STATUS.ARMING, DAQ_STATUS.ARMED]: - self.one_detector_arming = True active_states = [DAQ_STATUS.RUNNING, DAQ_STATUS.ARMED, DAQ_STATUS.ARMING, DAQ_STATUS.UNKNOWN] From 17787c0adc7eb037b1b78c17609d8e5924c58642 Mon Sep 17 00:00:00 2001 From: Carlo Fuselli Date: Thu, 19 Sep 2024 11:44:35 +0200 Subject: [PATCH 04/14] Update hypervisor to work for nuclear timeout We never managed to enter the nuclear tactic because it was only looking for timeouts. Now we say: if not armed or running, go nuclear (that means, power cycle the crates) --- dispatcher/hypervisor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dispatcher/hypervisor.py b/dispatcher/hypervisor.py index a3df0a53..ea68525a 100644 --- a/dispatcher/hypervisor.py +++ b/dispatcher/hypervisor.py @@ -408,7 +408,7 @@ def linked_nuclear_option(self): ok, not_ok = [], [] physical_status = self.mongo_connect.physical_status for phys_det, statuses in physical_status.items(): - if self.mongo_connect.combine_statuses(statuses) in [daqnt.DAQ_STATUS.TIMEOUT]: + if self.mongo_connect.combine_statuses(statuses) not in [daqnt.DAQ_STATUS.RUNNING, daqnt.DAQ_STATUS.ARMED]: not_ok.append(phys_det) else: ok.append(phys_det) From 50e5ff5351429c514be4365356dab905dba4ad3d Mon Sep 17 00:00:00 2001 From: Robin Glade-Beucke Date: Tue, 4 Feb 2025 16:43:05 +0000 Subject: [PATCH 05/14] write config for busy fw v10 --- V1495_tpc.cc | 223 +++++++++++++++++++++++++++++++++++++++++++++------ V1495_tpc.hh | 20 +++-- 2 files changed, 211 insertions(+), 32 deletions(-) diff --git a/V1495_tpc.cc b/V1495_tpc.cc index 547657c7..ab07f298 100644 --- a/V1495_tpc.cc +++ b/V1495_tpc.cc @@ -2,48 +2,221 @@ #include
"MongoLog.hh" V1495_TPC::V1495_TPC(std::shared_ptr& log, std::shared_ptr& opts, int bid, int handle, unsigned int address) : - V1495(log, opts, bid, handle, address), fControlReg(0x101E), - fVetoOffMSBReg(0x1012), fVetoOffLSBReg(0x1010), - fVetoOnMSBReg(0x100E), fVetoOnLSBReg(0x100C) { - fFractionalModeActive = 0; - fVetoOn_clk = fVetoOff_clk = 0; + V1495(log, opts, bid, handle, address), + fModeReg(0x100C), + fFracLtOnAReg(0x100E), fFracLtOnBReg(0x1010), + fFracLtOffAReg(0x1012), fFracLtOffBReg(0x1014), + fAntiVetoDelayAReg(0x1016), fAntiVetoDelayBReg(0x1018), + fAntiVetoDurationAReg(0x101A), fAntiVetoDurationBReg(0x101C) { + fFracLTVetoOn_clk = fFracLTVetoOff_clk = 0; + fAntiVetoDelay_clk = fAntiVetoDuration_clk = 0; + fMode = 0x0; } V1495_TPC::~V1495_TPC() {} int V1495_TPC::Arm(std::map& opts) { int clocks_per_us = 40; - if ((fFractionalModeActive = opts["fractional_mode_active"]) == 1) { - fVetoOn_clk = opts["veto_on_us"] * clocks_per_us; - fVetoOff_clk = opts["veto_off_us"] * clocks_per_us; - if (fVetoOn_clk * fVetoOff_clk == 0) { + unsigned int is_busy_he_used = 1; + unsigned int is_hev_on = opts["is_hev_on"]; + unsigned int is_hev_start_stop_on = 1; + unsigned int is_frac_lt_mode_on = opts["is_frac_lt_mode_on"]; + unsigned int is_led_start_stop_active = opts["is_led_start_stop_active"]; + unsigned int is_anti_veto_active = opts["is_anti_veto_active"]; + unsigned int is_anti_veto_start_stop_active = 0; + unsigned int use_legacy_port_hev = opts["_use_legacy_port_hev"]; + unsigned int use_regular_port_trg = opts["_use_regular_port_trg"]; + unsigned int use_legacy_port_trg = opts["_use_legacy_port_trg"]; + unsigned int use_NG_input = opts["_use_NG_input"]; + + // High Energy Busy always used for Busy Veto + if (is_busy_he_used == 0){ // is_busy_he_used is currently hard-coded to 1 above, so this branch never runs; kept in case it becomes configurable someday. + fLog->Entry(MongoLog::Message, "V1495: Busy from high energy ADCs are set to be ignored, " + "however this is probably not a good idea. 
You have been warned! :)"); + } + + // Anti Veto Time conversion, duration > 0 + if (is_anti_veto_active == 1) { + fAntiVetoDelay_clk = opts["anti_veto_delay_us"] * clocks_per_us; + fAntiVetoDuration_clk = opts["anti_veto_duration_us"] * clocks_per_us; + if (fAntiVetoDuration_clk == 0) { + fLog->Entry(MongoLog::Message, "V1495: Neutron Generator anti-veto duration is zero. Turning anti-veto off."); + is_anti_veto_active = 0; + } else { + fLog->Entry(MongoLog::Local, "V1495 Neutron Generator anti-veto mode active: delay %ius, duration %ius", + opts["anti_veto_delay_us"], opts["anti_veto_duration_us"]); + } + } + + // Fractional Lifetime Veto On & Off durations conversion, both > 0 + if (is_frac_lt_mode_on == 1) { + fFracLTVetoOn_clk = opts["fractional_lifetime_veto_on_us"] * clocks_per_us; + fFracLTVetoOff_clk = opts["fractional_lifetime_veto_off_us"] * clocks_per_us; + if (fFracLTVetoOn_clk * fFracLTVetoOff_clk == 0) { fLog->Entry(MongoLog::Message, "V1495: at least one value is zero, check the config: %i/%i", - opts["veto_on_us"], opts["veto_off_us"]); - fFractionalModeActive = 0; + opts["fractional_lifetime_veto_on_us"], opts["fractional_lifetime_veto_off_us"]); + is_frac_lt_mode_on = 0; } else { - fLog->Entry(MongoLog::Local, "V1495 fractional mode active: %i/%i", - opts["veto_on_us"], opts["veto_off_us"]); + fLog->Entry(MongoLog::Local, "V1495 fractional mode active: on %i, off %i", + opts["fractional_lifetime_veto_on_us"], opts["fractional_lifetime_veto_off_us"]); } - } else { - fLog->Entry(MongoLog::Local, "V1495 fractional mode inactive"); } + + // Only for testing that Anti-Veto start/stop is reconstructed correctly in straxen from NG start/stop + // Anti-Veto start/stop on, if HEV is not used (they are on the same output; Anti-Veto can be reconstructed otherwise) + if ((is_anti_veto_active == 1) && (is_hev_on == 0)) { + fLog->Entry(MongoLog::Message, "V1495: Anti-Veto on and HEV off: putting Anti-Veto start/stops on HEV start/stop output"); + 
is_anti_veto_start_stop_active = 1; + is_hev_start_stop_on = 0; + } + + // Fractional Lifetime mode: no HEV at the same time. Turn off HEV start/stops (they are on the same output) + if (is_frac_lt_mode_on == 1){ + if (is_hev_on == 1) { + fLog->Entry(MongoLog::Message, "V1495: HEV and Fractional Lifetime set to on. Assuming that was a mistake. Switching off FracLT."); + is_frac_lt_mode_on = 0; + } else { + fLog->Entry(MongoLog::Message, "V1495: Fractional Lifetime set to on. Switching off HEV start/stop to avoid confusion."); + is_hev_start_stop_on = 0; + } + } + + fMode = (is_busy_he_used << 0) | + (is_hev_on << 2) | + (is_hev_start_stop_on << 3) | + (is_frac_lt_mode_on << 4) | + (is_led_start_stop_active << 5) | + (is_anti_veto_active << 6) | + (is_anti_veto_start_stop_active << 7) | + (use_legacy_port_hev << 8) | + (use_regular_port_trg << 9) | + (use_legacy_port_trg << 10) | + (use_NG_input << 11); + + fLog->Entry(MongoLog::Local, "V1495: Mode register will be: 0x%X", fMode); + fLog->Entry(MongoLog::Message, "V1495: Final Mode: " + "hebusy %i, hev %i, hevss %i, " + "fraclt %i, LEDss_on %i, antiv %i, antivss %i, " + "hev_leg %i, reg_trg %i, leg_trg %i, NG_inp %i", + is_busy_he_used, is_hev_on, is_hev_start_stop_on, + is_frac_lt_mode_on, is_led_start_stop_active, is_anti_veto_active, is_anti_veto_start_stop_active, + use_legacy_port_hev, use_regular_port_trg, use_legacy_port_trg, use_NG_input); return 0; } + + +int V1495_TPC::ArmSoft(std::map& opts) { + int clocks_per_us = 40; + unsigned int is_busy_he_used = 1; + unsigned int is_hev_on = opts["is_hev_on"]; + unsigned int is_hev_start_stop_on = 1; + unsigned int is_frac_lt_mode_on = opts["is_frac_lt_mode_on"]; + unsigned int is_led_start_stop_active = opts["is_led_start_stop_active"]; + unsigned int is_anti_veto_active = opts["is_anti_veto_active"]; + unsigned int is_anti_veto_start_stop_active = 0; + unsigned int use_legacy_port_hev = opts["_use_legacy_port_hev"]; + unsigned int use_regular_port_trg = 
opts["_use_regular_port_trg"]; + unsigned int use_legacy_port_trg = opts["_use_legacy_port_trg"]; + unsigned int use_NG_input = opts["_use_NG_input"]; + + // Only for testing that Anti-Veto start/stop is reconstructed correctly in straxen from NG start/stop + // Anti-Veto start/stop on, if HEV is not used (they are on the same output; Anti-Veto can be reconstructed otherwise) + if ((is_anti_veto_active == 1) && (is_hev_on == 0)) { + fLog->Entry(MongoLog::Message, "V1495: Anti-Veto on and HEV off: putting Anti-Veto start/stops on HEV start/stop output"); + is_anti_veto_start_stop_active = 1; + is_hev_start_stop_on = 0; + } + + // Fractional Lifetime mode: no HEV at the same time. Turn off HEV start/stops (they are on the same output) + if (is_frac_lt_mode_on == 1){ + if (is_hev_on == 1) { + fLog->Entry(MongoLog::Message, "V1495: HEV and Fractional Lifetime set to on. Please activate only 1 of them at a time."); + return 1; + } else { + fLog->Entry(MongoLog::Message, "V1495: Fractional Lifetime set to on. Switching off HEV start/stop to avoid confusion."); + is_hev_start_stop_on = 0; + } + } + + // Check numerical options, if we run in modes that require those + // Anti Veto Time conversion, duration > 0 + if (is_anti_veto_active == 1) { + fAntiVetoDelay_clk = opts["anti_veto_delay_us"] * clocks_per_us; + fAntiVetoDuration_clk = opts["anti_veto_duration_us"] * clocks_per_us; + if (fAntiVetoDuration_clk == 0) { + fLog->Entry(MongoLog::Message, "V1495: Neutron Generator anti-veto duration is zero. 
Turning anti-veto off."); + is_anti_veto_active = 0; + return 1; + } else { + fLog->Entry(MongoLog::Local, "V1495 Neutron Generator anti-veto mode active: delay %ius, duration %ius", + opts["anti_veto_delay_us"], opts["anti_veto_duration_us"]); + } + } + + // Fractional Lifetime Veto On & Off durations conversion, both > 0 + if (is_frac_lt_mode_on == 1) { + fFracLTVetoOn_clk = opts["fractional_lifetime_veto_on_us"] * clocks_per_us; + fFracLTVetoOff_clk = opts["fractional_lifetime_veto_off_us"] * clocks_per_us; + if (fFracLTVetoOn_clk * fFracLTVetoOff_clk == 0) { + fLog->Entry(MongoLog::Message, "V1495: at least one value is zero, check the config: %i/%i", + opts["fractional_lifetime_veto_on_us"], opts["fractional_lifetime_veto_off_us"]); + return 1; + } else { + fLog->Entry(MongoLog::Local, "V1495 fractional mode active: on %i, off %i", + opts["fractional_lifetime_veto_on_us"], opts["fractional_lifetime_veto_off_us"]); + } + } + + fMode = (is_busy_he_used << 0) | + (is_hev_on << 2) | + (is_hev_start_stop_on << 3) | + (is_frac_lt_mode_on << 4) | + (is_led_start_stop_active << 5) | + (is_anti_veto_active << 6) | + (is_anti_veto_start_stop_active << 7) | + (use_legacy_port_hev << 8) | + (use_regular_port_trg << 9) | + (use_legacy_port_trg << 10) | + (use_NG_input << 11); + + fLog->Entry(MongoLog::Local, "V1495: Mode register will be: 0x%X", fMode); + fLog->Entry(MongoLog::Message, "V1495: Final Mode: " + "hebusy %i, hev %i, hevss %i, " + "fraclt %i, LEDss_on %i, antiv %i, antivss %i, " + "hev_leg %i, reg_trg %i, leg_trg %i, NG_inp %i", + is_busy_he_used, is_hev_on, is_hev_start_stop_on, + is_frac_lt_mode_on, is_led_start_stop_active, is_anti_veto_active, is_anti_veto_start_stop_active, + use_legacy_port_hev, use_regular_port_trg, use_legacy_port_trg, use_NG_input); + return 0; +} + + int V1495_TPC::BeforeSINStart() { int ret = 0; - if (fFractionalModeActive) { - ret += WriteReg(fControlReg, 0x1); - ret += WriteReg(fVetoOffMSBReg, (fVetoOff_clk & 0xFFFF0000) >> 16); - 
ret += WriteReg(fVetoOffLSBReg, fVetoOff_clk & 0xFFFF); - ret += WriteReg(fVetoOnMSBReg, (fVetoOn_clk & 0xFFFF0000) >> 16); - ret += WriteReg(fVetoOnLSBReg, fVetoOn_clk & 0xFFFF); - } else { - ret = WriteReg(fControlReg, 0x0); - } + ret += WriteReg(fModeReg, fMode); + ret += WriteReg(fFracLtOnBReg, (fFracLTVetoOn_clk & 0xFFFF0000) >> 16); + ret += WriteReg(fFracLtOnAReg, fFracLTVetoOn_clk & 0xFFFF); + ret += WriteReg(fFracLtOffBReg, (fFracLTVetoOff_clk & 0xFFFF0000) >> 16); + ret += WriteReg(fFracLtOffAReg, fFracLTVetoOff_clk & 0xFFFF); + ret += WriteReg(fAntiVetoDelayBReg, (fAntiVetoDelay_clk & 0xFFFF0000) >> 16); + ret += WriteReg(fAntiVetoDelayAReg, fAntiVetoDelay_clk & 0xFFFF); + ret += WriteReg(fAntiVetoDurationBReg, (fAntiVetoDuration_clk & 0xFFFF0000) >> 16); + ret += WriteReg(fAntiVetoDurationAReg, fAntiVetoDuration_clk & 0xFFFF); return ret; } int V1495_TPC::BeforeSINStop() { - return WriteReg(fControlReg, 0x0); + int ret = 0; + ret += WriteReg(fModeReg, 0xD); + ret += WriteReg(fFracLtOnBReg, 0x0000); + ret += WriteReg(fFracLtOnAReg, 0x0000); + ret += WriteReg(fFracLtOffBReg, 0x0000); + ret += WriteReg(fFracLtOffAReg, 0x0000); + ret += WriteReg(fAntiVetoDelayBReg, 0x0000); + ret += WriteReg(fAntiVetoDelayAReg, 0x0000); + ret += WriteReg(fAntiVetoDurationBReg, 0x0000); + ret += WriteReg(fAntiVetoDurationAReg, 0x0000); + return ret; } + diff --git a/V1495_tpc.hh b/V1495_tpc.hh index afe00114..c72f957c 100644 --- a/V1495_tpc.hh +++ b/V1495_tpc.hh @@ -8,18 +8,24 @@ class V1495_TPC : public V1495 { V1495_TPC(std::shared_ptr&, std::shared_ptr&, int, int, unsigned); virtual ~V1495_TPC(); virtual int Arm(std::map&); + virtual int ArmSoft(std::map&); virtual int BeforeSINStart(); virtual int BeforeSINStop(); protected: - const uint32_t fControlReg; - const uint32_t fVetoOffMSBReg; - const uint32_t fVetoOffLSBReg; - const uint32_t fVetoOnMSBReg; - const uint32_t fVetoOnLSBReg; + const uint32_t fModeReg; + const uint32_t fFracLtOnAReg; + const uint32_t fFracLtOnBReg; 
+ const uint32_t fFracLtOffAReg; + const uint32_t fFracLtOffBReg; + const uint32_t fAntiVetoDelayAReg; + const uint32_t fAntiVetoDelayBReg; + const uint32_t fAntiVetoDurationAReg; + const uint32_t fAntiVetoDurationBReg; - int fFractionalModeActive; - uint32_t fVetoOn_clk, fVetoOff_clk; + uint32_t fMode; + uint32_t fFracLTVetoOn_clk, fFracLTVetoOff_clk; + uint32_t fAntiVetoDelay_clk, fAntiVetoDuration_clk; }; #endif // _V1495_TPC_HH_ defined From 692561fe09b18a1025532efb412109d9bccadc46 Mon Sep 17 00:00:00 2001 From: Andrew Stevens <152290053+stevensa22@users.noreply.github.com> Date: Thu, 16 Oct 2025 16:35:13 +0200 Subject: [PATCH 06/14] Update Options.cc to skip boards --- Options.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Options.cc b/Options.cc index 408d5511..68883b10 100644 --- a/Options.cc +++ b/Options.cc @@ -203,6 +203,8 @@ std::vector Options::GetBoards(std::string type){ // If there is no host field then no biggie. Assume we have just 1 host. }; BoardType bt; + if (ele["skip"] && ele["skip"].get_bool().value == true) + continue; bt.link = ele["link"].get_int32(); bt.crate = ele["crate"].get_int32(); bt.board = ele["board"].get_int32(); From c9140d3e70c1bb7f435ad9de2119cda314aae039 Mon Sep 17 00:00:00 2001 From: Andrew Stevens Date: Tue, 18 Nov 2025 11:34:29 +0100 Subject: [PATCH 07/14] Commit uncommitted changes used to fix the nuclear option for linked runs --- dispatcher/DAQController.py | 28 ++++++++++++++++------------ dispatcher/config.ini | 2 +- dispatcher/hypervisor.py | 18 +++++++++++++----- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/dispatcher/DAQController.py b/dispatcher/DAQController.py index a43ee2b4..65256fcd 100644 --- a/dispatcher/DAQController.py +++ b/dispatcher/DAQController.py @@ -15,7 +15,7 @@ class DAQController(): D. Masson, 06 Apr 2020 S. di Pede, 17 Mar 2021 - Brief: This code handles the logic of what the dispatcher does when. 
It takes in + Brief: This code handles the logic of what the dispatcher does when. It takes in aggregated status updates and commands from the mongo connector and decides if any action needs to be taken to get the DAQ into the target state. It also handles the resetting of runs (the ~hourly stop/start) during normal operations. @@ -48,7 +48,7 @@ def __init__(self, config, daq_config, mongo_connector, logger, hypervisor): self.stop_retries = int(config['RetryReset']) self.hv_nuclear_timeout = int(config['HypervisorNuclearTimeout']) - self.last_nuke = now() + self.last_nuke = now() - datetime.timedelta(seconds=self.hv_nuclear_timeout) self.logger = logger self.time_between_commands = int(config['TimeBetweenCommands']) @@ -74,12 +74,12 @@ def solve_problem(self, latest_status, goal_state): therefore unavailable. The frontend should prevent many of these cases though. The way that works is this: - A) the detector should be INACTIVE (i.e., IDLE), we stop the detector + A) the detector should be INACTIVE (i.e., IDLE), we stop the detector if the status is in one of the active states - B) the detector should be ACTIVE (i.e, RUNNING), we issue the necessary + B) the detector should be ACTIVE (i.e, RUNNING), we issue the necessary commands to put the system in the RUNNING status - C) we deal separately with the ERROR and TIMEOUT statuses, as in the - first time we need to promptly stop the detector, and in the second + C) we deal separately with the ERROR and TIMEOUT statuses, as in the + first time we need to promptly stop the detector, and in the second case we need to handle the timeouts. 
""" # cache these so other functions can see them @@ -88,11 +88,15 @@ def solve_problem(self, latest_status, goal_state): self.one_detector_arming = False for det in latest_status.keys(): - if latest_status[det]['status'] == DAQ_STATUS.IDLE: + if ( + (latest_status[det]['status'] in [DAQ_STATUS.ARMING, DAQ_STATUS.ARMED]) or + ((latest_status[det]['status'] == DAQ_STATUS.IDLE) and # Arming, but detector still reports IDLE + (now() - self.last_command['arm'][det]).total_seconds() < self.time_between_commands) + ): + self.one_detector_arming = True + elif latest_status[det]['status'] == DAQ_STATUS.IDLE: self.can_force_stop[det] = True self.error_stop_count[det] = 0 - if latest_status[det]['status'] in [DAQ_STATUS.ARMING, DAQ_STATUS.ARMED]: - self.one_detector_arming = True active_states = [DAQ_STATUS.RUNNING, DAQ_STATUS.ARMED, DAQ_STATUS.ARMING, DAQ_STATUS.UNKNOWN] @@ -225,7 +229,7 @@ def control_detector(self, command, detector, force=False): readers, cc = self.mongo.get_hosts_for_mode(ls[detector]['mode']) hosts = (readers, cc) delay = self.start_cmd_delay - #Reset arming timeout counter + #Reset arming timeout counter self.missed_arm_cycles[detector]=0 else: # stop readers, cc = self.mongo.get_hosts_for_mode(ls[detector]['mode'], detector) @@ -255,9 +259,9 @@ def control_detector(self, command, detector, force=False): return 0 def check_timeouts(self, detector, command=None): - """ + """ This one is invoked if we think we need to change states. Either a stop command needs - to be sent, or we've detected an anomaly and want to decide what to do. + to be sent, or we've detected an anomaly and want to decide what to do. 
Basically this function decides: - We are not in any timeouts: send the normal stop command - We are waiting for something: do nothing diff --git a/dispatcher/config.ini b/dispatcher/config.ini index 6471db91..e8c19497 100644 --- a/dispatcher/config.ini +++ b/dispatcher/config.ini @@ -37,7 +37,7 @@ TimeBetweenCommands = 6 # How often the HV can restart hosts HypervisorHostRestartTimeout = 300 # How often we can drop nukes -HypervisorNuclearTimeout = 1800 +HypervisorNuclearTimeout = 900 # Time between reader and CC start (can be float) # THIS IS AN IMPORTANT VALUE, DON'T CHANGE IT UNLESS YOU KNOW WHAT YOU'RE DOING diff --git a/dispatcher/hypervisor.py b/dispatcher/hypervisor.py index a3df0a53..6fe9d0be 100644 --- a/dispatcher/hypervisor.py +++ b/dispatcher/hypervisor.py @@ -380,7 +380,8 @@ def handle_timeout(self, host: str): if self.slackbot is not None: self.slackbot.send_message( f'Hypervisor is restarting {host}', - add_tags=('daq',)) + # add_tags=('daq',) + ) if 0 not in self.kill_redax(host): self.logger.error(f'Error killing {host}?') @@ -408,7 +409,10 @@ def linked_nuclear_option(self): ok, not_ok = [], [] physical_status = self.mongo_connect.physical_status for phys_det, statuses in physical_status.items(): - if self.mongo_connect.combine_statuses(statuses) in [daqnt.DAQ_STATUS.TIMEOUT]: + detector_combined_status = self.mongo_connect.combine_statuses(statuses) + self.logger.debug(f'{phys_det} has status {detector_combined_status}') + # if detector_combined_status in [daqnt.DAQ_STATUS.TIMEOUT, daqnt.DAQ_STATUS.ARMING]: + if detector_combined_status not in [daqnt.DAQ_STATUS.RUNNING, daqnt.DAQ_STATUS.ARMED]: not_ok.append(phys_det) else: ok.append(phys_det) @@ -420,12 +424,15 @@ def linked_nuclear_option(self): if len(ok) == len(physical_status): self.logger.error('Uh, how did you get here???') self.slackbot.send_message('This happened again, you should really' - ' get someone to fix this', tags='ALL') + ' get someone to fix this', + # tags='ALL' + ) raise 
ValueError('Why did this happen?') if self.slackbot is not None: self.slackbot.send_message('Hypervisor is unlinking detectors', - add_tags='ALL') + # add_tags='ALL', + ) # ok, we aren't the problem, let's see about unlinking if len(ok) == 1: @@ -614,7 +621,8 @@ def hard_reset(self, authorization_level): f'Responding {str(responding)}\n' f'Timeout {str(timeout)}\n' f'Level {str(authorization_level)}', - add_tags='ALL') + # add_tags='ALL' + ) self.logger.info('%i responding, %i timeout' % (len(responding), len(timeout))) # all processes are either idle or timing out. # Make sure to first (force) quit redax instances prior to VMEs. From a7150e4c602f5137672bcb341c22babde0912ba1 Mon Sep 17 00:00:00 2001 From: Alessandro Razeto Date: Mon, 1 Dec 2025 10:25:15 +0000 Subject: [PATCH 08/14] Added missing include --- V1724.hh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/V1724.hh b/V1724.hh index 21d3bd62..0a7daf52 100644 --- a/V1724.hh +++ b/V1724.hh @@ -8,6 +8,7 @@ #include #include #include +#include class MongoLog; class Options; @@ -87,7 +88,7 @@ protected: bool MonitorRegister(uint32_t reg, uint32_t mask, int ntries, int sleep, uint32_t val=1); virtual std::tuple GetClockInfo(std::u32string_view); virtual int GetClockCounter(uint32_t); - int fBoardHandle; + int32_t fBoardHandle; int fBID; unsigned int fBaseAddress; int fDefaultDelay; From 467bbfc9ff605cc1155c644a79f31fc85f40f2e6 Mon Sep 17 00:00:00 2001 From: Alessandro Razeto Date: Mon, 1 Dec 2025 10:25:23 +0000 Subject: [PATCH 09/14] do not discard environment LDFLAGS and CFLAGS + add mongoc2 libraries --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 6a43c00d..d664f9d7 100644 --- a/Makefile +++ b/Makefile @@ -2,13 +2,13 @@ SHELL = /bin/bash -O extglob -c CC = g++ CXX = g++ BUILD_COMMIT = "$(shell git log -n 1 --pretty=oneline | awk '{print $$1}')" -CFLAGS = -Wall -Wextra -pedantic -pedantic-errors -g -O2 -DLINUX 
-DREDAX_BUILD_COMMIT='$(BUILD_COMMIT)' -std=c++17 -pthread $(shell pkg-config --cflags libmongocxx) +CFLAGS += -Wall -Wextra -pedantic -pedantic-errors -g -O2 -DLINUX -DREDAX_BUILD_COMMIT='$(BUILD_COMMIT)' -std=c++17 -pthread $(shell pkg-config --cflags libmongocxx) CPPFLAGS := $(CFLAGS) IS_READER0 := false ifeq "$(shell hostname)" "reader0" IS_READER0 = true endif -LDFLAGS = -lCAENVME -lstdc++fs -llz4 -lblosc $(shell pkg-config --libs libmongocxx) $(shell pkg-config --libs libbsoncxx) +LDFLAGS += -lCAENVME -lstdc++fs -llz4 -lblosc $(shell pkg-config --libs libmongocxx) $(shell pkg-config --libs mongoc2) $(shell pkg-config --libs libbsoncxx) #LDFLAGS_CC = ${LDFLAGS} -lexpect -ltcl8.6 SOURCES_SLAVE = CControl_Handler.cc DAQController.cc f1724.cc main.cc MongoLog.cc \ From e8d013305365da7bc88145b7f4e2e49c157d05f2 Mon Sep 17 00:00:00 2001 From: Alessandro Razeto Date: Mon, 1 Dec 2025 10:25:37 +0000 Subject: [PATCH 10/14] updated to new lz4 interface - to be tested --- StraxFormatter.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/StraxFormatter.cc b/StraxFormatter.cc index 21ee1dc4..6768bacd 100644 --- a/StraxFormatter.cc +++ b/StraxFormatter.cc @@ -28,9 +28,10 @@ long compress_lz4(std::shared_ptr& in, std::shared_ptr // the LZ4F_preferences_t object to the new format. 
// Can tune here as needed, these are defaults from the LZ4 examples LZ4F_preferences_t kPrefs = { - { LZ4F_max256KB, LZ4F_blockLinked, LZ4F_noContentChecksum, LZ4F_frame, 0, { 0, 0 } }, + { LZ4F_max256KB, LZ4F_blockLinked, LZ4F_noContentChecksum, LZ4F_frame, 0, 0, LZ4F_noBlockChecksum }, 0, /* compression level; 0 == default */ 0, /* autoflush */ + 0, /* favorDecSpeed */ { 0, 0, 0 }, /* reserved, must be set to 0 */ }; long max_compressed_size = LZ4F_compressFrameBound(size_in, &kPrefs); From 96cf27aa43b29b53ca52eeef258e763f90076dee Mon Sep 17 00:00:00 2001 From: Alessandro Razeto Date: Mon, 1 Dec 2025 10:25:46 +0000 Subject: [PATCH 11/14] avoid format-overflow error on date --- MongoLog.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MongoLog.cc b/MongoLog.cc index fbac52f1..b4c95473 100644 --- a/MongoLog.cc +++ b/MongoLog.cc @@ -182,7 +182,7 @@ int MongoLog::Entry(int priority, const std::string& message, ...){ fs::path MongoLog_nT::OutputDirectory(struct tm* date) { char temp[6]; - std::sprintf(temp, "%02d.%02d", date->tm_mon+1, date->tm_mday); + std::sprintf(temp, "%02d.%02d", 0x3f & (date->tm_mon+1), 0x3f & date->tm_mday); return fOutputDir / std::to_string(date->tm_year+1900) / std::string(temp); } From 9a3fbd8ed1c0bbeeda5164820e49481519adb49a Mon Sep 17 00:00:00 2001 From: Alessandro Razeto Date: Mon, 1 Dec 2025 10:25:54 +0000 Subject: [PATCH 12/14] Use correct type for handle --- V2718.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/V2718.hh b/V2718.hh index 80e28238..9f373462 100644 --- a/V2718.hh +++ b/V2718.hh @@ -18,7 +18,7 @@ public: int GetHandle(){return fBoardHandle;}; protected: - int fBoardHandle; + int32_t fBoardHandle; CrateOptions fCopts; std::shared_ptr fLog; From 92badd0b8900545b1809d30d8bc19e3192af4be0 Mon Sep 17 00:00:00 2001 From: Alessandro Razeto Date: Mon, 1 Dec 2025 10:26:07 +0000 Subject: [PATCH 13/14] Move to CAENVME_Init2 --- V1724.cc | 3 ++- V2718.cc | 3 ++- 2 files changed, 4 
insertions(+), 2 deletions(-) diff --git a/V1724.cc b/V1724.cc index 1d5657d9..0ccb4ece 100644 --- a/V1724.cc +++ b/V1724.cc @@ -65,7 +65,8 @@ V1724::~V1724(){ } int V1724::Init(int link, int crate) { - int a = CAENVME_Init(cvV2718, link, crate, &fBoardHandle); + uint32_t arg = link; + int a = CAENVME_Init2(cvV2718, &arg, crate, &fBoardHandle); if(a != cvSuccess){ fLog->Entry(MongoLog::Warning, "Board %i failed to init, error %i handle %i link %i bdnum %i", fBID, a, fBoardHandle, link, crate); diff --git a/V2718.cc b/V2718.cc index 8e99af21..0af0d45e 100644 --- a/V2718.cc +++ b/V2718.cc @@ -13,7 +13,8 @@ V2718::~V2718(){ } int V2718::Init(int link, int crate) { - if (CAENVME_Init(cvV2718, link, crate, &fBoardHandle)) + uint32_t arg = link; + if (CAENVME_Init2(cvV2718, &arg, crate, &fBoardHandle)) return -1; fLog->Entry(MongoLog::Local, "V2718 init, handle %i", fBoardHandle); return SendStopSignal(false); From 6a9b2327f55c3c8a80e58012d0b346c1716798e5 Mon Sep 17 00:00:00 2001 From: Alessandro Razeto Date: Mon, 1 Dec 2025 10:26:21 +0000 Subject: [PATCH 14/14] updated to new Mongocxx interface --- Options.cc | 24 ++++++++++++------------ main.cc | 6 +++--- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Options.cc b/Options.cc index 68883b10..8daf81ef 100644 --- a/Options.cc +++ b/Options.cc @@ -64,7 +64,7 @@ int Options::Load(std::string name, mongocxx::collection* opts_collection, std:: bson_value = new bsoncxx::document::value(doc); bson_options = bson_value->view(); try{ - fDetector = bson_options["detectors"][fHostname].get_utf8().value.to_string(); + fDetector = bson_options["detectors"][fHostname].get_string().value; }catch(const std::exception& e){ fLog->Entry(MongoLog::Warning, "No detector specified for this host"); return -1; @@ -147,7 +147,7 @@ int Options::GetNestedInt(std::string path, int default_value){ std::string Options::GetString(std::string path, std::string default_value){ try{ - return 
bson_options[path].get_utf8().value.to_string(); + return std::string(bson_options[path].get_string().value); } catch (const std::exception &e){ //LOG @@ -169,7 +169,7 @@ std::string Options::GetNestedString(std::string path, std::string default_value auto val = bson_options[fields[0]]; for(unsigned int i=1; iEntry(MongoLog::Local, "Using default value for %s",path.c_str()); return default_value; @@ -192,11 +192,11 @@ std::vector Options::GetBoards(std::string type){ types.push_back(type); for(bsoncxx::array::element ele : subarr){ - std::string btype = ele["type"].get_utf8().value.to_string(); + std::string btype(ele["type"].get_string().value); if(!std::count(types.begin(), types.end(), btype)) continue; try{ - if(ele["host"].get_utf8().value.to_string() != fHostname) + if(ele["host"].get_string().value != fHostname) continue; } catch(const std::exception &e){ @@ -208,8 +208,8 @@ std::vector Options::GetBoards(std::string type){ bt.link = ele["link"].get_int32(); bt.crate = ele["crate"].get_int32(); bt.board = ele["board"].get_int32(); - bt.type = ele["type"].get_utf8().value.to_string(); - bt.vme_address = DAXHelpers::StringToHex(ele["vme_address"].get_utf8().value.to_string()); + bt.type = ele["type"].get_string().value; + bt.vme_address = DAXHelpers::StringToHex(std::string(ele["vme_address"].get_string().value)); ret.push_back(bt); } @@ -227,7 +227,7 @@ std::vector Options::GetRegisters(int board, bool strict){ sdet = ""; }catch(const std::exception& e){ try{ - sdet = ele["board"].get_utf8().value.to_string(); + sdet = ele["board"].get_string().value; ibid = -1; }catch(const std::exception& ee){ throw std::runtime_error("Invalid register: board is neither int nor string"); @@ -236,8 +236,8 @@ std::vector Options::GetRegisters(int board, bool strict){ if ((ibid != board) && strict) continue; if ((ibid == board) || (sdet == fDetector) || (sdet == "all")) { RegisterType rt; - rt.reg = ele["reg"].get_utf8().value.to_string(); - rt.val = 
ele["val"].get_utf8().value.to_string(); + rt.reg = ele["reg"].get_string().value; + rt.val = ele["val"].get_string().value; ret.push_back(rt); } @@ -332,8 +332,8 @@ int Options::GetHEVOpt(HEVOptions &ret){ ret.component_status = bson_options["DDC10"]["component_status"].get_int32().value; ret.width_cut = bson_options["DDC10"]["width_cut"].get_int32().value; ret.delay = bson_options["DDC10"]["delay"].get_int32().value; - ret.address = bson_options["DDC10"]["address"].get_utf8().value.to_string(); - ret.required = bson_options["DDC10"]["required"].get_utf8().value.to_string(); + ret.address = bson_options["DDC10"]["address"].get_string().value; + ret.required = bson_options["DDC10"]["required"].get_string().value; }catch(std::exception &E){ fLog->Entry(MongoLog::Local, "Exception getting DDC10 opts: %s",E.what()); return -1; diff --git a/main.cc b/main.cc index cf8e83d3..7870280e 100644 --- a/main.cc +++ b/main.cc @@ -195,8 +195,8 @@ int main(int argc, char** argv){ std::string command = ""; std::string user = ""; try{ - command = (doc)["command"].get_utf8().value.to_string(); - user = (doc)["user"].get_utf8().value.to_string(); + command = (doc)["command"].get_string().value; + user = (doc)["user"].get_string().value; } catch (const std::exception &e){ fLog->Entry(MongoLog::Warning, "Received malformed command %s", @@ -241,7 +241,7 @@ int main(int argc, char** argv){ catch(const std::exception &e){ } // Mongocxx types confusing so passing json strings around - std::string mode = doc["mode"].get_utf8().value.to_string(); + std::string mode(doc["mode"].get_string().value); fLog->Entry(MongoLog::Local, "Getting options doc for mode %s", mode.c_str()); fOptions = std::make_shared(fLog, mode, hostname, &opts_collection, pool, dbname, override_json);