From 7e82a1b97cd066506c34d7bcd07c83b46e68e3ef Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Fri, 24 Mar 2023 13:23:00 -0700 Subject: [PATCH 01/80] Start on transient federates: Add cmdline argument -nt to RTI and create needed attributes. --- core/federated/RTI/rti.c | 13 ++++++++++--- core/federated/RTI/rti_lib.c | 19 +++++++++++++++++++ core/federated/RTI/rti_lib.h | 9 ++++++++- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/core/federated/RTI/rti.c b/core/federated/RTI/rti.c index b8d1d7b0e..5fe6e94aa 100644 --- a/core/federated/RTI/rti.c +++ b/core/federated/RTI/rti.c @@ -66,10 +66,17 @@ int main(int argc, const char* argv[]) { start_trace(rti_trace_file_name); printf("Tracing the RTI execution in %s file.\n", rti_trace_file_name); } - printf("Starting RTI for %d federates in federation ID %s\n", _RTI.number_of_federates, _RTI.federation_id); + printf("Starting RTI for %d perisistent federates and %d transient federates in federation ID %s\n", \ + _RTI.number_of_federates, + _RTI.number_of_transient_federates, + _RTI.federation_id); + + // FIXME: Should number_of_federates + number_of_transient_federates be < UINT16_MAX? 
assert(_RTI.number_of_federates < UINT16_MAX); - _RTI.federates = (federate_t*)calloc(_RTI.number_of_federates, sizeof(federate_t)); - for (uint16_t i = 0; i < _RTI.number_of_federates; i++) { + assert(_RTI.number_of_transient_federates < UINT16_MAX); + + _RTI.federates = (federate_t *)calloc(_RTI.number_of_federates + _RTI.number_of_transient_federates, sizeof(federate_t)); + for (uint16_t i = 0; i < _RTI.number_of_federates + _RTI.number_of_transient_federates; i++) { initialize_federate(i); } int socket_descriptor = start_rti_server(_RTI.user_specified_port); diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index b456c4f8f..00f825df2 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -1746,6 +1746,7 @@ void* respond_to_erroneous_connections(void* nothing) { } void initialize_federate(uint16_t id) { + _RTI.federates[id].thread_id = -1; _RTI.federates[id].id = id; _RTI.federates[id].socket = -1; // No socket. _RTI.federates[id].clock_synchronization_enabled = true; @@ -1765,6 +1766,7 @@ void initialize_federate(uint16_t id) { _RTI.federates[id].server_ip_addr.s_addr = 0; _RTI.federates[id].server_port = -1; _RTI.federates[id].requested_stop = false; + _RTI.federates[id].is_transient = false; } int32_t start_rti_server(uint16_t port) { @@ -1873,6 +1875,8 @@ void usage(int argc, const char* argv[]) { printf(" The ID of the federation that this RTI will control.\n\n"); printf(" -n, --number_of_federates \n"); printf(" The number of federates in the federation that this RTI will control.\n\n"); + printf(" -nt, --number_of_transient_federates \n"); + printf(" The number of transient federates in the federation that this RTI will control.\n\n"); printf(" -p, --port \n"); printf(" The port number to use for the RTI. Must be larger than 0 and smaller than %d. 
Default is %d.\n\n", UINT16_MAX, STARTING_PORT); printf(" -c, --clock_sync [off|init|on] [period ] [exchanges-per-interval ]\n"); @@ -1983,6 +1987,21 @@ int process_args(int argc, const char* argv[]) { } _RTI.number_of_federates = (int32_t)num_federates; // FIXME: Loses numbers on 64-bit machines printf("RTI: Number of federates: %d\n", _RTI.number_of_federates); + } else if (strcmp(argv[i], "-nt") == 0 || strcmp(argv[i], "--number_of_transient_federates") == 0) { + if (argc < i + 2) { + fprintf(stderr, "Error: --number_of_transient_federates needs an integer argument.\n"); + usage(argc, argv); + return 0; + } + i++; + long num_transient_federates = strtol(argv[i], NULL, 10); + if (num_transient_federates == 0L || num_transient_federates == LONG_MAX || num_transient_federates == LONG_MIN) { + fprintf(stderr, "Error: --number_of_transient_federates needs a valid positive integer argument.\n"); + usage(argc, argv); + return 0; + } + _RTI.number_of_transient_federates = (int32_t)num_transient_federates; // FIXME: Loses numbers on 64-bit machines + printf("RTI: Number of transient federates: %d\n", _RTI.number_of_transient_federates); } else if (strcmp(argv[i], "-p") == 0 || strcmp(argv[i], "--port") == 0) { if (argc < i + 2) { fprintf( diff --git a/core/federated/RTI/rti_lib.h b/core/federated/RTI/rti_lib.h index 76b6f3742..fd1121a40 100644 --- a/core/federated/RTI/rti_lib.h +++ b/core/federated/RTI/rti_lib.h @@ -124,6 +124,7 @@ typedef struct federate_t { bool requested_stop; // Indicates that the federate has requested stop or has replied // to a request for stop from the RTI. Used to prevent double-counting // a federate when handling lf_request_stop(). + bool is_transient; // Indicates whether the federate is transient or persistent. 
} federate_t; /** @@ -152,9 +153,15 @@ typedef struct RTI_instance_t { // RTI's decided stop tag for federates tag_t max_stop_tag; - // Number of federates in the federation + // Number of (persistent) federates in the federation int32_t number_of_federates; + // Number of transient federates in the federation + int32_t number_of_transient_federates; + + // Number of connected transient federates in the federation + int32_t number_of_connected_transient_federates; + // The federates. federate_t* federates; From c236f06fed87ff3ad351e688746dd29d43f76e44 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Fri, 24 Mar 2023 13:52:07 -0700 Subject: [PATCH 02/80] RTI does not exit if a transient federate exits, but does when a persistent federate exits. --- core/federated/RTI/rti_lib.c | 148 +++++++++++++++++++++++++++++++---- core/federated/RTI/rti_lib.h | 9 +++ 2 files changed, 142 insertions(+), 15 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 00f825df2..77b693925 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -1342,7 +1342,7 @@ int32_t receive_and_check_fed_id_message(int socket_id, struct sockaddr_in* clie // FIXME: This should not exit with error but rather should just reject the connection. read_from_socket_errexit(socket_id, length, buffer, "RTI failed to read from accepted socket."); - uint16_t fed_id = _RTI.number_of_federates; // Initialize to an invalid value. + uint16_t fed_id = _RTI.number_of_federates + _RTI.number_of_transient_federates; // Initialize to an invalid value. // First byte received is the message type. if (buffer[0] != MSG_TYPE_FED_IDS) { @@ -1397,7 +1397,7 @@ int32_t receive_and_check_fed_id_message(int socket_id, struct sockaddr_in* clie send_reject(socket_id, FEDERATION_ID_DOES_NOT_MATCH); return -1; } else { - if (fed_id >= _RTI.number_of_federates) { + if (fed_id >= _RTI.number_of_federates + _RTI.number_of_transient_federates) { // Federate ID is out of range. 
lf_print_error("RTI received federate ID %d, which is out of range.", fed_id); if (_RTI.tracing_enabled){ @@ -1648,7 +1648,10 @@ bool authenticate_federate(int socket) { } #endif +// FIXME: The socket descriptor here is not used. Should be removed? void connect_to_federates(int socket_descriptor) { + // This loop will accept both, persistent and transient federates. + // For transient, however, i will be decreased for (int i = 0; i < _RTI.number_of_federates; i++) { // Wait for an incoming connection request. struct sockaddr client_fd; @@ -1683,6 +1686,9 @@ void connect_to_federates(int socket_descriptor) { // The first message from the federate should contain its ID and the federation ID. int32_t fed_id = receive_and_check_fed_id_message(socket_id, (struct sockaddr_in*)&client_fd); + + // FIXME: THIS IS A TEMPORARY HACK THAT MAKES FEDERATES WITH EVEN IDs PERSISTENT + // AND THOSE WITH ODD IDs TRANSIENT!!! if (fed_id >= 0 && receive_connection_information(socket_id, (uint16_t)fed_id) && receive_udp_message_and_set_up_clock_sync(socket_id, (uint16_t)fed_id)) { @@ -1691,15 +1697,26 @@ void connect_to_federates(int socket_descriptor) { // This has to be done after clock synchronization is finished // or that thread may end up attempting to handle incoming clock // synchronization messages. 
- pthread_create(&(_RTI.federates[fed_id].thread_id), NULL, federate_thread_TCP, &(_RTI.federates[fed_id])); - + // pthread_create(&(_RTI.federates[fed_id].thread_id), NULL, federate_thread_TCP, &(_RTI.federates[fed_id])); + + if (fed_id % 2 == 0) { // PART OF THE HACK, SAYING THAT THIS IS PERSISTENT + pthread_create(&(_RTI.federates[fed_id].thread_id), NULL, federate_thread_TCP, &(_RTI.federates[fed_id])); + _RTI.federates[fed_id].is_transient = false; + } else { // PART OF THE HACK, SAYING THAT THIS IS TRANSIENT + if (_RTI.number_of_connected_transient_federates < _RTI.number_of_transient_federates) { + pthread_create(&(_RTI.federates[fed_id].thread_id), NULL, federate_thread_TCP, &(_RTI.federates[fed_id])); + _RTI.number_of_connected_transient_federates++; + _RTI.federates[fed_id].is_transient = true; + i--; + } + } } else { // Received message was rejected. Try again. i--; } } - // All federates have connected. - LF_PRINT_DEBUG("All federates have connected to RTI."); + // All (persistent) federates have connected. + LF_PRINT_DEBUG("All (persistent) federates have connected to RTI."); if (_RTI.clock_sync_global_status >= clock_sync_on) { // Create the thread that performs periodic PTP clock synchronization sessions @@ -1718,6 +1735,95 @@ void connect_to_federates(int socket_descriptor) { } } +void* connect_to_transient_federates_thread() { + // This loop will continue to accept connections of transient federates, as + // soon as there is room + // This needs to terminate somehow... + // That will be part of the while condition + while (1) { + if (_RTI.number_of_connected_transient_federates < _RTI.number_of_transient_federates) { + // Continue waiting for an incoming connection requests from transients. + struct sockaddr client_fd; + uint32_t client_length = sizeof(client_fd); + // The following blocks until a federate connects. 
+ int socket_id = -1; + while(1) { + socket_id = accept(_RTI.socket_descriptor_TCP, &client_fd, &client_length); + if (socket_id >= 0) { + // Got a socket + break; + } else if (socket_id < 0 && (errno != EAGAIN || errno != EWOULDBLOCK)) { + lf_print_error_and_exit("RTI failed to accept the socket. %s.", strerror(errno)); + } else { + // Try again + lf_print_warning("RTI failed to accept the socket. %s. Trying again.", strerror(errno)); + continue; + } + } + + // Send RTI hello when RTI -a option is on. + #ifdef __RTI_AUTH__ + if (_RTI.authentication_enabled) { + if (!authenticate_federate(socket_id)) { + lf_print_warning("RTI failed to authenticate the incoming federate."); + // Ignore the federate that failed authentication. + i--; + continue; + } + } + #endif + + // The first message from the federate should contain its ID and the federation ID. + int32_t fed_id = receive_and_check_fed_id_message(socket_id, (struct sockaddr_in*)&client_fd); + // FIXME: THIS IS A TEMPORARY HACK THAT MAKES FEDERATES WITH EVEN IDs PERSISTENT + // AND THOSE WITH ODD IDs TRANSIENT!!! + if (fed_id >= 0 + && receive_connection_information(socket_id, (uint16_t)fed_id) + && receive_udp_message_and_set_up_clock_sync(socket_id, (uint16_t)fed_id)) { + + // Create a thread to communicate with the federate. + // This has to be done after clock synchronization is finished + // or that thread may end up attempting to handle incoming clock + // synchronization messages. + pthread_create(&(_RTI.federates[fed_id].thread_id), NULL, federate_thread_TCP, &(_RTI.federates[fed_id])); + _RTI.federates[fed_id].is_transient = true; + _RTI.number_of_connected_transient_federates++; + } + } + + // Check if transient federate threads did exit. 
+ void *thread_exit_status; + if (_RTI.number_of_connected_transient_federates > 0 ) { + for (int i = 0; i < _RTI.number_of_transient_federates + _RTI.number_of_federates; i++) { + // Chaeck if this is a transient federate that has already joined at some point + if (_RTI.federates[i].thread_id != -1 && _RTI.federates[i].is_transient) { + if (pthread_tryjoin_np(_RTI.federates[i].thread_id, &thread_exit_status) == 0) { + free_in_transit_message_q(_RTI.federates[i].in_transit_message_tags); + lf_print("RTI: Transient Federate %d thread exited.", _RTI.federates[i].id); + // Update the number of connected transient federates + _RTI.number_of_connected_transient_federates--; + + // Reinitialize the ststaus of the leaving federate + _RTI.federates[i].thread_id = -1; + _RTI.federates[i].socket = -1; // No socket. + _RTI.federates[i].last_granted = NEVER_TAG; + _RTI.federates[i].state = NOT_CONNECTED; + _RTI.federates[i].mode = REALTIME; + strncpy(_RTI.federates[i].server_hostname, "localhost", INET_ADDRSTRLEN); + _RTI.federates[i].server_ip_addr.s_addr = 0; + _RTI.federates[i].server_port = -1; + _RTI.federates[i].requested_stop = false; + // _RTI.federates[i].clock_synchronization_enabled = true; + // _RTI.federates[i].completed = NEVER_TAG; + // _RTI.federates[i].last_provisionally_granted = NEVER_TAG; + // _RTI.federates[i].next_event = NEVER_TAG; + } + } + } + } + } +} + void* respond_to_erroneous_connections(void* nothing) { while (true) { // Wait for an incoming connection request. @@ -1791,24 +1897,36 @@ void wait_for_federates(int socket_descriptor) { // Wait for connections from federates and create a thread for each. connect_to_federates(socket_descriptor); - // All federates have connected. - lf_print("RTI: All expected federates have connected. Starting execution."); + // All persistent federates have connected. + lf_print("RTI: All expected (persistent) federates have connected. 
Starting execution."); + if (_RTI.number_of_transient_federates > 0) { + lf_print("RTI: Transient Federates can join and leave the federation at anytime."); + } // The socket server will not continue to accept connections after all the federates // have joined. // In case some other federation's federates are trying to join the wrong // federation, need to respond. Start a separate thread to do that. pthread_t responder_thread; - pthread_create(&responder_thread, NULL, respond_to_erroneous_connections, NULL); + // FIXME: temporary remove, so that federate are not confused + // pthread_create(&responder_thread, NULL, respond_to_erroneous_connections, NULL); - // Wait for federate threads to exit. + // Create a thread that will continue listening to joining and leaving transient + // federates + pthread_t transient_thread; + pthread_create(&transient_thread, NULL, connect_to_transient_federates_thread, NULL); + + // Wait for persistent federate threads to exit. void* thread_exit_status; - for (int i = 0; i < _RTI.number_of_federates; i++) { - lf_print("RTI: Waiting for thread handling federate %d.", _RTI.federates[i].id); - pthread_join(_RTI.federates[i].thread_id, &thread_exit_status); - free_in_transit_message_q(_RTI.federates[i].in_transit_message_tags); - lf_print("RTI: Federate %d thread exited.", _RTI.federates[i].id); + for (int i = 0; i < _RTI.number_of_federates + _RTI.number_of_transient_federates; i++) { + if (_RTI.federates[i].is_transient == false) { + lf_print("RTI: Waiting for thread handling federate %d.", _RTI.federates[i].id); + pthread_join(_RTI.federates[i].thread_id, &thread_exit_status); + free_in_transit_message_q(_RTI.federates[i].in_transit_message_tags); + lf_print("RTI: Federate %d thread exited.", _RTI.federates[i].id); + } } + _RTI.all_federates_exited = true; diff --git a/core/federated/RTI/rti_lib.h b/core/federated/RTI/rti_lib.h index fd1121a40..5e093c5d4 100644 --- a/core/federated/RTI/rti_lib.h +++ b/core/federated/RTI/rti_lib.h @@ 
-628,6 +628,15 @@ bool authenticate_federate(int socket); */ void connect_to_federates(int socket_descriptor); +/** + * Once all persistent federates have connected, continue to wait for incoming + * connection requests from transient federates. + * Upon receiving it, it creates a thread to communicate with that federate. + * This thread continues to check whether the communication thread with a transient + * federate has exited, in which case it accepts other connections. + */ +void* connect_to_transient_federates_thread(); + /** * Thread to respond to new connections, which could be federates of other * federations who are attempting to join the wrong federation. From da789617e4e5efc3c65485dada2cf3f353bdf911 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Mon, 27 Mar 2023 11:22:29 -0700 Subject: [PATCH 03/80] Add is_transient attribute to the federate data structure --- include/core/federated/federate.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/core/federated/federate.h b/include/core/federated/federate.h index d15a0e920..237e9523a 100644 --- a/include/core/federated/federate.h +++ b/include/core/federated/federate.h @@ -168,6 +168,12 @@ typedef struct federate_instance_t { */ bool has_downstream; + /** + * Indicator of whether this federate is transient. + * The default value of false may be overridden in _lf_initialize_trigger_objects. + */ + bool is_transient; + /** * Used to prevent the federate from sending a REQUEST_STOP * message multiple times to the RTI. 
From 9f5aca4ec90c6e12bdd3e614056bbcc5ddef77f9 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Tue, 28 Mar 2023 10:01:21 -0700 Subject: [PATCH 04/80] Federate sends its type (transient or not) to RTI or p2p federate together with the FED_ID, and RTI, or federate, reads it --- core/federated/RTI/rti_lib.c | 49 ++++++++++++++++++++---------------- core/federated/federate.c | 19 ++++++++------ 2 files changed, 39 insertions(+), 29 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 77b693925..9759563e8 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -1334,8 +1334,8 @@ void send_reject(int socket_id, unsigned char error_code) { } int32_t receive_and_check_fed_id_message(int socket_id, struct sockaddr_in* client_fd) { - // Buffer for message ID, federate ID, and federation ID length. - size_t length = 1 + sizeof(uint16_t) + 1; // Message ID, federate ID, length of fedration ID. + // Buffer for message ID, federate ID, type, and federation ID length. + size_t length = 1 + sizeof(uint16_t) + 1 + 1; // Message ID, federate ID, type, length of fedration ID. unsigned char buffer[length]; // Read bytes from the socket. We need 4 bytes. @@ -1343,7 +1343,7 @@ int32_t receive_and_check_fed_id_message(int socket_id, struct sockaddr_in* clie read_from_socket_errexit(socket_id, length, buffer, "RTI failed to read from accepted socket."); uint16_t fed_id = _RTI.number_of_federates + _RTI.number_of_transient_federates; // Initialize to an invalid value. - + bool is_transient = false; // First byte received is the message type. if (buffer[0] != MSG_TYPE_FED_IDS) { if(buffer[0] == MSG_TYPE_P2P_SENDING_FED_ID || buffer[0] == MSG_TYPE_P2P_TAGGED_MESSAGE) { @@ -1366,10 +1366,11 @@ int32_t receive_and_check_fed_id_message(int socket_id, struct sockaddr_in* clie } else { // Received federate ID. 
fed_id = extract_uint16(buffer + 1); - LF_PRINT_DEBUG("RTI received federate ID: %d.", fed_id); + is_transient = (buffer[sizeof(uint16_t) + 1] == 1)? true : false ; + printf("\nRTI received federate ID: %d. which is transient: %d\n", fed_id, is_transient); // Read the federation ID. First read the length, which is one byte. - size_t federation_id_length = (size_t)buffer[sizeof(uint16_t) + 1]; + size_t federation_id_length = (size_t)buffer[sizeof(uint16_t) + 2]; char federation_id_received[federation_id_length + 1]; // One extra for null terminator. // Next read the actual federation ID. // FIXME: This should not exit on error, but rather just reject the connection. @@ -1442,6 +1443,7 @@ int32_t receive_and_check_fed_id_message(int socket_id, struct sockaddr_in* clie // because it is waiting for the start time to be // sent by the RTI before beginning its execution. _RTI.federates[fed_id].state = PENDING; + _RTI.federates[fed_id].is_transient = is_transient; LF_PRINT_DEBUG("RTI responding with MSG_TYPE_ACK to federate %d.", fed_id); // Send an MSG_TYPE_ACK message. @@ -1687,8 +1689,6 @@ void connect_to_federates(int socket_descriptor) { // The first message from the federate should contain its ID and the federation ID. int32_t fed_id = receive_and_check_fed_id_message(socket_id, (struct sockaddr_in*)&client_fd); - // FIXME: THIS IS A TEMPORARY HACK THAT MAKES FEDERATES WITH EVEN IDs PERSISTENT - // AND THOSE WITH ODD IDs TRANSIENT!!! if (fed_id >= 0 && receive_connection_information(socket_id, (uint16_t)fed_id) && receive_udp_message_and_set_up_clock_sync(socket_id, (uint16_t)fed_id)) { @@ -1697,18 +1697,11 @@ void connect_to_federates(int socket_descriptor) { // This has to be done after clock synchronization is finished // or that thread may end up attempting to handle incoming clock // synchronization messages. 
- // pthread_create(&(_RTI.federates[fed_id].thread_id), NULL, federate_thread_TCP, &(_RTI.federates[fed_id])); + pthread_create(&(_RTI.federates[fed_id].thread_id), NULL, federate_thread_TCP, &(_RTI.federates[fed_id])); - if (fed_id % 2 == 0) { // PART OF THE HACK, SAYING THAT THIS IS PERSISTENT - pthread_create(&(_RTI.federates[fed_id].thread_id), NULL, federate_thread_TCP, &(_RTI.federates[fed_id])); - _RTI.federates[fed_id].is_transient = false; - } else { // PART OF THE HACK, SAYING THAT THIS IS TRANSIENT - if (_RTI.number_of_connected_transient_federates < _RTI.number_of_transient_federates) { - pthread_create(&(_RTI.federates[fed_id].thread_id), NULL, federate_thread_TCP, &(_RTI.federates[fed_id])); - _RTI.number_of_connected_transient_federates++; - _RTI.federates[fed_id].is_transient = true; - i--; - } + if (_RTI.federates[fed_id].is_transient) { + _RTI.number_of_connected_transient_federates++; + i--; } } else { // Received message was rejected. Try again. @@ -1872,7 +1865,7 @@ void initialize_federate(uint16_t id) { _RTI.federates[id].server_ip_addr.s_addr = 0; _RTI.federates[id].server_port = -1; _RTI.federates[id].requested_stop = false; - _RTI.federates[id].is_transient = false; + _RTI.federates[id].is_transient = true; } int32_t start_rti_server(uint16_t port) { @@ -1903,6 +1896,9 @@ void wait_for_federates(int socket_descriptor) { lf_print("RTI: Transient Federates can join and leave the federation at anytime."); } + // Set all the remaining deferates as transient + // for (int i = 0; i < _RTI.number_of_federates + _RT) + // The socket server will not continue to accept connections after all the federates // have joined. 
// In case some other federation's federates are trying to join the wrong @@ -1912,9 +1908,12 @@ void wait_for_federates(int socket_descriptor) { // pthread_create(&responder_thread, NULL, respond_to_erroneous_connections, NULL); // Create a thread that will continue listening to joining and leaving transient - // federates + // federates, if any + // FIXME: pthread_t transient_thread; - pthread_create(&transient_thread, NULL, connect_to_transient_federates_thread, NULL); + if (_RTI.number_of_transient_federates > 0) { + pthread_create(&transient_thread, NULL, connect_to_transient_federates_thread, NULL); + } // Wait for persistent federate threads to exit. void* thread_exit_status; @@ -1926,7 +1925,13 @@ void wait_for_federates(int socket_descriptor) { lf_print("RTI: Federate %d thread exited.", _RTI.federates[i].id); } } - + + // FIXME: Once persistent federates exited, send stop requests to transient federates + // ??? + // and kill thread_exit_status? + if (_RTI.number_of_transient_federates > 0) { + // WIP + } _RTI.all_federates_exited = true; diff --git a/core/federated/federate.c b/core/federated/federate.c index 820269a6b..d1ecc925d 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -97,7 +97,8 @@ federate_instance_t _fed = { .min_delay_from_physical_action_to_federate_output = NEVER, .triggers_for_network_input_control_reactions = NULL, .triggers_for_network_input_control_reactions_size = 0, - .trigger_for_network_output_control_reactions = NULL + .trigger_for_network_output_control_reactions = NULL, + .is_transient = false }; @@ -554,7 +555,7 @@ void* handle_p2p_connections_from_federates(void* ignored) { } LF_PRINT_LOG("Accepted new connection from remote federate."); - size_t header_length = 1 + sizeof(uint16_t) + 1; + size_t header_length = 1 + sizeof(uint16_t) + 1 + 1; unsigned char buffer[header_length]; ssize_t bytes_read = read_from_socket(socket_id, header_length, (unsigned char*)&buffer); if (bytes_read != 
(ssize_t)header_length || buffer[0] != MSG_TYPE_P2P_SENDING_FED_ID) { @@ -594,6 +595,7 @@ void* handle_p2p_connections_from_federates(void* ignored) { // Extract the ID of the sending federate. uint16_t remote_fed_id = extract_uint16((unsigned char*)&(buffer[1])); + bool remote_fed_is_transient = buffer[1 + sizeof(uint16_t)]; LF_PRINT_DEBUG("Received sending federate ID %d.", remote_fed_id); // Trace the event when tracing is enabled @@ -841,7 +843,7 @@ void connect_to_federate(uint16_t remote_federate_id) { } } else { // Connect was successful. - size_t buffer_length = 1 + sizeof(uint16_t) + 1; + size_t buffer_length = 1 + sizeof(uint16_t) + 1 + 1; unsigned char buffer[buffer_length]; buffer[0] = MSG_TYPE_P2P_SENDING_FED_ID; if (_lf_my_fed_id > UINT16_MAX) { @@ -849,8 +851,9 @@ void connect_to_federate(uint16_t remote_federate_id) { lf_print_error_and_exit("Too many federates! More than %d.", UINT16_MAX); } encode_uint16((uint16_t)_lf_my_fed_id, (unsigned char*)&(buffer[1])); + buffer[1 + sizeof(uint16_t)] = _fed.is_transient ? 1 : 0; unsigned char federation_id_length = (unsigned char)strnlen(federation_metadata.federation_id, 255); - buffer[sizeof(uint16_t) + 1] = federation_id_length; + buffer[sizeof(uint16_t) + 2] = federation_id_length; // Trace the event when tracing is enabled tracepoint_federate_to_federate(send_FED_ID, _lf_my_fed_id, remote_federate_id, NULL); write_to_socket_errexit(socket_id, @@ -1090,7 +1093,7 @@ void connect_to_rti(const char* hostname, int port) { // Have connected to an RTI, but not sure it's the right RTI. // Send a MSG_TYPE_FED_IDS message and wait for a reply. // Notify the RTI of the ID of this federate and its federation. - unsigned char buffer[4]; + unsigned char buffer[5]; #ifdef FEDERATED_AUTHENTICATED LF_PRINT_LOG("Connected to an RTI. Performing HMAC-based authentication using federation ID."); @@ -1106,15 +1109,17 @@ void connect_to_rti(const char* hostname, int port) { lf_print_error_and_exit("Too many federates! 
More than %d.", UINT16_MAX); } encode_uint16((uint16_t)_lf_my_fed_id, &buffer[1]); + // Next send the federate type (persistent or transient) + buffer[1 + sizeof(uint16_t)] = _fed.is_transient? 1 : 0; // Next send the federation ID length. // The federation ID is limited to 255 bytes. size_t federation_id_length = strnlen(federation_metadata.federation_id, 255); - buffer[1 + sizeof(uint16_t)] = (unsigned char)(federation_id_length & 0xff); + buffer[2 + sizeof(uint16_t)] = (unsigned char)(federation_id_length & 0xff); // Trace the event when tracing is enabled tracepoint_federate_to_RTI(send_FED_ID, _lf_my_fed_id, NULL); - write_to_socket_errexit(_fed.socket_TCP_RTI, 2 + sizeof(uint16_t), buffer, + write_to_socket_errexit(_fed.socket_TCP_RTI, 3 + sizeof(uint16_t), buffer, "Failed to send federate ID to RTI."); // Next send the federation ID itself. From 190a211b5568b63a6664f82c3756dcada70c51d7 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Wed, 5 Apr 2023 03:23:59 -0700 Subject: [PATCH 05/80] Add reset transient federate. --- core/federated/RTI/rti_lib.c | 48 +++++++++++++++++++++++------------- core/federated/RTI/rti_lib.h | 6 +++++ 2 files changed, 37 insertions(+), 17 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 9759563e8..8e440b7c8 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -1716,8 +1716,8 @@ void connect_to_federates(int socket_descriptor) { // over the UDP channel, but only if the UDP channel is open and at least one // federate is performing runtime clock synchronization. 
bool clock_sync_enabled = false; - for (int i = 0; i < _RTI.number_of_federates; i++) { - if (_RTI.federates[i].clock_synchronization_enabled) { + for (int i = 0; i < _RTI.number_of_federates + _RTI.number_of_transient_federates; i++) { + if (_RTI.federates[i].clock_synchronization_enabled && !_RTI.federates[i].is_transient) { clock_sync_enabled = true; break; } @@ -1795,21 +1795,8 @@ void* connect_to_transient_federates_thread() { lf_print("RTI: Transient Federate %d thread exited.", _RTI.federates[i].id); // Update the number of connected transient federates _RTI.number_of_connected_transient_federates--; - - // Reinitialize the ststaus of the leaving federate - _RTI.federates[i].thread_id = -1; - _RTI.federates[i].socket = -1; // No socket. - _RTI.federates[i].last_granted = NEVER_TAG; - _RTI.federates[i].state = NOT_CONNECTED; - _RTI.federates[i].mode = REALTIME; - strncpy(_RTI.federates[i].server_hostname, "localhost", INET_ADDRSTRLEN); - _RTI.federates[i].server_ip_addr.s_addr = 0; - _RTI.federates[i].server_port = -1; - _RTI.federates[i].requested_stop = false; - // _RTI.federates[i].clock_synchronization_enabled = true; - // _RTI.federates[i].completed = NEVER_TAG; - // _RTI.federates[i].last_provisionally_granted = NEVER_TAG; - // _RTI.federates[i].next_event = NEVER_TAG; + // Reset the status of the leaving federate + reset_transient_federate(_RTI.federates[i].id); } } } @@ -1868,6 +1855,33 @@ void initialize_federate(uint16_t id) { _RTI.federates[id].is_transient = true; } +void reset_transient_federate(uint16_t id) +{ + // The commented lines highlignts the values that a transient federate needs + // to passes to its future joining one + _RTI.federates[id].thread_id = -1; + // _RTI.federates[id].id = id; + _RTI.federates[id].socket = -1; // No socket. 
+ _RTI.federates[id].clock_synchronization_enabled = true; + // _RTI.federates[id].completed = NEVER_TAG; + _RTI.federates[id].last_granted = NEVER_TAG; + _RTI.federates[id].last_provisionally_granted = NEVER_TAG; + _RTI.federates[id].next_event = NEVER_TAG; + _RTI.federates[id].in_transit_message_tags = initialize_in_transit_message_q(); + _RTI.federates[id].state = NOT_CONNECTED; + // _RTI.federates[id].upstream = NULL; + // _RTI.federates[id].upstream_delay = NULL; + // _RTI.federates[id].num_upstream = 0; + // _RTI.federates[id].downstream = NULL; + // _RTI.federates[id].num_downstream = 0; + _RTI.federates[id].mode = REALTIME; + strncpy(_RTI.federates[id].server_hostname ,"localhost", INET_ADDRSTRLEN); + _RTI.federates[id].server_ip_addr.s_addr = 0; + _RTI.federates[id].server_port = -1; + _RTI.federates[id].requested_stop = false; + _RTI.federates[id].is_transient = true; +} + int32_t start_rti_server(uint16_t port) { int32_t specified_port = port; if (port == 0) { diff --git a/core/federated/RTI/rti_lib.h b/core/federated/RTI/rti_lib.h index 5e093c5d4..3983ad8c2 100644 --- a/core/federated/RTI/rti_lib.h +++ b/core/federated/RTI/rti_lib.h @@ -650,6 +650,12 @@ void* respond_to_erroneous_connections(void* nothing); */ void initialize_federate(uint16_t id); +/** + * Reset the federate with the specified ID. The federate has to be transient. + * @param id The transient federate ID. + */ +void reset_transient_federate(uint16_t id); + /** * Start the socket server for the runtime infrastructure (RTI) and * return the socket descriptor. 
From 13d8409e63997ddb0c576e633909e06499c1dd61 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Thu, 6 Apr 2023 07:49:34 -0700 Subject: [PATCH 06/80] The number of transient federates (given as cmdline argument) can be 0 --- core/federated/RTI/rti_lib.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 8e440b7c8..a15c163b0 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -1855,8 +1855,7 @@ void initialize_federate(uint16_t id) { _RTI.federates[id].is_transient = true; } -void reset_transient_federate(uint16_t id) -{ +void reset_transient_federate(uint16_t id) { // The commented lines highlignts the values that a transient federate needs // to passes to its future joining one _RTI.federates[id].thread_id = -1; @@ -2132,8 +2131,8 @@ int process_args(int argc, const char* argv[]) { } i++; long num_transient_federates = strtol(argv[i], NULL, 10); - if (num_transient_federates == 0L || num_transient_federates == LONG_MAX || num_transient_federates == LONG_MIN) { - fprintf(stderr, "Error: --number_of_transient_federates needs a valid positive integer argument.\n"); + if (num_transient_federates == LONG_MAX || num_transient_federates == LONG_MIN) { + fprintf(stderr, "Error: --number_of_transient_federates needs a valid positive or null integer argument.\n"); usage(argc, argv); return 0; } From 20817f11a57b0696564307ecd112b3ca757a34fe Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Mon, 10 Apr 2023 16:11:13 -0700 Subject: [PATCH 07/80] Add canvas to identify if a TIMESTAMP message if received from a trabsient federate after the startup phase, or not. 
--- core/federated/RTI/rti_lib.c | 97 +++++++++++++++++++++--------------- core/federated/RTI/rti_lib.h | 8 +++ 2 files changed, 65 insertions(+), 40 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index a15c163b0..29f12e14b 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -1013,52 +1013,69 @@ void handle_timestamp(federate_t *my_fed) { } LF_PRINT_LOG("RTI received timestamp message: %lld.", timestamp); + pthread_mutex_lock(&_RTI.rti_mutex); - _RTI.num_feds_proposed_start++; - if (timestamp > _RTI.max_start_time) { - _RTI.max_start_time = timestamp; - } - if (_RTI.num_feds_proposed_start == _RTI.number_of_federates) { - // All federates have proposed a start time. - pthread_cond_broadcast(&_RTI.received_start_times); - } else { - // Some federates have not yet proposed a start time. - // wait for a notification. - while (_RTI.num_feds_proposed_start < _RTI.number_of_federates) { - // FIXME: Should have a timeout here? - pthread_cond_wait(&_RTI.received_start_times, &_RTI.rti_mutex); + // The behavior here depends on whether the message is received within the + // startup phase or not. By startup phase, it is menat that all persistent federates + // have their start_time set (already started or about to start). + // If all persistent federates have started, then a TIMESTAMP message will be + // received from a transient. In such case, the start_time of the newly joined + // transient federate will depend on the NET of his updtream and downstream + // federates. + if (_RTI.num_feds_proposed_start < _RTI.number_of_federates) { + if (timestamp > _RTI.max_start_time) { + _RTI.max_start_time = timestamp; + } + // Check that persistent federates did propose a start_time + if (!my_fed->is_transient) { + _RTI.num_feds_proposed_start++; + } + if (_RTI.num_feds_proposed_start == _RTI.number_of_federates) { + // All federates have proposed a start time. 
+ pthread_cond_broadcast(&_RTI.received_start_times); + } else { + // Some federates have not yet proposed a start time. + // wait for a notification. + while (_RTI.num_feds_proposed_start < _RTI.number_of_federates) { + // FIXME: Should have a timeout here? + pthread_cond_wait(&_RTI.received_start_times, &_RTI.rti_mutex); + } } - } - pthread_mutex_unlock(&_RTI.rti_mutex); + pthread_mutex_unlock(&_RTI.rti_mutex); - // Send back to the federate the maximum time plus an offset on a TIMESTAMP - // message. - unsigned char start_time_buffer[MSG_TYPE_TIMESTAMP_LENGTH]; - start_time_buffer[0] = MSG_TYPE_TIMESTAMP; - // Add an offset to this start time to get everyone starting together. - start_time = _RTI.max_start_time + DELAY_START; - encode_int64(swap_bytes_if_big_endian_int64(start_time), &start_time_buffer[1]); + // Send back to the federate the maximum time plus an offset on a TIMESTAMP + // message. + unsigned char start_time_buffer[MSG_TYPE_TIMESTAMP_LENGTH]; + start_time_buffer[0] = MSG_TYPE_TIMESTAMP; + // Add an offset to this start time to get everyone starting together. 
+ start_time = _RTI.max_start_time + DELAY_START; + encode_int64(swap_bytes_if_big_endian_int64(start_time), &start_time_buffer[1]); - if (_RTI.tracing_enabled) { - tag_t tag = {.time = start_time, .microstep = 0}; - tracepoint_RTI_to_federate(send_TIMESTAMP, my_fed->id, &tag); - } - ssize_t bytes_written = write_to_socket( - my_fed->socket, MSG_TYPE_TIMESTAMP_LENGTH, - start_time_buffer - ); - if (bytes_written < MSG_TYPE_TIMESTAMP_LENGTH) { - lf_print_error("Failed to send the starting time to federate %d.", my_fed->id); - } + if (_RTI.tracing_enabled) { + tag_t tag = {.time = start_time, .microstep = 0}; + tracepoint_RTI_to_federate(send_TIMESTAMP, my_fed->id, &tag); + } + ssize_t bytes_written = write_to_socket( + my_fed->socket, MSG_TYPE_TIMESTAMP_LENGTH, + start_time_buffer + ); + if (bytes_written < MSG_TYPE_TIMESTAMP_LENGTH) { + lf_print_error("Failed to send the starting time to federate %d.", my_fed->id); + } - pthread_mutex_lock(&_RTI.rti_mutex); - // Update state for the federate to indicate that the MSG_TYPE_TIMESTAMP - // message has been sent. That MSG_TYPE_TIMESTAMP message grants time advance to - // the federate to the start time. - my_fed->state = GRANTED; - pthread_cond_broadcast(&_RTI.sent_start_time); - LF_PRINT_LOG("RTI sent start time %lld to federate %d.", start_time, my_fed->id); + pthread_mutex_lock(&_RTI.rti_mutex); + // Update state for the federate to indicate that the MSG_TYPE_TIMESTAMP + // message has been sent. That MSG_TYPE_TIMESTAMP message grants time advance to + // the federate to the start time. 
+ my_fed->state = GRANTED; + pthread_cond_broadcast(&_RTI.sent_start_time); + LF_PRINT_LOG("RTI sent start time %lld to federate %d.", start_time, my_fed->id); + } else { + // A transient has joined after the startup phase + + // Send NET_QUERY to all federates + } pthread_mutex_unlock(&_RTI.rti_mutex); } diff --git a/core/federated/RTI/rti_lib.h b/core/federated/RTI/rti_lib.h index 3983ad8c2..085b3acb9 100644 --- a/core/federated/RTI/rti_lib.h +++ b/core/federated/RTI/rti_lib.h @@ -493,6 +493,14 @@ void handle_address_ad(uint16_t federate_id); /** * A function to handle timestamp messages. * This function assumes the caller does not hold the mutex. + * + * The behavior here depends on whether the message is received within the + * startup phase or not. By startup phase, it is menat that all persistent federates + * have their start_time set (already started or about to start). + * If all persistent federates have started, then a TIMESTAMP message will be + * received from a transient. In such case, the start_time of the newly joined + * transient federate will depend on the NET of his updtream and downstream + * federates. 
*/ void handle_timestamp(federate_t *my_fed); From 4a9b9c87f1d7efede9d958fe85c79ba8f83269a7 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Wed, 12 Apr 2023 16:55:50 -0700 Subject: [PATCH 08/80] Add new message types and handle the start_time of trabsient federates after the startup phase --- core/federated/RTI/rti_lib.c | 158 ++++++++++++++++++++++++++-- core/federated/RTI/rti_lib.h | 25 +++++ core/federated/federate.c | 96 ++++++++++++++++- include/core/federated/net_common.h | 130 +++++++++++++++-------- include/core/trace.h | 18 ++++ 5 files changed, 373 insertions(+), 54 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 29f12e14b..904474895 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -248,6 +248,36 @@ void send_tag_advance_grant(federate_t* fed, tag_t tag) { } } +bool send_next_event_tag_query (federate_t* conn_fed, uint16_t fed_id) { + if (conn_fed->state == NOT_CONNECTED) { + return false; + } + + // Write the message type and the related fed_id + size_t message_length = 1 + sizeof(uint16_t); + unsigned char buffer[message_length]; + buffer[0] = MSG_TYPE_NEXT_EVENT_TAG_QUERY; + encode_uint16(fed_id, (unsigned char *)&(buffer[1])); + + if (_RTI.tracing_enabled) { + tracepoint_RTI_to_federate(send_NET_QR, conn_fed->id, NULL); + } + // If write_to_socket fails, the consider it as soft failure and update the + // federate's status. 
+ ssize_t bytes_written = write_to_socket(conn_fed->socket, message_length, buffer); + if (bytes_written < (ssize_t)message_length) { + lf_print_error("RTI failed to send next event tag query to federate %d.", conn_fed->id); + if (bytes_written < 0) { + conn_fed->state = NOT_CONNECTED; + } + return false; + } else { + LF_PRINT_LOG("RTI sent to federate %d a next event tag query.", conn_fed->id); + return true; + } +} + + tag_t transitive_next_event(federate_t* fed, tag_t candidate, bool visited[]) { if (visited[fed->id] || fed->state == NOT_CONNECTED) { // Federate has stopped executing or we have visited it before. @@ -1013,15 +1043,11 @@ void handle_timestamp(federate_t *my_fed) { } LF_PRINT_LOG("RTI received timestamp message: %lld.", timestamp); - + // FIXME: Should the lock be inside the if statement only? pthread_mutex_lock(&_RTI.rti_mutex); - // The behavior here depends on whether the message is received within the - // startup phase or not. By startup phase, it is menat that all persistent federates - // have their start_time set (already started or about to start). - // If all persistent federates have started, then a TIMESTAMP message will be - // received from a transient. In such case, the start_time of the newly joined - // transient federate will depend on the NET of his updtream and downstream - // federates. + my_fed->fed_start_time = timestamp; + // Processing the TIMESTAMP depends on whether it is the startup phase (all + // persistent federates joined) or not. 
if (_RTI.num_feds_proposed_start < _RTI.number_of_federates) { if (timestamp > _RTI.max_start_time) { _RTI.max_start_time = timestamp; @@ -1063,18 +1089,117 @@ void handle_timestamp(federate_t *my_fed) { if (bytes_written < MSG_TYPE_TIMESTAMP_LENGTH) { lf_print_error("Failed to send the starting time to federate %d.", my_fed->id); } - pthread_mutex_lock(&_RTI.rti_mutex); + my_fed->fed_start_time = start_time; // Update state for the federate to indicate that the MSG_TYPE_TIMESTAMP // message has been sent. That MSG_TYPE_TIMESTAMP message grants time advance to // the federate to the start time. my_fed->state = GRANTED; pthread_cond_broadcast(&_RTI.sent_start_time); LF_PRINT_LOG("RTI sent start time %lld to federate %d.", start_time, my_fed->id); + pthread_mutex_unlock(&_RTI.rti_mutex); } else { // A transient has joined after the startup phase - - // Send NET_QUERY to all federates + // At this point, we already hold the mutex + + // Iterate over the upstream federates to query the next event tag. + // Since they may not be connected (being themselves transient, for example) + // the total number of connected federates (my_fed->num_of_conn_federates) + // will be compared against those who already sent the NET query response + // (my_fed->num_of_conn_federates_sent_net) + for (int j = 0; j < my_fed->num_upstream; j++) { + federate_t* upstream = &_RTI.federates[my_fed->upstream[j]]; + // Ignore this federate if it has resigned or if it a transient that + // is absent + if (upstream->state == NOT_CONNECTED) { + continue; + } + if (send_next_event_tag_query(upstream, my_fed->id)) { + my_fed->num_of_conn_federates++; + } + } + // Iterate over the downstream federates to query the next event tag. + for (int j = 0; j < my_fed->num_downstream; j++) { + federate_t* downstream = &_RTI.federates[my_fed->downstream[j]]; + // Ignore this federate if it has resigned. 
+ if (downstream->state == NOT_CONNECTED) { + continue; + } + if (send_next_event_tag_query(downstream, my_fed->id)) { + my_fed->num_of_conn_federates++; + } + } + + // If the transient federate has no connected upstream or downstream federates, + // then do not wait for the start time + if (my_fed->num_of_conn_federates == 0) { + my_fed->start_time_is_set = true; + lf_print_debug("the start time of transient is: %lld", my_fed->fed_start_time); + } + pthread_mutex_unlock(&_RTI.rti_mutex); + // Now wait until all connected federates have responded with their next + // event logial time instant. + + while(!my_fed->start_time_is_set); + + // Once the start time set, sent it to the joining transient + unsigned char start_time_buffer[MSG_TYPE_TIMESTAMP_LENGTH]; + start_time_buffer[0] = MSG_TYPE_TIMESTAMP; + // FIXME: Sould we check if the time instant have passed or not, and if yes, + // add a delay? + start_time = my_fed->fed_start_time; + encode_int64(swap_bytes_if_big_endian_int64(start_time), &start_time_buffer[1]); + + if (_RTI.tracing_enabled) { + tag_t tag = {.time = start_time, .microstep = 0}; + tracepoint_RTI_to_federate(send_TIMESTAMP, my_fed->id, &tag); + } + ssize_t bytes_written = write_to_socket( + my_fed->socket, MSG_TYPE_TIMESTAMP_LENGTH, + start_time_buffer + ); + if (bytes_written < MSG_TYPE_TIMESTAMP_LENGTH) { + lf_print_error("Failed to send the starting time to federate %d.", my_fed->id); + } + pthread_mutex_lock(&_RTI.rti_mutex); + my_fed->state = GRANTED; + LF_PRINT_LOG("RTI sent start time %lld to federate %d.", start_time, my_fed->id); + pthread_mutex_unlock(&_RTI.rti_mutex); + } +} + +void handle_next_event_tag_query_response(federate_t *my_fed) { + // Get the logical time instant and the transient fed_id from the socket + size_t buffer_size = 1 + sizeof(uint16_t) + sizeof(uint16_t); + unsigned char buffer[buffer_size]; + // Read bytes from the socket. We need 8 bytes. 
+ ssize_t bytes_read = read_from_socket(my_fed->socket, buffer_size, (unsigned char*)&buffer); + if (bytes_read < (ssize_t)sizeof(int64_t)) { + lf_print_error("ERROR reading next event query response from federate %d.\n", my_fed->id); + } + + // Get the timestamp and the transient federate id + int64_t timestamp = swap_bytes_if_big_endian_int64(*((int64_t *)(&(buffer[1])))); + uint16_t transient_fed_id = extract_uint16(buffer[9]); + if (_RTI.tracing_enabled) { + tag_t tag = {.time = timestamp, .microstep = 0}; + tracepoint_RTI_from_federate(receive_NET_QR_RES, my_fed->id, &tag); + } + LF_PRINT_LOG("RTI received NET query response message: %lld.", timestamp); + + // FIXME: Should the lock be inside the if statement only? + pthread_mutex_lock(&_RTI.rti_mutex); + // Processing the TIMESTAMP depends on whether it is the startup phase (all + // persistent federates joined) or not. + federate_t* transient = &(_RTI.federates[transient_fed_id]); + if (timestamp > transient->fed_start_time) { + transient->fed_start_time = timestamp; + } + // Check that upstream and downstream federates of the transient did propose a start_time + transient->num_of_conn_federates_sent_net++; + if (transient->num_of_conn_federates_sent_net == transient->num_of_conn_federates) { + // All expected connected federates to transient have sent responses with NET to RTI + transient->start_time_is_set = true; } pthread_mutex_unlock(&_RTI.rti_mutex); } @@ -1294,6 +1419,9 @@ void* federate_thread_TCP(void* fed) { case MSG_TYPE_TIMESTAMP: handle_timestamp(my_fed); break; + case MSG_TYPE_NEXT_EVENT_TAG_QUERY_RESPONSE: + handle_next_event_tag_query_response(my_fed); + break; case MSG_TYPE_ADDRESS_QUERY: handle_address_query(my_fed->id); break; @@ -1870,6 +1998,10 @@ void initialize_federate(uint16_t id) { _RTI.federates[id].server_port = -1; _RTI.federates[id].requested_stop = false; _RTI.federates[id].is_transient = true; + _RTI.federates[id].fed_start_time = 0LL; + 
_RTI.federates[id].num_of_conn_federates = 0; + _RTI.federates[id].num_of_conn_federates_sent_net = 0; + _RTI.federates[id].start_time_is_set = false; } void reset_transient_federate(uint16_t id) { @@ -1896,6 +2028,10 @@ void reset_transient_federate(uint16_t id) { _RTI.federates[id].server_port = -1; _RTI.federates[id].requested_stop = false; _RTI.federates[id].is_transient = true; + _RTI.federates[id].fed_start_time = 0LL; + _RTI.federates[id].num_of_conn_federates = 0; + _RTI.federates[id].num_of_conn_federates_sent_net = 0; + _RTI.federates[id].start_time_is_set = false; } int32_t start_rti_server(uint16_t port) { diff --git a/core/federated/RTI/rti_lib.h b/core/federated/RTI/rti_lib.h index 085b3acb9..791a140c7 100644 --- a/core/federated/RTI/rti_lib.h +++ b/core/federated/RTI/rti_lib.h @@ -125,6 +125,14 @@ typedef struct federate_t { // to a request for stop from the RTI. Used to prevent double-counting // a federate when handling lf_request_stop(). bool is_transient; // Indicates whether the federate is transient or persistent. + int64_t fed_start_time; // Records the start time of the federate, which is mainly useful for transient federates + int num_of_conn_federates; // Records the total number of connected federates among the upstream and + // downstream federates. This is used only in the case of transient federate, for + // computing the start time. + int num_of_conn_federates_sent_net; // Records the total number of connected federates + // that sent responded to the next event tag query form the RTI. + bool start_time_is_set; // Boolean variable used to signal that all connected federates + // have sent a response to next event tag query from the RTI. } federate_t; /** @@ -703,6 +711,23 @@ int process_clock_sync_args(int argc, const char* argv[]); */ int process_args(int argc, const char* argv[]); +////////////////////////////////////////////////////////// +/** + * Queries conn_fed for its Next Event Tag (using MSG_TYPE_NEXT_EVENT_TAG_QUERY). 
+ * If the function fails to send the query, for example in case the federate is + * not connected (can be a transient one itself), then return false. In such case, + * the RTI will not wait to receive an answer from it. + * + * The fed_id of the transient federate is sent to conn_fed, which should be + * returned as is within MSG_TYPE_NEXT_EVENT_TYPE_QUERY_RESPONSE. The aim is to + * identify which of the transient federates has initiated the request. This enables + * the support of two diffrent transient federates joining close eanough in time. + * + * @param conn_fed: the federate to which to send the NET request + * @param fed_id: The ID of the transient joining federate + * @return true, if successfully sent, false otherwise. + */ +bool send_next_event_tag_query(federate_t* conn_fed, uint16_t fed_id); #endif // RTI_LIB_H \ No newline at end of file diff --git a/core/federated/federate.c b/core/federated/federate.c index d1ecc925d..da19358b7 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -2393,6 +2393,93 @@ void handle_stop_request_message() { lf_mutex_unlock(&mutex); } +////////////////// Start of transient time coordination //////////////////////// + +/** + * Handle a Next Event Tag query received from the RTI. Such message is sent when + * a transient federate attempts to join a federation after the startup phase. + * The funtion will read the NET in the event queue and call + * send_next_event_tag_query_answer(). + * + * FIXME: This function assumes the caller does hold the mutex lock? + */ +void handle_next_event_tag_query(){ + tracepoint_federate_from_RTI(receive_NET_QR, _lf_my_fed_id, NULL); + // Extract the fed_id of the relative transient federate + // Read the header. 
+ size_t bytes_to_read = 1 + sizeof(uint16_t); + unsigned char buffer[bytes_to_read]; + read_from_socket_errexit(_fed.socket_TCP_RTI, bytes_to_read, buffer, + "Failed to read the transient federate ID."); + + uint16_t fed_id = extract_uint16(buffer + 1); + // Get the next event tag in the reactions queue + tag_t next_tag = get_next_event_tag(); + + instant_t logical_time = next_tag.time; + + // Answer with the time instant of the next event tag + send_next_event_tag_query_response(logical_time, fed_id); +} + +/** + * Send the answer to the next event tag query to the RTI. + * + * @param time The time. + * @param fed_id The transient federate id to send back + * Print a soft error message otherwise + */ +void send_next_event_tag_query_response(instant_t time, uint16_t fed_id) { + LF_PRINT_DEBUG("Sending logical time " PRINTF_TIME " to the RTI.", time); + size_t bytes_to_write = 1 + sizeof(instant_t) + sizeof(uint16_t); + unsigned char buffer[bytes_to_write]; + buffer[0] = MSG_TYPE_NEXT_EVENT_TAG_QUERY_RESPONSE; + encode_int64(time, &(buffer[1])); + encode_uint16(fed_id, &(buffer[9])); + lf_mutex_lock(&outbound_socket_mutex); + if (_fed.socket_TCP_RTI < 0) { + lf_print_warning("Socket is no longer connected. Dropping message."); + lf_mutex_unlock(&outbound_socket_mutex); + return; + } + + tag_t tag = {.time = time, .microstep = 0}; + // Trace the event when tracing is enabled + tracepoint_federate_to_RTI(send_NET_QR_RES, _lf_my_fed_id, &tag); + + ssize_t bytes_written = write_to_socket(_fed.socket_TCP_RTI, bytes_to_write, buffer); + if (bytes_written < (ssize_t)bytes_to_write) { + lf_print_error_and_exit("Failed to send time " PRINTF_TIME " to the RTI." + " Error code %d: %s", + time - start_time, + errno, + strerror(errno) + ); + } + lf_mutex_unlock(&outbound_socket_mutex); +} + +/** + * Handle a Halt message received form the RTI. This will cause the federation to + * stop. + * + * FIXME: WIP. 
Should it be + */ +void handle_halt(){ + +} + +/** + * Handle a RESUME message received from the RTI + * + * FIXME: What to do exactly? Can it be mixed with handle_halt()? + */ +void handle_resume(){ + // tracepoint_federate_from_RTI(receive_TAG, _lf_my_fed_id, &TAG); +} + +/////////////////// End of transient time coordination ///////////////////////// + /** * Close sockets used to communicate with other federates, if they are open, * and send a MSG_TYPE_RESIGN message to the RTI. This implements the function @@ -2595,6 +2682,12 @@ void* listen_to_rti_TCP(void* args) { case MSG_TYPE_PORT_ABSENT: handle_port_absent_message(_fed.socket_TCP_RTI, -1); break; + case MSG_TYPE_NEXT_EVENT_TAG_QUERY: + handle_next_event_tag_query(); + break; + case MSG_TYPE_HALT: + handle_halt(); + break; case MSG_TYPE_CLOCK_SYNC_T1: case MSG_TYPE_CLOCK_SYNC_T4: lf_print_error("Federate %d received unexpected clock sync message from RTI on TCP socket.", @@ -2774,8 +2867,7 @@ tag_t _lf_send_next_event_tag(tag_t tag, bool wait_for_reply) { LF_PRINT_DEBUG("Granted tag " PRINTF_TAG " because the federate has neither " "upstream nor downstream federates.", tag.time - start_time, tag.microstep); - return tag; - } + return tag; } // If time advance (TAG or PTAG) has already been granted for this tag // or a larger tag, then return immediately. diff --git a/include/core/federated/net_common.h b/include/core/federated/net_common.h index 8ace6d9b0..c4c4263dc 100644 --- a/include/core/federated/net_common.h +++ b/include/core/federated/net_common.h @@ -64,7 +64,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * When the federation IDs match, the RTI will respond with an * MSG_TYPE_ACK. - * + * * The next message to the RTI will be a MSG_TYPE_NEIGHBOR_STRUCTURE message * that informs the RTI about connections between this federate and other * federates where messages are routed through the RTI. 
Currently, this only @@ -172,9 +172,9 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * each federate report a reading of its physical clock to the RTI on a * `MSG_TYPE_TIMESTAMP`. The RTI broadcasts the maximum of these readings plus * `DELAY_START` to all federates as the start time, again on a `MSG_TYPE_TIMESTAMP`. - * - * The next step depends on the coordination type. - * + * + * The next step depends on the coordination type. + * * Under centralized coordination, each federate will send a * `MSG_TYPE_NEXT_EVENT_TAG` to the RTI with the start tag. That is to say that * each federate has a valid event at the start tag (start time, 0) and it will @@ -185,10 +185,10 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * have to wait for a `MSG_TYPE_TAG_ADVANCE_GRANT` or a * `MSG_TYPE_PROVISIONAL_TAG_ADVANCE_GRANT` before it can advance to a * particular tag. - * + * * Under decentralized coordination, the coordination is governed by STA and * STAAs, as further explained in https://doi.org/10.48550/arXiv.2109.07771. - * + * * FIXME: Expand this. Explain control reactions. * */ @@ -208,7 +208,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define UDP_TIMEOUT_TIME SEC(1) - /** * Size of the buffer used for messages sent between federates. * This is used by both the federates and the rti, so message lengths @@ -345,7 +344,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * One byte equal to MSG_TYPE_FED_RESPONSE. * * Eight bytes for federate's nonce. * * Two bytes (ushort) giving the federate ID. - * * 32 bytes for HMAC tag based on SHA256. + * * 32 bytes for HMAC tag based on SHA256. * The HMAC tag is composed of the following order: * * One byte equal to MSG_TYPE_FED_RESPONSE. * * Two bytes (ushort) giving the federate ID. @@ -412,7 +411,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #define MSG_TYPE_TAGGED_MESSAGE 5 -/** +/** * Byte identifying a next event tag (NET) message sent from a federate in * centralized coordination. The next eight bytes will be the timestamp. The * next four bytes will be the microstep. This message from a federate tells the @@ -428,7 +427,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define MSG_TYPE_NEXT_EVENT_TAG 6 -/** +/** * Byte identifying a time advance grant (TAG) sent by the RTI to a federate * in centralized coordination. This message is a promise by the RTI to the federate * that no later message sent to the federate will have a tag earlier than or @@ -438,7 +437,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define MSG_TYPE_TAG_ADVANCE_GRANT 7 -/** +/** * Byte identifying a provisional time advance grant (PTAG) sent by the RTI to a federate * in centralized coordination. This message is a promise by the RTI to the federate * that no later message sent to the federate will have a tag earlier than the tag @@ -448,7 +447,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define MSG_TYPE_PROVISIONAL_TAG_ADVANCE_GRANT 8 -/** +/** * Byte identifying a logical tag complete (LTC) message sent by a federate * to the RTI. * The next eight bytes will be the timestep of the completed tag. @@ -488,18 +487,20 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * that specifies the stop time on all other federates, then every federate * depends on every other federate and time cannot be advanced. * Hence, the actual stop time may be nondeterministic. - * + * * If, on the other hand, the federate requesting the stop is upstream of every * other federate, then it should be possible to respect its requested stop tag. 
*/ #define MSG_TYPE_STOP_REQUEST 10 #define MSG_TYPE_STOP_REQUEST_LENGTH (1 + sizeof(instant_t) + sizeof(microstep_t)) -#define ENCODE_STOP_REQUEST(buffer, time, microstep) do { \ - buffer[0] = MSG_TYPE_STOP_REQUEST; \ - encode_int64(time, &(buffer[1])); \ - assert(microstep >= 0); \ - encode_int32((int32_t)microstep, &(buffer[1 + sizeof(instant_t)])); \ -} while(0) +#define ENCODE_STOP_REQUEST(buffer, time, microstep) \ + do \ + { \ + buffer[0] = MSG_TYPE_STOP_REQUEST; \ + encode_int64(time, &(buffer[1])); \ + assert(microstep >= 0); \ + encode_int32((int32_t)microstep, &(buffer[1 + sizeof(instant_t)])); \ + } while (0) /** * Byte indicating a federate's reply to a MSG_TYPE_STOP_REQUEST that was sent @@ -511,28 +512,32 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define MSG_TYPE_STOP_REQUEST_REPLY 11 #define MSG_TYPE_STOP_REQUEST_REPLY_LENGTH (1 + sizeof(instant_t) + sizeof(microstep_t)) -#define ENCODE_STOP_REQUEST_REPLY(buffer, time, microstep) do { \ - buffer[0] = MSG_TYPE_STOP_REQUEST_REPLY; \ - encode_int64(time, &(buffer[1])); \ - assert(microstep >= 0); \ - encode_int32((int32_t)microstep, &(buffer[1 + sizeof(instant_t)])); \ -} while(0) +#define ENCODE_STOP_REQUEST_REPLY(buffer, time, microstep) \ + do \ + { \ + buffer[0] = MSG_TYPE_STOP_REQUEST_REPLY; \ + encode_int64(time, &(buffer[1])); \ + assert(microstep >= 0); \ + encode_int32((int32_t)microstep, &(buffer[1 + sizeof(instant_t)])); \ + } while (0) /** * Byte sent by the RTI indicating that the stop request from some federate * has been granted. The payload is the tag at which all federates have * agreed that they can stop. - * The next 8 bytes will be the time at which the federates will stop. * + * The next 8 bytes will be the time at which the federates will stop. * * The next 4 bytes will be the microstep at which the federates will stop.. 
*/ #define MSG_TYPE_STOP_GRANTED 12 #define MSG_TYPE_STOP_GRANTED_LENGTH (1 + sizeof(instant_t) + sizeof(microstep_t)) -#define ENCODE_STOP_GRANTED(buffer, time, microstep) do { \ - buffer[0] = MSG_TYPE_STOP_GRANTED; \ - encode_int64(time, &(buffer[1])); \ - assert(microstep >= 0); \ - encode_int32((int32_t)microstep, &(buffer[1 + sizeof(instant_t)])); \ -} while(0) +#define ENCODE_STOP_GRANTED(buffer, time, microstep) \ + do \ + { \ + buffer[0] = MSG_TYPE_STOP_GRANTED; \ + encode_int64(time, &(buffer[1])); \ + assert(microstep >= 0); \ + encode_int32((int32_t)microstep, &(buffer[1 + sizeof(instant_t)])); \ + } while (0) /////////// End of lf_request_stop() messages //////////////// @@ -570,7 +575,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /** * Byte identifying a message to send directly to another federate. - * + * * The next two bytes will be the ID of the destination port. * The next two bytes are the destination federate ID. This is checked against * the _lf_my_fed_id of the receiving federate to ensure the message was intended for @@ -584,7 +589,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * This is a variant of @see MSG_TYPE_TAGGED_MESSAGE that is used in P2P connections between * federates. Having a separate message type for P2P connections between federates * will be useful in preventing crosstalk. - * + * * The next two bytes will be the ID of the destination port. * The next two bytes are the destination federate ID. This is checked against * the _lf_my_fed_id of the receiving federate to ensure the message was intended for @@ -639,11 +644,10 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define MSG_TYPE_CLOCK_SYNC_CODED_PROBE 22 - /** * A port absent message, informing the receiver that a given port * will not have event for the current logical time. - * + * * The next 2 bytes is the port id. 
* The next 2 bytes will be the federate id of the destination federate. * This is needed for the centralized coordination so that the RTI knows where @@ -653,21 +657,19 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define MSG_TYPE_PORT_ABSENT 23 - - /** * A message that informs the RTI about connections between this federate and * other federates where messages are routed through the RTI. Currently, this * only includes logical connections when the coordination is centralized. This * information is needed for the RTI to perform the centralized coordination. - * + * * @note Only information about the immediate neighbors is required. The RTI can * transitively obtain the structure of the federation based on each federate's * immediate neighbor information. * - * The next 4 bytes is the number of upstream federates. + * The next 4 bytes is the number of upstream federates. * The next 4 bytes is the number of downstream federates. - * + * * Depending on the first four bytes, the next bytes are pairs of (fed ID (2 * bytes), delay (8 bytes)) for this federate's connection to upstream federates * (by direct connection). The delay is the minimum "after" delay of all @@ -683,6 +685,52 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define MSG_TYPE_NEIGHBOR_STRUCTURE 24 #define MSG_TYPE_NEIGHBOR_STRUCTURE_HEADER_SIZE 9 +/////////// Messages used by joining transient federates /////////////// +//// Overview of the algorithm: +//// When a transient federate joins the deferation after the startup phase (all +//// persistent federates have joined and received, or all least are receiveing +//// their start_time), its start_time is decided based on the Next Event Tags +//// of its upstream and downstream federates. Next Event Tags are queried by the +//// RTI, by sending MSG_TYPE_NEXT_EVENT_TAG_QUERY. Federates will answer with +//// MSG_TYPE_NEXT_EVENT_TAG_QUERY_RESPONSE. 
+//// The start_time of the transient will be: +//// * the maximun of all received MSG_TYPE_NEXT_EVENT_TAG_QUERY_RESPONSE, if +//// different from the stop_time. +//// * or either the maximum of all upstream or the minimum of all downstream, +//// if there is an intersection. +//// FIXME: Look for counter-examples to choose! +//// Once decided about the start_time of the transient, all federates will be +//// requested to halt up to the give time tag (start_time?). +//// They will resume when they receive + +/** + * Byte identifying a query of a federate about its Next Event Tag. This is useful + * when deciding about the start_time of a joining transient federate. + */ +#define MSG_TYPE_NEXT_EVENT_TAG_QUERY 30 + +/** + * Byte identifying a response to a MSG_TYPE_NEXT_EVENT_TAG_QUERY. This is useful + * when deciding about the start_time of a joining transient federate. + */ +#define MSG_TYPE_NEXT_EVENT_TAG_QUERY_RESPONSE 31 + +/** + * Byte identifying a request sent by the RTI to upstream and downstream federates + * of a joining transient. It asks them to halt reactions at the given tag. This + * is useful for the synchronous logical start a joining transient federate. + */ +#define MSG_TYPE_HALT 32 + +/** + * Byte identifying a request sent by the RTI to upstream and downstream federates + * of a joining transient to resume reactions at the tag sent within MSG_TYPE_HALT. + * This is useful for the synchronous logical start a joining transient federate. 
+ */ +#define MSG_TYPE_RESUME 33 + +/////////// End of messages used by joining transient federates /////////////// + ///////////////////////////////////////////// //// Rejection codes diff --git a/include/core/trace.h b/include/core/trace.h index 5b2cf2589..73c62c4ad 100644 --- a/include/core/trace.h +++ b/include/core/trace.h @@ -119,6 +119,15 @@ typedef enum receive_ADR_AD, receive_ADR_QR, receive_UNIDENTIFIED, + // Transient + send_NET_QR, + send_NET_QR_RES, + send_HALT, + send_RESUME, + receive_NET_QR, + receive_NET_QR_RES, + receive_HALT, + receive_RESUME, NUM_EVENT_TYPES } trace_event_t; @@ -182,6 +191,15 @@ static const char *trace_event_names[] = { "Receiving ADR_AD", "Receiving ADR_QR", "Receiving UNIDENTIFIED", + // Transient + "Sending TAG_QR", + "Sending TAG_QR_RES", + "Sending HALT", + "Sending RESUME", + "Receiving TAG_QR", + "Receiving TAG_QR_RES", + "Receiving HALT", + "Receiving RESUME" }; // FIXME: Target property should specify the capacity of the trace buffer. From 86a1a473c33b7aea38ef4409e893204e764a9122 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Thu, 13 Apr 2023 19:26:21 -0700 Subject: [PATCH 09/80] Fix buffer reading of the timestamp when dealing with transient start time computation. 
--- core/federated/RTI/rti_lib.c | 29 +++++++++++++++-------------- core/federated/federate.c | 25 +++++++++++++++---------- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 73ad5cc0a..7fa3b01f3 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -250,16 +250,16 @@ void send_tag_advance_grant(federate_t* fed, tag_t tag) { } } -bool send_next_event_tag_query (federate_t* conn_fed, uint16_t fed_id) { +bool send_next_event_tag_query (federate_t* conn_fed, uint16_t transient_id) { if (conn_fed->state == NOT_CONNECTED) { return false; } - // Write the message type and the related fed_id + // Write the message type and the related transient_id size_t message_length = 1 + sizeof(uint16_t); unsigned char buffer[message_length]; buffer[0] = MSG_TYPE_NEXT_EVENT_TAG_QUERY; - encode_uint16(fed_id, (unsigned char *)&(buffer[1])); + encode_uint16(transient_id, (unsigned char *)&(buffer[1])); if (_RTI.tracing_enabled) { tracepoint_RTI_to_federate(send_NET_QR, conn_fed->id, NULL); @@ -1109,6 +1109,7 @@ void handle_timestamp(federate_t *my_fed) { // the total number of connected federates (my_fed->num_of_conn_federates) // will be compared against those who already sent the NET query response // (my_fed->num_of_conn_federates_sent_net) + LF_PRINT_DEBUG("RTI sends next event tag requests regarding transient %d.", my_fed->id); for (int j = 0; j < my_fed->num_upstream; j++) { federate_t* upstream = &_RTI.federates[my_fed->upstream[j]]; // Ignore this federate if it has resigned or if it a transient that @@ -1136,21 +1137,21 @@ void handle_timestamp(federate_t *my_fed) { // then do not wait for the start time if (my_fed->num_of_conn_federates == 0) { my_fed->start_time_is_set = true; - lf_print_debug("the start time of transient is: %lld", my_fed->fed_start_time); + LF_PRINT_DEBUG("Transient federate %d has no upstream or downstrean federates. 
Its start time is The start time of transient is: %lld", my_fed->fed_start_time); } pthread_mutex_unlock(&_RTI.rti_mutex); // Now wait until all connected federates have responded with their next // event logial time instant. + LF_PRINT_DEBUG("RTI waits for transient start time to be set."); while(!my_fed->start_time_is_set); - + // Once the start time set, sent it to the joining transient unsigned char start_time_buffer[MSG_TYPE_TIMESTAMP_LENGTH]; start_time_buffer[0] = MSG_TYPE_TIMESTAMP; - // FIXME: Sould we check if the time instant have passed or not, and if yes, - // add a delay? - start_time = my_fed->fed_start_time; - encode_int64(swap_bytes_if_big_endian_int64(start_time), &start_time_buffer[1]); + my_fed->fed_start_time += DELAY_START; + LF_PRINT_DEBUG("Transient federate %d start time is set and is %lld.", my_fed->id, my_fed->fed_start_time); + encode_int64(swap_bytes_if_big_endian_int64(my_fed->fed_start_time), &start_time_buffer[1]); if (_RTI.tracing_enabled) { tag_t tag = {.time = start_time, .microstep = 0}; @@ -1165,24 +1166,24 @@ void handle_timestamp(federate_t *my_fed) { } pthread_mutex_lock(&_RTI.rti_mutex); my_fed->state = GRANTED; - LF_PRINT_LOG("RTI sent start time %lld to federate %d.", start_time, my_fed->id); + LF_PRINT_LOG("RTI sent start time %lld to transient federate %d.", my_fed->fed_start_time, my_fed->id); pthread_mutex_unlock(&_RTI.rti_mutex); } } void handle_next_event_tag_query_response(federate_t *my_fed) { // Get the logical time instant and the transient fed_id from the socket - size_t buffer_size = 1 + sizeof(uint16_t) + sizeof(uint16_t); + size_t buffer_size = sizeof(instant_t) + sizeof(uint16_t); unsigned char buffer[buffer_size]; // Read bytes from the socket. We need 8 bytes. 
- ssize_t bytes_read = read_from_socket(my_fed->socket, buffer_size, (unsigned char*)&buffer); + ssize_t bytes_read = read_from_socket(my_fed->socket, buffer_size, buffer); if (bytes_read < (ssize_t)sizeof(int64_t)) { lf_print_error("ERROR reading next event query response from federate %d.\n", my_fed->id); } // Get the timestamp and the transient federate id - int64_t timestamp = swap_bytes_if_big_endian_int64(*((int64_t *)(&(buffer[1])))); - uint16_t transient_fed_id = extract_uint16(buffer[9]); + instant_t timestamp = swap_bytes_if_big_endian_int64(*((int64_t *)(buffer))); + uint16_t transient_fed_id = extract_uint16((&buffer[8])); if (_RTI.tracing_enabled) { tag_t tag = {.time = timestamp, .microstep = 0}; tracepoint_RTI_from_federate(receive_NET_QR_RES, my_fed->id, &tag); diff --git a/core/federated/federate.c b/core/federated/federate.c index b2c1eaab4..cf907f98c 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -2406,37 +2406,42 @@ void handle_stop_request_message() { */ void handle_next_event_tag_query(){ tracepoint_federate_from_RTI(receive_NET_QR, _lf_my_fed_id, NULL); - // Extract the fed_id of the relative transient federate - // Read the header. 
- size_t bytes_to_read = 1 + sizeof(uint16_t); + + // Extract the transient federate Id + size_t bytes_to_read = sizeof(uint16_t); unsigned char buffer[bytes_to_read]; read_from_socket_errexit(_fed.socket_TCP_RTI, bytes_to_read, buffer, "Failed to read the transient federate ID."); - uint16_t fed_id = extract_uint16(buffer + 1); + // Read it + uint16_t transient_id = extract_uint16(buffer); + // Check if the message is intended for this federate + assert(_lf_my_fed_id != transient_id); + LF_PRINT_DEBUG("Receiving NET query message regarding transient federate %d.", transient_id); + // Get the next event tag in the reactions queue tag_t next_tag = get_next_event_tag(); instant_t logical_time = next_tag.time; // Answer with the time instant of the next event tag - send_next_event_tag_query_response(logical_time, fed_id); + send_next_event_tag_query_response(logical_time, transient_id); } /** * Send the answer to the next event tag query to the RTI. - * + * * @param time The time. - * @param fed_id The transient federate id to send back + * @param transient_id The transient federate id to send back * Print a soft error message otherwise */ -void send_next_event_tag_query_response(instant_t time, uint16_t fed_id) { - LF_PRINT_DEBUG("Sending logical time " PRINTF_TIME " to the RTI.", time); +void send_next_event_tag_query_response(instant_t time, uint16_t transient_id) { + LF_PRINT_DEBUG("Sending logical time " PRINTF_TIME " to the RTI regarding NET QR RES of trabsient %d.", time, transient_id); size_t bytes_to_write = 1 + sizeof(instant_t) + sizeof(uint16_t); unsigned char buffer[bytes_to_write]; buffer[0] = MSG_TYPE_NEXT_EVENT_TAG_QUERY_RESPONSE; encode_int64(time, &(buffer[1])); - encode_uint16(fed_id, &(buffer[9])); + encode_uint16(transient_id, &(buffer[9])); lf_mutex_lock(&outbound_socket_mutex); if (_fed.socket_TCP_RTI < 0) { lf_print_warning("Socket is no longer connected. 
Dropping message."); From 3f29f1d59058fc0e3b39401b1a07e83b2e21c07c Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Fri, 14 Apr 2023 03:23:42 -0700 Subject: [PATCH 10/80] A non clean workig version that uses the min of the current tag of the upstream federates to set the strt_time of a transient federate. Warning: the message_tag_is_in_the_future is changed! --- core/federated/RTI/rti_lib.c | 28 +++++++++++++++------------- core/federated/federate.c | 8 ++++++-- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 7fa3b01f3..3c77af910 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -1122,16 +1122,16 @@ void handle_timestamp(federate_t *my_fed) { } } // Iterate over the downstream federates to query the next event tag. - for (int j = 0; j < my_fed->num_downstream; j++) { - federate_t* downstream = &_RTI.federates[my_fed->downstream[j]]; - // Ignore this federate if it has resigned. - if (downstream->state == NOT_CONNECTED) { - continue; - } - if (send_next_event_tag_query(downstream, my_fed->id)) { - my_fed->num_of_conn_federates++; - } - } + // for (int j = 0; j < my_fed->num_downstream; j++) { + // federate_t* downstream = &_RTI.federates[my_fed->downstream[j]]; + // // Ignore this federate if it has resigned. 
+ // if (downstream->state == NOT_CONNECTED) { + // continue; + // } + // if (send_next_event_tag_query(downstream, my_fed->id)) { + // my_fed->num_of_conn_federates++; + // } + // } // If the transient federate has no connected upstream or downstream federates, // then do not wait for the start time @@ -1166,6 +1166,8 @@ void handle_timestamp(federate_t *my_fed) { } pthread_mutex_lock(&_RTI.rti_mutex); my_fed->state = GRANTED; + // tag_t tag = {.time= my_fed->fed_start_time, .microstep=0}; + // send_tag_advance_grant(my_fed, tag); LF_PRINT_LOG("RTI sent start time %lld to transient federate %d.", my_fed->fed_start_time, my_fed->id); pthread_mutex_unlock(&_RTI.rti_mutex); } @@ -1195,7 +1197,7 @@ void handle_next_event_tag_query_response(federate_t *my_fed) { // Processing the TIMESTAMP depends on whether it is the startup phase (all // persistent federates joined) or not. federate_t* transient = &(_RTI.federates[transient_fed_id]); - if (timestamp > transient->fed_start_time) { + if (timestamp < transient->fed_start_time) { // min of the LTC of upstream? 
transient->fed_start_time = timestamp; } // Check that upstream and downstream federates of the transient did propose a start_time @@ -2001,7 +2003,7 @@ void initialize_federate(uint16_t id) { _RTI.federates[id].server_port = -1; _RTI.federates[id].requested_stop = false; _RTI.federates[id].is_transient = true; - _RTI.federates[id].fed_start_time = 0LL; + _RTI.federates[id].fed_start_time = FOREVER; _RTI.federates[id].num_of_conn_federates = 0; _RTI.federates[id].num_of_conn_federates_sent_net = 0; _RTI.federates[id].start_time_is_set = false; @@ -2031,7 +2033,7 @@ void reset_transient_federate(uint16_t id) { _RTI.federates[id].server_port = -1; _RTI.federates[id].requested_stop = false; _RTI.federates[id].is_transient = true; - _RTI.federates[id].fed_start_time = 0LL; + _RTI.federates[id].fed_start_time = FOREVER; _RTI.federates[id].num_of_conn_federates = 0; _RTI.federates[id].num_of_conn_federates_sent_net = 0; _RTI.federates[id].start_time_is_set = false; diff --git a/core/federated/federate.c b/core/federated/federate.c index cf907f98c..94d1f6808 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -1668,7 +1668,8 @@ trigger_handle_t schedule_message_received_from_network_already_locked( // of the message (timestamp, microstep) is // in the future relative to the tag of this // federate. By default, assume it is not. - bool message_tag_is_in_the_future = lf_tag_compare(tag, current_tag) > 0; + // FIXME: Changed '>' with '>='. Is it correct?????? 
+ bool message_tag_is_in_the_future = lf_tag_compare(tag, current_tag) >= 0; // Assign the intended tag trigger->intended_tag = tag; @@ -2420,9 +2421,12 @@ void handle_next_event_tag_query(){ LF_PRINT_DEBUG("Receiving NET query message regarding transient federate %d.", transient_id); // Get the next event tag in the reactions queue - tag_t next_tag = get_next_event_tag(); + // tag_t next_tag = _fed.last_sent_LTC; + // tag_t next_tag = get_next_event_tag(); + tag_t next_tag = lf_tag(); instant_t logical_time = next_tag.time; + lf_print("!!!!!!!!!!!!! the net I am sending is: %lld.", logical_time); // Answer with the time instant of the next event tag send_next_event_tag_query_response(logical_time, transient_id); From 8a442f5b4da7326b8cb72b784e75cf9b827120a3 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Mon, 17 Apr 2023 10:26:31 -0700 Subject: [PATCH 11/80] Reset message_tag_is_in_the_fiture condition --- core/federated/federate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/federated/federate.c b/core/federated/federate.c index 94d1f6808..f8879617c 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -1668,8 +1668,7 @@ trigger_handle_t schedule_message_received_from_network_already_locked( // of the message (timestamp, microstep) is // in the future relative to the tag of this // federate. By default, assume it is not. - // FIXME: Changed '>' with '>='. Is it correct?????? - bool message_tag_is_in_the_future = lf_tag_compare(tag, current_tag) >= 0; + bool message_tag_is_in_the_future = lf_tag_compare(tag, current_tag) > 0; // Assign the intended tag trigger->intended_tag = tag; From 0c0ccffd31885295d8dda762c3c94f0ea69429ca Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Tue, 18 Apr 2023 10:41:49 -0700 Subject: [PATCH 12/80] Fix the number of workers, so that no segmentation fault accurs when tracing. 
--- core/federated/RTI/rti.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/core/federated/RTI/rti.c b/core/federated/RTI/rti.c index 5fe6e94aa..988e38784 100644 --- a/core/federated/RTI/rti.c +++ b/core/federated/RTI/rti.c @@ -62,15 +62,16 @@ int main(int argc, const char* argv[]) { return -1; } if (_RTI.tracing_enabled) { - _lf_number_of_workers = _RTI.number_of_federates; + _lf_number_of_workers = _RTI.number_of_federates + _RTI.number_of_transient_federates; start_trace(rti_trace_file_name); - printf("Tracing the RTI execution in %s file.\n", rti_trace_file_name); + lf_print("Tracing the RTI execution in %s file.", rti_trace_file_name); } - printf("Starting RTI for %d perisistent federates and %d transient federates in federation ID %s\n", \ + lf_print("Starting RTI for %d persistent federates and %d transient federates in federation ID %s", \ _RTI.number_of_federates, _RTI.number_of_transient_federates, _RTI.federation_id); - + lf_print("Number of workers is: %d.", _lf_number_of_workers); + // FIXME: Should number_of_federates + number_of_transient_federates be < UINT16_MAX? assert(_RTI.number_of_federates < UINT16_MAX); assert(_RTI.number_of_transient_federates < UINT16_MAX); From 33191e943ff68634fe1b725f9f3b536eb4919ad6 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Tue, 18 Apr 2023 10:44:47 -0700 Subject: [PATCH 13/80] Iterate over the number of persistant AND transient federates + Minor updates. --- core/federated/RTI/rti_lib.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 3c77af910..955e28128 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -374,7 +374,7 @@ void send_provisional_tag_advance_grant(federate_t* fed, tag_t tag) { // To handle cycles, need to create a boolean array to keep // track of which upstream federates have been visited. 
- bool* visited = (bool*)calloc(_RTI.number_of_federates, sizeof(bool)); // Initializes to 0. + bool* visited = (bool*)calloc(_RTI.number_of_federates + _RTI.number_of_transient_federates, sizeof(bool)); // Initializes to 0. // Find the (transitive) next event tag upstream. tag_t upstream_next_event = transitive_next_event( @@ -429,7 +429,7 @@ bool send_advance_grant_if_safe(federate_t* fed) { // To handle cycles, need to create a boolean array to keep // track of which upstream federates have been visited. - bool* visited = (bool*)calloc(_RTI.number_of_federates, sizeof(bool)); // Initializes to 0. + bool* visited = (bool*)calloc(_RTI.number_of_federates + _RTI.number_of_transient_federates, sizeof(bool)); // Initializes to 0. // Find the tag of the earliest possible incoming message from // upstream federates. @@ -537,7 +537,7 @@ void update_federate_next_event_tag_locked(uint16_t federate_id, tag_t next_even // Check downstream federates to see whether they should now be granted a TAG. // To handle cycles, need to create a boolean array to keep // track of which upstream federates have been visited. - bool* visited = (bool*)calloc(_RTI.number_of_federates, sizeof(bool)); // Initializes to 0. + bool* visited = (bool*)calloc(_RTI.number_of_federates + _RTI.number_of_transient_federates, sizeof(bool)); // Initializes to 0. send_downstream_advance_grants_if_safe(&_RTI.federates[federate_id], visited); free(visited); } @@ -777,7 +777,7 @@ void handle_logical_tag_complete(federate_t* fed) { for (int i = 0; i < fed->num_downstream; i++) { federate_t* downstream = &_RTI.federates[fed->downstream[i]]; send_advance_grant_if_safe(downstream); - bool* visited = (bool*)calloc(_RTI.number_of_federates, sizeof(bool)); // Initializes to 0. + bool* visited = (bool*)calloc(_RTI.number_of_federates + _RTI.number_of_transient_federates, sizeof(bool)); // Initializes to 0. 
send_downstream_advance_grants_if_safe(downstream, visited); free(visited); } @@ -829,7 +829,7 @@ void _lf_rti_broadcast_stop_time_to_federates_already_locked() { ENCODE_STOP_GRANTED(outgoing_buffer, _RTI.max_stop_tag.time, _RTI.max_stop_tag.microstep); // Iterate over federates and send each the message. - for (int i = 0; i < _RTI.number_of_federates; i++) { + for (int i = 0; i < _RTI.number_of_federates + _RTI.number_of_transient_federates; i++) { if (_RTI.federates[i].state == NOT_CONNECTED) { continue; } @@ -854,7 +854,10 @@ void mark_federate_requesting_stop(federate_t* fed) { if (!fed->requested_stop) { // Assume that the federate // has requested stop - _RTI.num_feds_handling_stop++; + // FIXME: Inc only if it is a persistent federate + if (fed->is_transient == false) { + _RTI.num_feds_handling_stop++; + } fed->requested_stop = true; } if (_RTI.num_feds_handling_stop == _RTI.number_of_federates) { @@ -917,7 +920,7 @@ void handle_stop_request_message(federate_t* fed) { // Iterate over federates and send each the MSG_TYPE_STOP_REQUEST message // if we do not have a stop_time already for them. - for (int i = 0; i < _RTI.number_of_federates; i++) { + for (int i = 0; i < _RTI.number_of_federates + _RTI.number_of_transient_federates; i++) { if (_RTI.federates[i].id != fed->id && _RTI.federates[i].requested_stop == false) { if (_RTI.federates[i].state == NOT_CONNECTED) { mark_federate_requesting_stop(&_RTI.federates[i]); @@ -1288,7 +1291,7 @@ void* clock_synchronization_thread(void* noargs) { // Sleep nanosleep(&sleep_time, &remaining_time); // Can be interrupted any_federates_connected = false; - for (int fed = 0; fed < _RTI.number_of_federates; fed++) { + for (int fed = 0; fed < _RTI.number_of_federates + _RTI.number_of_transient_federates ; fed++) { if (_RTI.federates[fed].state == NOT_CONNECTED) { // FIXME: We need better error handling here, but clock sync failure // should not stop execution. 
@@ -1391,7 +1394,7 @@ void handle_federate_resign(federate_t *my_fed) { // Check downstream federates to see whether they should now be granted a TAG. // To handle cycles, need to create a boolean array to keep // track of which upstream federates have been visited. - bool* visited = (bool*)calloc(_RTI.number_of_federates, sizeof(bool)); // Initializes to 0. + bool* visited = (bool*)calloc(_RTI.number_of_federates + _RTI.number_of_transient_federates, sizeof(bool)); // Initializes to 0. send_downstream_advance_grants_if_safe(my_fed, visited); free(visited); @@ -1516,8 +1519,12 @@ int32_t receive_and_check_fed_id_message(int socket_id, struct sockaddr_in* clie } else { // Received federate ID. fed_id = extract_uint16(buffer + 1); - is_transient = (buffer[sizeof(uint16_t) + 1] == 1)? true : false ; - printf("\nRTI received federate ID: %d. which is transient: %d\n", fed_id, is_transient); + is_transient = (buffer[sizeof(uint16_t) + 1] == 1)? true : false; + if(is_transient) { + LF_PRINT_LOG("RTI received federate ID: %d, which is transient.", fed_id); + } else { + LF_PRINT_LOG("RTI received federate ID: %d, which is persistent.", fed_id); + } // Read the federation ID. First read the length, which is one byte. 
size_t federation_id_length = (size_t)buffer[sizeof(uint16_t) + 2]; @@ -1851,6 +1858,7 @@ void connect_to_federates(int socket_descriptor) { if (_RTI.federates[fed_id].is_transient) { _RTI.number_of_connected_transient_federates++; + assert(_RTI.number_of_connected_transient_federates <= _RTI.number_of_transient_federates); i--; } } else { @@ -1938,7 +1946,7 @@ void* connect_to_transient_federates_thread() { void *thread_exit_status; if (_RTI.number_of_connected_transient_federates > 0 ) { for (int i = 0; i < _RTI.number_of_transient_federates + _RTI.number_of_federates; i++) { - // Chaeck if this is a transient federate that has already joined at some point + // Check if this is a transient federate that has already joined at some point if (_RTI.federates[i].thread_id != -1 && _RTI.federates[i].is_transient) { if (pthread_tryjoin_np(_RTI.federates[i].thread_id, &thread_exit_status) == 0) { free_in_transit_message_q(_RTI.federates[i].in_transit_message_tags); @@ -2067,9 +2075,6 @@ void wait_for_federates(int socket_descriptor) { lf_print("RTI: Transient Federates can join and leave the federation at anytime."); } - // Set all the remaining deferates as transient - // for (int i = 0; i < _RTI.number_of_federates + _RT) - // The socket server will not continue to accept connections after all the federates // have joined. // In case some other federation's federates are trying to join the wrong From 2fc8f2a33004a53cf68efae70a3c830cb252f290 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Tue, 18 Apr 2023 10:48:20 -0700 Subject: [PATCH 14/80] Remove non useful message log. 
--- core/federated/RTI/rti.c | 1 - 1 file changed, 1 deletion(-) diff --git a/core/federated/RTI/rti.c b/core/federated/RTI/rti.c index 988e38784..c9c61c562 100644 --- a/core/federated/RTI/rti.c +++ b/core/federated/RTI/rti.c @@ -70,7 +70,6 @@ int main(int argc, const char* argv[]) { _RTI.number_of_federates, _RTI.number_of_transient_federates, _RTI.federation_id); - lf_print("Number of workers is: %d.", _lf_number_of_workers); // FIXME: Should number_of_federates + number_of_transient_federates be < UINT16_MAX? assert(_RTI.number_of_federates < UINT16_MAX); From df9bd413b51c5cd150002cec3355c563d62fbb8b Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Tue, 25 Apr 2023 16:01:45 -0700 Subject: [PATCH 15/80] Set the start time of a trabsient to be the max among current tags of upstreams and its physical joinin time. A constant delay to the start time is added + Fix the corner case of sending the first TAG to a joining transient, where there upstream is only one transient that is not connected yet (was generating a warning) --- core/federated/RTI/rti_lib.c | 49 +++++++++++++++++++++++++----------- core/federated/federate.c | 1 - 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index ce27172d2..e5a202dc0 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -376,6 +376,8 @@ bool send_advance_grant_if_safe(federate_t* fed) { // Find the earliest LTC of upstream federates. tag_t min_upstream_completed = FOREVER_TAG; + // Count the number of that connected upstream federates + uint16_t number_of_connected_upstream_federates = 0; for (int j = 0; j < fed->num_upstream; j++) { federate_t* upstream = &_RTI.federates[fed->upstream[j]]; @@ -383,12 +385,21 @@ bool send_advance_grant_if_safe(federate_t* fed) { // Ignore this federate if it has resigned. 
if (upstream->state == NOT_CONNECTED) continue; + number_of_connected_upstream_federates++; + tag_t candidate = lf_delay_tag(upstream->completed, fed->upstream_delay[j]); if (lf_tag_compare(candidate, min_upstream_completed) < 0) { min_upstream_completed = candidate; } } + + // If none of the upstream federates is connected, then nothing to do. + // It is equivelent to not having upstream federates at all. + if (number_of_connected_upstream_federates == 0) { + return false; + } + LF_PRINT_LOG("Minimum upstream LTC for fed %d is (%lld, %u) " "(adjusted by after delay).", fed->id, @@ -652,8 +663,16 @@ void handle_timed_message(federate_t* sending_federate, unsigned char* buffer) { _RTI.federates[federate_id].last_provisionally_granted.microstep ); return; + } else { + tag_t fed_start_tag = {.time=_RTI.federates[federate_id].fed_start_time, .microstep=0}; + if(lf_tag_compare(intended_tag, fed_start_tag) < 0) { + // Do not forward the message if the federate is connected, but its + // start_time is not reached yet + pthread_mutex_unlock(&_RTI.rti_mutex); + return; + } } - + // Forward the message or message chunk. int destination_socket = _RTI.federates[federate_id].socket; @@ -1120,7 +1139,7 @@ void handle_timestamp(federate_t *my_fed) { // then do not wait for the start time if (my_fed->num_of_conn_federates == 0) { my_fed->start_time_is_set = true; - LF_PRINT_DEBUG("Transient federate %d has no upstream or downstrean federates. Its start time is The start time of transient is: %lld", my_fed->fed_start_time); + LF_PRINT_DEBUG("Transient federate %d has no upstream or downstrean federates. Its start time is: %lld", my_fed->fed_start_time); } pthread_mutex_unlock(&_RTI.rti_mutex); // Now wait until all connected federates have responded with their next @@ -1180,7 +1199,10 @@ void handle_next_event_tag_query_response(federate_t *my_fed) { // Processing the TIMESTAMP depends on whether it is the startup phase (all // persistent federates joined) or not. 
federate_t* transient = &(_RTI.federates[transient_fed_id]); - if (timestamp < transient->fed_start_time) { // min of the LTC of upstream? + + // Set the start_time of the transient federate to be the maximum among + // current tag of upstreams and the physical time at which it joined + if (timestamp > transient->fed_start_time) { transient->fed_start_time = timestamp; } // Check that upstream and downstream federates of the transient did propose a start_time @@ -1991,7 +2013,7 @@ void initialize_federate(uint16_t id) { _RTI.federates[id].server_port = -1; _RTI.federates[id].requested_stop = false; _RTI.federates[id].is_transient = true; - _RTI.federates[id].fed_start_time = FOREVER; + _RTI.federates[id].fed_start_time = 0LL; _RTI.federates[id].num_of_conn_federates = 0; _RTI.federates[id].num_of_conn_federates_sent_net = 0; _RTI.federates[id].start_time_is_set = false; @@ -2021,7 +2043,7 @@ void reset_transient_federate(uint16_t id) { _RTI.federates[id].server_port = -1; _RTI.federates[id].requested_stop = false; _RTI.federates[id].is_transient = true; - _RTI.federates[id].fed_start_time = FOREVER; + _RTI.federates[id].fed_start_time = 0LL; _RTI.federates[id].num_of_conn_federates = 0; _RTI.federates[id].num_of_conn_federates_sent_net = 0; _RTI.federates[id].start_time_is_set = false; @@ -2055,19 +2077,18 @@ void wait_for_federates(int socket_descriptor) { lf_print("RTI: Transient Federates can join and leave the federation at anytime."); } - // The socket server will not continue to accept connections after all the federates - // have joined. + // The socket server will only continue to accept connections from transient + // federates. // In case some other federation's federates are trying to join the wrong // federation, need to respond. Start a separate thread to do that. 
pthread_t responder_thread; - // FIXME: temporary remove, so that federate are not confused - // pthread_create(&responder_thread, NULL, respond_to_erroneous_connections, NULL); - - // Create a thread that will continue listening to joining and leaving transient - // federates, if any - // FIXME: pthread_t transient_thread; - if (_RTI.number_of_transient_federates > 0) { + // If the federation does not include transient federates, then respond to + // erronous connections. Otherwise, continue to accept transients joining and + // respond to duplicate joing requests. + if (_RTI.number_of_transient_federates == 0) { + pthread_create(&responder_thread, NULL, respond_to_erroneous_connections, NULL); + } else if (_RTI.number_of_transient_federates > 0) { pthread_create(&transient_thread, NULL, connect_to_transient_federates_thread, NULL); } diff --git a/core/federated/federate.c b/core/federated/federate.c index aeb981092..1a4b5864b 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -2430,7 +2430,6 @@ void handle_next_event_tag_query(){ tag_t next_tag = lf_tag(); instant_t logical_time = next_tag.time; - lf_print("!!!!!!!!!!!!! the net I am sending is: %lld.", logical_time); // Answer with the time instant of the next event tag send_next_event_tag_query_response(logical_time, transient_id); From 22b8657e406520d6370f8bcd2bfe07f2cf27e22e Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Tue, 25 Apr 2023 17:31:45 -0700 Subject: [PATCH 16/80] Better messages and function names. 
--- core/federated/RTI/rti_lib.c | 20 ++++++++--------- core/federated/RTI/rti_lib.h | 12 ++++++++--- core/federated/federate.c | 12 +++++------ include/core/federated/net_common.h | 33 ++++++++++++----------------- 4 files changed, 39 insertions(+), 38 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index e5a202dc0..f43cdc6ed 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -230,7 +230,7 @@ void send_tag_advance_grant(federate_t* fed, tag_t tag) { } } -bool send_next_event_tag_query (federate_t* conn_fed, uint16_t transient_id) { +bool send_current_tag_query (federate_t* conn_fed, uint16_t transient_id) { if (conn_fed->state == NOT_CONNECTED) { return false; } @@ -238,7 +238,7 @@ bool send_next_event_tag_query (federate_t* conn_fed, uint16_t transient_id) { // Write the message type and the related transient_id size_t message_length = 1 + sizeof(uint16_t); unsigned char buffer[message_length]; - buffer[0] = MSG_TYPE_NEXT_EVENT_TAG_QUERY; + buffer[0] = MSG_TYPE_CURRENT_TAG_QUERY; encode_uint16(transient_id, (unsigned char *)&(buffer[1])); if (_RTI.tracing_enabled) { @@ -1119,7 +1119,7 @@ void handle_timestamp(federate_t *my_fed) { if (upstream->state == NOT_CONNECTED) { continue; } - if (send_next_event_tag_query(upstream, my_fed->id)) { + if (send_current_tag_query(upstream, my_fed->id)) { my_fed->num_of_conn_federates++; } } @@ -1130,7 +1130,7 @@ void handle_timestamp(federate_t *my_fed) { // if (downstream->state == NOT_CONNECTED) { // continue; // } - // if (send_next_event_tag_query(downstream, my_fed->id)) { + // if (send_current_tag_query(downstream, my_fed->id)) { // my_fed->num_of_conn_federates++; // } // } @@ -1139,7 +1139,7 @@ void handle_timestamp(federate_t *my_fed) { // then do not wait for the start time if (my_fed->num_of_conn_federates == 0) { my_fed->start_time_is_set = true; - LF_PRINT_DEBUG("Transient federate %d has no upstream or downstrean federates. 
Its start time is: %lld", my_fed->fed_start_time); + LF_PRINT_DEBUG("Transient federate %d has no upstream or downstrean federates. Its start time is: %lld", my_fed->id, my_fed->fed_start_time); } pthread_mutex_unlock(&_RTI.rti_mutex); // Now wait until all connected federates have responded with their next @@ -1175,7 +1175,7 @@ void handle_timestamp(federate_t *my_fed) { } } -void handle_next_event_tag_query_response(federate_t *my_fed) { +void handle_current_tag_query_response(federate_t *my_fed) { // Get the logical time instant and the transient fed_id from the socket size_t buffer_size = sizeof(instant_t) + sizeof(uint16_t); unsigned char buffer[buffer_size]; @@ -1192,7 +1192,7 @@ void handle_next_event_tag_query_response(federate_t *my_fed) { tag_t tag = {.time = timestamp, .microstep = 0}; tracepoint_RTI_from_federate(receive_NET_QR_RES, my_fed->id, &tag); } - LF_PRINT_LOG("RTI received NET query response message: %lld.", timestamp); + LF_PRINT_LOG("RTI received current TAG query response message: %lld.", timestamp); // FIXME: Should the lock be inside the if statement only? pthread_mutex_lock(&_RTI.rti_mutex); @@ -1201,7 +1201,7 @@ void handle_next_event_tag_query_response(federate_t *my_fed) { federate_t* transient = &(_RTI.federates[transient_fed_id]); // Set the start_time of the transient federate to be the maximum among - // current tag of upstreams and the physical time at which it joined + // current tag of upstreams and the physical time at which it joined . 
if (timestamp > transient->fed_start_time) { transient->fed_start_time = timestamp; } @@ -1429,8 +1429,8 @@ void* federate_thread_TCP(void* fed) { case MSG_TYPE_TIMESTAMP: handle_timestamp(my_fed); break; - case MSG_TYPE_NEXT_EVENT_TAG_QUERY_RESPONSE: - handle_next_event_tag_query_response(my_fed); + case MSG_TYPE_CURRENT_TAG_QUERY_RESPONSE: + handle_current_tag_query_response(my_fed); break; case MSG_TYPE_ADDRESS_QUERY: handle_address_query(my_fed->id); diff --git a/core/federated/RTI/rti_lib.h b/core/federated/RTI/rti_lib.h index 9ddb65f7e..a1e6dcaee 100644 --- a/core/federated/RTI/rti_lib.h +++ b/core/federated/RTI/rti_lib.h @@ -691,13 +691,13 @@ int process_args(int argc, const char* argv[]); ////////////////////////////////////////////////////////// /** - * Queries conn_fed for its Next Event Tag (using MSG_TYPE_NEXT_EVENT_TAG_QUERY). + * Queries conn_fed for its current Tag (using MSG_TYPE_CURRENT_TAG_QUERY). * If the function fails to send the query, for example in case the federate is * not connected (can be a transient one itself), then return false. In such case, * the RTI will not wait to receive an answer from it. * * The fed_id of the transient federate is sent to conn_fed, which should be - * returned as is within MSG_TYPE_NEXT_EVENT_TYPE_QUERY_RESPONSE. The aim is to + * returned as is within MSG_TYPE_CURRENT_TAG_QUERY_RESPONSE. The aim is to * identify which of the transient federates has initiated the request. This enables * the support of two diffrent transient federates joining close eanough in time. * @@ -705,6 +705,12 @@ int process_args(int argc, const char* argv[]); * @param fed_id: The ID of the transient joining federate * @return true, if successfully sent, false otherwise. */ -bool send_next_event_tag_query(federate_t* conn_fed, uint16_t fed_id); +bool send_current_tag_query(federate_t* conn_fed, uint16_t fed_id); +/** + * Handles current tag query response received form my_fed. 
+ * + * @param my_fed: the federate from whom the response is received. + */ +void handle_current_tag_query_response(federate_t *my_fed); #endif // RTI_LIB_H \ No newline at end of file diff --git a/core/federated/federate.c b/core/federated/federate.c index 1a4b5864b..e5f0782b4 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -2405,7 +2405,7 @@ void handle_stop_request_message() { * Handle a Next Event Tag query received from the RTI. Such message is sent when * a transient federate attempts to join a federation after the startup phase. * The funtion will read the NET in the event queue and call - * send_next_event_tag_query_answer(). + * send_current_tag_query_answer(). * * FIXME: This function assumes the caller does hold the mutex lock? */ @@ -2432,21 +2432,21 @@ void handle_next_event_tag_query(){ instant_t logical_time = next_tag.time; // Answer with the time instant of the next event tag - send_next_event_tag_query_response(logical_time, transient_id); + send_current_tag_query_response(logical_time, transient_id); } /** - * Send the answer to the next event tag query to the RTI. + * Send to RTI the answer to current tag query. * * @param time The time. 
* @param transient_id The transient federate id to send back * Print a soft error message otherwise */ -void send_next_event_tag_query_response(instant_t time, uint16_t transient_id) { +void send_current_tag_query_response(instant_t time, uint16_t transient_id) { LF_PRINT_DEBUG("Sending logical time " PRINTF_TIME " to the RTI regarding NET QR RES of trabsient %d.", time, transient_id); size_t bytes_to_write = 1 + sizeof(instant_t) + sizeof(uint16_t); unsigned char buffer[bytes_to_write]; - buffer[0] = MSG_TYPE_NEXT_EVENT_TAG_QUERY_RESPONSE; + buffer[0] = MSG_TYPE_CURRENT_TAG_QUERY_RESPONSE; encode_int64(time, &(buffer[1])); encode_uint16(transient_id, &(buffer[9])); lf_mutex_lock(&outbound_socket_mutex); @@ -2695,7 +2695,7 @@ void* listen_to_rti_TCP(void* args) { case MSG_TYPE_PORT_ABSENT: handle_port_absent_message(_fed.socket_TCP_RTI, -1); break; - case MSG_TYPE_NEXT_EVENT_TAG_QUERY: + case MSG_TYPE_CURRENT_TAG_QUERY: handle_next_event_tag_query(); break; case MSG_TYPE_HALT: diff --git a/include/core/federated/net_common.h b/include/core/federated/net_common.h index c4c4263dc..c3d20cf96 100644 --- a/include/core/federated/net_common.h +++ b/include/core/federated/net_common.h @@ -689,31 +689,26 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. //// Overview of the algorithm: //// When a transient federate joins the deferation after the startup phase (all //// persistent federates have joined and received, or all least are receiveing -//// their start_time), its start_time is decided based on the Next Event Tags -//// of its upstream and downstream federates. Next Event Tags are queried by the -//// RTI, by sending MSG_TYPE_NEXT_EVENT_TAG_QUERY. Federates will answer with -//// MSG_TYPE_NEXT_EVENT_TAG_QUERY_RESPONSE. -//// The start_time of the transient will be: -//// * the maximun of all received MSG_TYPE_NEXT_EVENT_TAG_QUERY_RESPONSE, if -//// different from the stop_time. 
-//// * or either the maximum of all upstream or the minimum of all downstream, -//// if there is an intersection. -//// FIXME: Look for counter-examples to choose! -//// Once decided about the start_time of the transient, all federates will be -//// requested to halt up to the give time tag (start_time?). -//// They will resume when they receive - -/** - * Byte identifying a query of a federate about its Next Event Tag. This is useful +//// their start_time), its start_time is decided based on the current Tags +//// of its upstream federates and its own physical join time. Current Tags are +//// queried by the RTI, by sending MSG_TYPE_CURRENT_TAG_QUERY. Federates will +//// answer with MSG_TYPE_CURRENT_TAG_QUERY_RESPONSE. +//// The start_time of the transient will be the the maximun of all received +//// MSG_TYPE_CURRENT_TAG_QUERY_RESPONSE and its physical join time. +//// This choice avoids deadline violations and enables hot join (without any +//// federate to halt its execution). + +/** + * Byte identifying a query of a federate about its current Tag. This is useful * when deciding about the start_time of a joining transient federate. */ -#define MSG_TYPE_NEXT_EVENT_TAG_QUERY 30 +#define MSG_TYPE_CURRENT_TAG_QUERY 30 /** - * Byte identifying a response to a MSG_TYPE_NEXT_EVENT_TAG_QUERY. This is useful + * Byte identifying a response to a MSG_TYPE_CURRENT_TAG_QUERY. This is useful * when deciding about the start_time of a joining transient federate. 
*/ -#define MSG_TYPE_NEXT_EVENT_TAG_QUERY_RESPONSE 31 +#define MSG_TYPE_CURRENT_TAG_QUERY_RESPONSE 31 /** * Byte identifying a request sent by the RTI to upstream and downstream federates From 9a788e9370b2f91b4f06c1044259838e39c539ee Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Tue, 25 Apr 2023 18:18:52 -0700 Subject: [PATCH 17/80] Clean useless functions --- core/federated/RTI/rti_lib.c | 4 ++-- core/federated/federate.c | 4 ++-- include/core/trace.h | 24 ++++++++---------------- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index f43cdc6ed..2cb6b77bd 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -242,7 +242,7 @@ bool send_current_tag_query (federate_t* conn_fed, uint16_t transient_id) { encode_uint16(transient_id, (unsigned char *)&(buffer[1])); if (_RTI.tracing_enabled) { - tracepoint_RTI_to_federate(send_NET_QR, conn_fed->id, NULL); + tracepoint_RTI_to_federate(send_CuTAG_QR, conn_fed->id, NULL); } // If write_to_socket fails, the consider it as soft failure and update the // federate's status. @@ -1190,7 +1190,7 @@ void handle_current_tag_query_response(federate_t *my_fed) { uint16_t transient_fed_id = extract_uint16((&buffer[8])); if (_RTI.tracing_enabled) { tag_t tag = {.time = timestamp, .microstep = 0}; - tracepoint_RTI_from_federate(receive_NET_QR_RES, my_fed->id, &tag); + tracepoint_RTI_from_federate(receive_CuTAG_QR_RES, my_fed->id, &tag); } LF_PRINT_LOG("RTI received current TAG query response message: %lld.", timestamp); diff --git a/core/federated/federate.c b/core/federated/federate.c index e5f0782b4..9c3075837 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -2410,7 +2410,7 @@ void handle_stop_request_message() { * FIXME: This function assumes the caller does hold the mutex lock? 
*/ void handle_next_event_tag_query(){ - tracepoint_federate_from_RTI(receive_NET_QR, _lf_my_fed_id, NULL); + tracepoint_federate_from_RTI(receive_CuTAG_QR, _lf_my_fed_id, NULL); // Extract the transient federate Id size_t bytes_to_read = sizeof(uint16_t); @@ -2458,7 +2458,7 @@ void send_current_tag_query_response(instant_t time, uint16_t transient_id) { tag_t tag = {.time = time, .microstep = 0}; // Trace the event when tracing is enabled - tracepoint_federate_to_RTI(send_NET_QR_RES, _lf_my_fed_id, &tag); + tracepoint_federate_to_RTI(send_CuTAG_QR_RES, _lf_my_fed_id, &tag); ssize_t bytes_written = write_to_socket(_fed.socket_TCP_RTI, bytes_to_write, buffer); if (bytes_written < (ssize_t)bytes_to_write) { diff --git a/include/core/trace.h b/include/core/trace.h index 73c62c4ad..fa1be3fa7 100644 --- a/include/core/trace.h +++ b/include/core/trace.h @@ -120,14 +120,10 @@ typedef enum receive_ADR_QR, receive_UNIDENTIFIED, // Transient - send_NET_QR, - send_NET_QR_RES, - send_HALT, - send_RESUME, - receive_NET_QR, - receive_NET_QR_RES, - receive_HALT, - receive_RESUME, + send_CuTAG_QR, + send_CuTAG_QR_RES, + receive_CuTAG_QR, + receive_CuTAG_QR_RES, NUM_EVENT_TYPES } trace_event_t; @@ -192,14 +188,10 @@ static const char *trace_event_names[] = { "Receiving ADR_QR", "Receiving UNIDENTIFIED", // Transient - "Sending TAG_QR", - "Sending TAG_QR_RES", - "Sending HALT", - "Sending RESUME", - "Receiving TAG_QR", - "Receiving TAG_QR_RES", - "Receiving HALT", - "Receiving RESUME" + "Sending CuTAG_QR", + "Sending CuTAG_QR_RES", + "Receiving CuTAG_QR", + "Receiving CuTAG_QR_RES" }; // FIXME: Target property should specify the capacity of the trace buffer. From ad23fe0d965992e3ffc9cac2eb822ec0c6fbbf5b Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Wed, 26 Apr 2023 10:14:07 -0700 Subject: [PATCH 18/80] Fix error found by CI: there is no i in connect_to_transient_federates(). 
--- core/federated/RTI/rti_lib.c | 1 - 1 file changed, 1 deletion(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 2cb6b77bd..e5865463a 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -1920,7 +1920,6 @@ void* connect_to_transient_federates_thread() { if (!authenticate_federate(socket_id)) { lf_print_warning("RTI failed to authenticate the incoming federate."); // Ignore the federate that failed authentication. - i--; continue; } } From c925e413a53021ac400e33f94a70f713a4e8ced0 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Wed, 26 Apr 2023 11:29:45 -0700 Subject: [PATCH 19/80] Another error fix brought up by CI, about the usage of thread_id. --- core/federated/RTI/rti_lib.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index e5865463a..f361dc489 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -1948,7 +1948,7 @@ void* connect_to_transient_federates_thread() { if (_RTI.number_of_connected_transient_federates > 0 ) { for (int i = 0; i < _RTI.number_of_transient_federates + _RTI.number_of_federates; i++) { // Check if this is a transient federate that has already joined at some point - if (_RTI.federates[i].thread_id != -1 && _RTI.federates[i].is_transient) { + if (_RTI.federates[i].is_transient) { if (pthread_tryjoin_np(_RTI.federates[i].thread_id, &thread_exit_status) == 0) { free_in_transit_message_q(_RTI.federates[i].in_transit_message_tags); lf_print("RTI: Transient Federate %d thread exited.", _RTI.federates[i].id); @@ -1991,7 +1991,6 @@ void* respond_to_erroneous_connections(void* nothing) { } void initialize_federate(uint16_t id) { - _RTI.federates[id].thread_id = -1; _RTI.federates[id].id = id; _RTI.federates[id].socket = -1; // No socket. 
_RTI.federates[id].clock_synchronization_enabled = true; @@ -2021,7 +2020,6 @@ void initialize_federate(uint16_t id) { void reset_transient_federate(uint16_t id) { // The commented lines highlignts the values that a transient federate needs // to passes to its future joining one - _RTI.federates[id].thread_id = -1; // _RTI.federates[id].id = id; _RTI.federates[id].socket = -1; // No socket. _RTI.federates[id].clock_synchronization_enabled = true; From 190ed0260296cb7021adb844edc9ef5139fccbe2 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Wed, 26 Apr 2023 14:42:59 -0700 Subject: [PATCH 20/80] Get rid of pthread_tryjoin_np(), for the sake of portability and CI. --- core/federated/RTI/rti_lib.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index f361dc489..7e3f757f2 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -1443,7 +1443,6 @@ void* federate_thread_TCP(void* fed) { break; case MSG_TYPE_RESIGN: handle_federate_resign(my_fed); - return NULL; break; case MSG_TYPE_NEXT_EVENT_TAG: handle_next_event_tag(my_fed); @@ -1474,6 +1473,21 @@ void* federate_thread_TCP(void* fed) { // Nothing more to do. Close the socket and exit. close(my_fed->socket); // from unistd.h + // Manual clean, in case of a transient federate + // FIXME: Should free_in_transit_message_q be called in case of persistent federates as well? 
+ if (my_fed->is_transient) { + free_in_transit_message_q(my_fed->in_transit_message_tags); + lf_print("RTI: Transient Federate %d thread exited.", my_fed->id); + + // Update the number of connected transient federates + pthread_mutex_lock(&_RTI.rti_mutex); + _RTI.number_of_connected_transient_federates--; + + // Reset the status of the leaving federate + reset_transient_federate(my_fed->id); + pthread_mutex_unlock(&_RTI.rti_mutex); + } + return NULL; } @@ -1942,24 +1956,6 @@ void* connect_to_transient_federates_thread() { _RTI.number_of_connected_transient_federates++; } } - - // Check if transient federate threads did exit. - void *thread_exit_status; - if (_RTI.number_of_connected_transient_federates > 0 ) { - for (int i = 0; i < _RTI.number_of_transient_federates + _RTI.number_of_federates; i++) { - // Check if this is a transient federate that has already joined at some point - if (_RTI.federates[i].is_transient) { - if (pthread_tryjoin_np(_RTI.federates[i].thread_id, &thread_exit_status) == 0) { - free_in_transit_message_q(_RTI.federates[i].in_transit_message_tags); - lf_print("RTI: Transient Federate %d thread exited.", _RTI.federates[i].id); - // Update the number of connected transient federates - _RTI.number_of_connected_transient_federates--; - // Reset the status of the leaving federate - reset_transient_federate(_RTI.federates[i].id); - } - } - } - } } } From 3bf95e8a42e63d7a7cb8971d1b5e3ac0743df0ca Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Thu, 27 Apr 2023 12:05:29 -0700 Subject: [PATCH 21/80] Remove non useful code --- core/federated/federate.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/core/federated/federate.c b/core/federated/federate.c index 9c3075837..b4399f768 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -2472,25 +2472,6 @@ void send_current_tag_query_response(instant_t time, uint16_t transient_id) { lf_mutex_unlock(&outbound_socket_mutex); } -/** - * Handle a Halt message 
received form the RTI. This will cause the federation to - * stop. - * - * FIXME: WIP. Should it be - */ -void handle_halt(){ - -} - -/** - * Handle a RESUME message received from the RTI - * - * FIXME: What to do exactly? Can it be mixed with handle_halt()? - */ -void handle_resume(){ - // tracepoint_federate_from_RTI(receive_TAG, _lf_my_fed_id, &TAG); -} - /////////////////// End of transient time coordination ///////////////////////// /** @@ -2698,9 +2679,6 @@ void* listen_to_rti_TCP(void* args) { case MSG_TYPE_CURRENT_TAG_QUERY: handle_next_event_tag_query(); break; - case MSG_TYPE_HALT: - handle_halt(); - break; case MSG_TYPE_CLOCK_SYNC_T1: case MSG_TYPE_CLOCK_SYNC_T4: lf_print_error("Federate %d received unexpected clock sync message from RTI on TCP socket.", From c35cfd167bb3f0560e05b6a967386672270a24f2 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Thu, 27 Apr 2023 12:07:37 -0700 Subject: [PATCH 22/80] Tentative solution for granting time advance in a federate with absent upstream transient. --- core/federated/RTI/rti_lib.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 7e3f757f2..1e26f6cbc 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -375,9 +375,9 @@ void send_provisional_tag_advance_grant(federate_t* fed, tag_t tag) { bool send_advance_grant_if_safe(federate_t* fed) { // Find the earliest LTC of upstream federates. 
- tag_t min_upstream_completed = FOREVER_TAG; - // Count the number of that connected upstream federates - uint16_t number_of_connected_upstream_federates = 0; + tag_t min_upstream_completed = fed->next_event; + // // Count the number of that connected upstream federates + // uint16_t number_of_connected_upstream_federates = 0; for (int j = 0; j < fed->num_upstream; j++) { federate_t* upstream = &_RTI.federates[fed->upstream[j]]; @@ -385,7 +385,7 @@ bool send_advance_grant_if_safe(federate_t* fed) { // Ignore this federate if it has resigned. if (upstream->state == NOT_CONNECTED) continue; - number_of_connected_upstream_federates++; + // number_of_connected_upstream_federates++; tag_t candidate = lf_delay_tag(upstream->completed, fed->upstream_delay[j]); @@ -396,9 +396,9 @@ bool send_advance_grant_if_safe(federate_t* fed) { // If none of the upstream federates is connected, then nothing to do. // It is equivelent to not having upstream federates at all. - if (number_of_connected_upstream_federates == 0) { - return false; - } + // if (number_of_connected_upstream_federates == 0) { + // return true; + // } LF_PRINT_LOG("Minimum upstream LTC for fed %d is (%lld, %u) " "(adjusted by after delay).", From e17ffc7218e61f10e054d83fe2c3a318a2ed14da Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Fri, 28 Apr 2023 18:10:34 -0700 Subject: [PATCH 23/80] Better logging for absent transient federates --- core/federated/RTI/rti_lib.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 1e26f6cbc..3452d3e7a 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -553,12 +553,17 @@ void handle_port_absent_message(federate_t* sending_federate, unsigned char* buf // issue a TAG before this message has been forwarded. pthread_mutex_lock(&_RTI.rti_mutex); - // If the destination federate is no longer connected, issue a warning - // and return. 
+ // If the destination federate is persistent and is no longer connected, issue + // a warning and return. If, however, it is transient, then print a message. if (_RTI.federates[federate_id].state == NOT_CONNECTED) { pthread_mutex_unlock(&_RTI.rti_mutex); - lf_print_warning("RTI: Destination federate %d is no longer connected. Dropping message.", - federate_id); + if (!_RTI.federates[federate_id].is_transient) { + lf_print_warning("RTI: Destination federate %d is no longer connected. Dropping message.", + federate_id); + } else { + lf_print("RTI: Destination transient federate %d is currently not connected. Dropping message.", + federate_id); + } LF_PRINT_LOG("Fed status: next_event (%lld, %d), " "completed (%lld, %d), " "last_granted (%lld, %d), " @@ -643,12 +648,17 @@ void handle_timed_message(federate_t* sending_federate, unsigned char* buffer) { // issue a TAG before this message has been forwarded. pthread_mutex_lock(&_RTI.rti_mutex); - // If the destination federate is no longer connected, issue a warning - // and return. + // If the destination federate is persistent and is no longer connected, issue + // a warning and return. If, however, it is transient, then print a message. if (_RTI.federates[federate_id].state == NOT_CONNECTED) { pthread_mutex_unlock(&_RTI.rti_mutex); - lf_print_warning("RTI: Destination federate %d is no longer connected. Dropping message.", - federate_id); + if (!_RTI.federates[federate_id].is_transient) { + lf_print_warning("RTI: Destination federate %d is no longer connected. Dropping message.", + federate_id); + } else { + lf_print("RTI: Destination transient federate %d is currently not connected. 
Dropping message.", + federate_id); + } LF_PRINT_LOG("Fed status: next_event (%lld, %d), " "completed (%lld, %d), " "last_granted (%lld, %d), " From 2856f3443bc74fee285ea1f5f4535af7ec462b51 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Tue, 2 May 2023 21:59:53 -0700 Subject: [PATCH 24/80] Create a different trace file if there is already one. This is temporary though. It should be called only in case of transient federates. --- core/trace.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/core/trace.c b/core/trace.c index 648c26812..ea0886780 100644 --- a/core/trace.c +++ b/core/trace.c @@ -37,6 +37,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include "platform.h" @@ -237,9 +238,56 @@ void flush_trace(int worker) { lf_critical_section_exit(); } +/** + * This utility function helps creating a new file trace, if already one exists. + * This is particularly useful for transient federates, since each joining one will + * have a different trace file. + * @param n the integer to convert into a string + * @return the converted string + */ +char * convert_int_to_string(int n) { + // Count the number of digits in n + int n_ = n; + int number_of_digits = 0; + while (n_) { + number_of_digits++; + n_ /= 10; + } + + // Construct the array of chars to return + char *string_of_int; + string_of_int = (char *)malloc(number_of_digits + 1); + + // Extract the digits and convert them into chars + int index = 0; + for (int i = 0; i < number_of_digits ; i++) { + string_of_int[number_of_digits - i - 1] = n % 10 + '0'; + n /= 10; + } + // Add the null character and return + string_of_int[number_of_digits] = '\0'; + return (char *)string_of_int; +} + void start_trace(const char* filename) { // FIXME: location of trace file should be customizable. 
- _lf_trace_file = fopen(filename, "w"); + + // If a file already exists with the same file name, then derive another one. + char filename_[strlen(filename) + 10]; + strcpy(filename_, filename); + int i = 0; + while (access(filename_, F_OK) == 0) { + // Get the root of the original file name + memset(filename_, '\0', sizeof(filename_)); + strncpy(filename_, filename, strlen(filename) - 4); + // Add an index + char *ind = convert_int_to_string(i++); + strcat(filename_, ind); + // Add the file extension + strcat(filename_, ".lft"); + } + + _lf_trace_file = fopen(filename_, "w"); if (_lf_trace_file == NULL) { fprintf(stderr, "WARNING: Failed to open log file with error code %d." "No log will be written.\n", errno); From 825fbc19e3c0d3decda5799d91bded3f2c37ec33 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Wed, 3 May 2023 14:35:48 -0700 Subject: [PATCH 25/80] Add the global variable effective_start_time. It is useful for transients federates. --- core/tag.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/tag.c b/core/tag.c index 012c8d4b0..ae1f9277a 100644 --- a/core/tag.c +++ b/core/tag.c @@ -33,6 +33,12 @@ typedef enum _lf_time_type { // Global variables declared in tag.h: instant_t start_time = NEVER; +/** + * Only useful for transient federates. It records the effective start time, to + * be used at startup. Elapsed logical time calculations will use start_time. + */ +instant_t effective_start_time = NEVER; + //////////////// Global variables not declared in tag.h (must be declared extern if used elsewhere): /** From fababedeec2925e996991a3d5b7a4b96e4af21c0 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Wed, 3 May 2023 18:02:43 -0700 Subject: [PATCH 26/80] Define and use effective_start_time to start a transient federate. Add a new message type to be sent from the RTI to federates, so that both the start_time and the effective_start_time are known and used. 
--- core/federated/RTI/rti_lib.c | 104 +++++++++++++++++----------- core/federated/federate.c | 26 ++++--- core/threaded/reactor_threaded.c | 3 +- include/core/federated/net_common.h | 15 +++- 4 files changed, 93 insertions(+), 55 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 3452d3e7a..8d4879b76 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -1084,23 +1084,26 @@ void handle_timestamp(federate_t *my_fed) { pthread_mutex_unlock(&_RTI.rti_mutex); - // Send back to the federate the maximum time plus an offset on a TIMESTAMP + // Send back to the federate the maximum time plus an offset on a TIMESTAMP_START // message. - unsigned char start_time_buffer[MSG_TYPE_TIMESTAMP_LENGTH]; - start_time_buffer[0] = MSG_TYPE_TIMESTAMP; + // In the startup phase, federates will receive identical start_time and + // effective_start_time + unsigned char start_time_buffer[MSG_TYPE_TIMESTAMP_START_LENGTH]; + start_time_buffer[0] = MSG_TYPE_TIMESTAMP_START; // Add an offset to this start time to get everyone starting together. start_time = _RTI.max_start_time + DELAY_START; encode_int64(swap_bytes_if_big_endian_int64(start_time), &start_time_buffer[1]); + encode_int64(swap_bytes_if_big_endian_int64(start_time), &start_time_buffer[9]); if (_RTI.tracing_enabled) { tag_t tag = {.time = start_time, .microstep = 0}; tracepoint_RTI_to_federate(send_TIMESTAMP, my_fed->id, &tag); } ssize_t bytes_written = write_to_socket( - my_fed->socket, MSG_TYPE_TIMESTAMP_LENGTH, + my_fed->socket, MSG_TYPE_TIMESTAMP_START_LENGTH, start_time_buffer ); - if (bytes_written < MSG_TYPE_TIMESTAMP_LENGTH) { + if (bytes_written < MSG_TYPE_TIMESTAMP_START_LENGTH) { lf_print_error("Failed to send the starting time to federate %d.", my_fed->id); } pthread_mutex_lock(&_RTI.rti_mutex); @@ -1110,18 +1113,17 @@ void handle_timestamp(federate_t *my_fed) { // the federate to the start time. 
my_fed->state = GRANTED; pthread_cond_broadcast(&_RTI.sent_start_time); - LF_PRINT_LOG("RTI sent start time %lld to federate %d.", start_time, my_fed->id); pthread_mutex_unlock(&_RTI.rti_mutex); } else { // A transient has joined after the startup phase // At this point, we already hold the mutex - // Iterate over the upstream federates to query the next event tag. + // Iterate over the upstream federates to query the current tag. // Since they may not be connected (being themselves transient, for example) // the total number of connected federates (my_fed->num_of_conn_federates) - // will be compared against those who already sent the NET query response + // will be compared against those who already sent the query response // (my_fed->num_of_conn_federates_sent_net) - LF_PRINT_DEBUG("RTI sends next event tag requests regarding transient %d.", my_fed->id); + LF_PRINT_DEBUG("RTI sends current tag requests regarding transient %d.", my_fed->id); for (int j = 0; j < my_fed->num_upstream; j++) { federate_t* upstream = &_RTI.federates[my_fed->upstream[j]]; // Ignore this federate if it has resigned or if it a transient that @@ -1133,17 +1135,17 @@ void handle_timestamp(federate_t *my_fed) { my_fed->num_of_conn_federates++; } } - // Iterate over the downstream federates to query the next event tag. - // for (int j = 0; j < my_fed->num_downstream; j++) { - // federate_t* downstream = &_RTI.federates[my_fed->downstream[j]]; - // // Ignore this federate if it has resigned. - // if (downstream->state == NOT_CONNECTED) { - // continue; - // } - // if (send_current_tag_query(downstream, my_fed->id)) { - // my_fed->num_of_conn_federates++; - // } - // } + // Iterate over the downstream federates to query the current event tag. + for (int j = 0; j < my_fed->num_downstream; j++) { + federate_t* downstream = &_RTI.federates[my_fed->downstream[j]]; + // Ignore this federate if it has resigned. 
+ if (downstream->state == NOT_CONNECTED) { + continue; + } + if (send_current_tag_query(downstream, my_fed->id)) { + my_fed->num_of_conn_federates++; + } + } // If the transient federate has no connected upstream or downstream federates, // then do not wait for the start time @@ -1158,22 +1160,28 @@ void handle_timestamp(federate_t *my_fed) { LF_PRINT_DEBUG("RTI waits for transient start time to be set."); while(!my_fed->start_time_is_set); - // Once the start time set, sent it to the joining transient - unsigned char start_time_buffer[MSG_TYPE_TIMESTAMP_LENGTH]; - start_time_buffer[0] = MSG_TYPE_TIMESTAMP; + // Once the effective start time set, sent it to the joining transient, + // together with the start time of the federation. + unsigned char start_time_buffer[MSG_TYPE_TIMESTAMP_START_LENGTH]; + start_time_buffer[0] = MSG_TYPE_TIMESTAMP_START; my_fed->fed_start_time += DELAY_START; - LF_PRINT_DEBUG("Transient federate %d start time is set and is %lld.", my_fed->id, my_fed->fed_start_time); - encode_int64(swap_bytes_if_big_endian_int64(my_fed->fed_start_time), &start_time_buffer[1]); + + encode_int64(swap_bytes_if_big_endian_int64(start_time), &start_time_buffer[1]); + encode_int64(swap_bytes_if_big_endian_int64(my_fed->fed_start_time), &start_time_buffer[9]); + + lf_print("Transient federate %d effective start time is set and is %lld. 
" + " The federation start time is %lld.", + my_fed->id, my_fed->fed_start_time, start_time); if (_RTI.tracing_enabled) { - tag_t tag = {.time = start_time, .microstep = 0}; + tag_t tag = {.time = my_fed->fed_start_time, .microstep = 0}; tracepoint_RTI_to_federate(send_TIMESTAMP, my_fed->id, &tag); } ssize_t bytes_written = write_to_socket( - my_fed->socket, MSG_TYPE_TIMESTAMP_LENGTH, + my_fed->socket, MSG_TYPE_TIMESTAMP_START_LENGTH, start_time_buffer ); - if (bytes_written < MSG_TYPE_TIMESTAMP_LENGTH) { + if (bytes_written < MSG_TYPE_TIMESTAMP_START_LENGTH) { lf_print_error("Failed to send the starting time to federate %d.", my_fed->id); } pthread_mutex_lock(&_RTI.rti_mutex); @@ -1373,15 +1381,17 @@ void* clock_synchronization_thread(void* noargs) { void handle_federate_resign(federate_t *my_fed) { // Nothing more to do. Close the socket and exit. pthread_mutex_lock(&_RTI.rti_mutex); + + // Extract the tag + size_t header_size = 1 + sizeof(tag_t); + unsigned char buffer[header_size]; + // Read the header, minus the first byte which has already been read. + read_from_socket_errexit(my_fed->socket, header_size - 1, &(buffer[1]), + "RTI failed to read the timed message header from remote federate."); + // Extract the tag sent by the resigning federate + tag_t tag = extract_tag(&(buffer[1])); + if (_RTI.tracing_enabled) { - // Extract the tag, for tracing purposes - size_t header_size = 1 + sizeof(tag_t); - unsigned char buffer[header_size]; - // Read the header, minus the first byte which has already been read. 
- read_from_socket_errexit(my_fed->socket, header_size - 1, &(buffer[1]), - "RTI failed to read the timed message header from remote federate."); - // Extract the tag sent by the resigning federate - tag_t tag = extract_tag(&(buffer[1])); tracepoint_RTI_from_federate(receive_RESIGN, my_fed->id, &tag); } @@ -1964,6 +1974,7 @@ void* connect_to_transient_federates_thread() { pthread_create(&(_RTI.federates[fed_id].thread_id), NULL, federate_thread_TCP, &(_RTI.federates[fed_id])); _RTI.federates[fed_id].is_transient = true; _RTI.number_of_connected_transient_federates++; + lf_print("Federate %d joined.", _RTI.federates[fed_id].id); } } } @@ -2098,19 +2109,28 @@ void wait_for_federates(int socket_descriptor) { // Wait for persistent federate threads to exit. void* thread_exit_status; for (int i = 0; i < _RTI.number_of_federates + _RTI.number_of_transient_federates; i++) { - if (_RTI.federates[i].is_transient == false) { - lf_print("RTI: Waiting for thread handling federate %d.", _RTI.federates[i].id); + if (!_RTI.federates[i].is_transient) { + lf_print("RTI: Waiting for thread handling peristent federate %d.", _RTI.federates[i].id); pthread_join(_RTI.federates[i].thread_id, &thread_exit_status); free_in_transit_message_q(_RTI.federates[i].in_transit_message_tags); lf_print("RTI: Federate %d thread exited.", _RTI.federates[i].id); } } - // FIXME: Once persistent federates exited, send stop requests to transient federates - // ??? - // and kill thread_exit_status? + // Wait for transient federate threads to exit. + // NOTE: It is important to separate the waiting of persistent federates from + // the transient federates. The reason is that if, for example, federate 0 is + // transienet, and it did leave in the middle of a federation execution, then + // we will no more wait for the thread of a future joining instance to pthread_join. 
if (_RTI.number_of_transient_federates > 0) { - // WIP + for (int i = 0; i < _RTI.number_of_federates + _RTI.number_of_transient_federates; i++) { + if (_RTI.federates[i].is_transient) { + lf_print("RTI: Waiting for thread handling transient federate %d.", _RTI.federates[i].id); + pthread_join(_RTI.federates[i].thread_id, &thread_exit_status); + free_in_transit_message_q(_RTI.federates[i].in_transit_message_tags); + lf_print("RTI: Federate %d thread exited.", _RTI.federates[i].id); + } + } } _RTI.all_federates_exited = true; diff --git a/core/federated/federate.c b/core/federated/federate.c index b4399f768..999dac333 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -70,6 +70,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. extern instant_t _lf_last_reported_unadjusted_physical_time_ns; extern tag_t current_tag; extern instant_t start_time; +extern instant_t effective_start_time; // Error messages. char* ERROR_SENDING_HEADER = "ERROR sending header information to federate via RTI"; @@ -1196,27 +1197,31 @@ instant_t get_start_time_from_rti(instant_t my_physical_time) { // Send the timestamp marker first. _lf_send_time(MSG_TYPE_TIMESTAMP, my_physical_time, true); - // Read bytes from the socket. We need 9 bytes. + // Read bytes from the socket. We need 17 (1 + 8 + 8) bytes. // Buffer for message ID plus timestamp. - size_t buffer_length = 1 + sizeof(instant_t); + size_t buffer_length = MSG_TYPE_TIMESTAMP_START_LENGTH; unsigned char buffer[buffer_length]; read_from_socket_errexit(_fed.socket_TCP_RTI, buffer_length, buffer, - "Failed to read MSG_TYPE_TIMESTAMP message from RTI."); + "Failed to read MSG_TYPE_TIMESTAMP_START message from RTI."); LF_PRINT_DEBUG("Read 9 bytes."); // First byte received is the message ID. - if (buffer[0] != MSG_TYPE_TIMESTAMP) { - lf_print_error_and_exit("Expected a MSG_TYPE_TIMESTAMP message from the RTI. 
Got %u (see net_common.h).", - buffer[0]); + if (buffer[0] != MSG_TYPE_TIMESTAMP_START) { + lf_print_error_and_exit("Expected a MSG_TYPE_TIMESTAMP_START message from " + "the RTI. Got %u (see net_common.h).", + buffer[0]); } + // Read the federation start_time first, then the effective start_time after instant_t timestamp = extract_int64(&(buffer[1])); + effective_start_time = extract_int64(&(buffer[9])); - tag_t tag = {.time = timestamp, .microstep = 0}; - // Trace the event when tracing is enabled + tag_t tag = {.time = effective_start_time, .microstep = 0}; + // Trace the event when tracing is enabled. + // Note that we report in the trace the effective_start_time. + // This is rather a choice. To be changed, if needed, of course. tracepoint_federate_from_RTI(receive_TIMESTAMP, _lf_my_fed_id, &tag); - lf_print("Starting timestamp is: " PRINTF_TIME ".", timestamp); LF_PRINT_LOG("Current physical time is: " PRINTF_TIME ".", lf_time_physical()); return timestamp; @@ -2724,6 +2729,9 @@ void synchronize_with_other_federates() { stop_tag = ((tag_t) {.time = start_time + duration, .microstep = 0}); } + lf_print_log("Start time of the federation is " PRINTF_TIME ".", start_time); + lf_print_log("Effective start time of federate %d is: " PRINTF_TIME ".", _lf_my_fed_id, effective_start_time); + // Start a thread to listen for incoming TCP messages from the RTI. // @note Up until this point, the federate has been listening for messages // from the RTI in a sequential manner in the main thread. From now on, a diff --git a/core/threaded/reactor_threaded.c b/core/threaded/reactor_threaded.c index a9abd848b..971ab83f8 100644 --- a/core/threaded/reactor_threaded.c +++ b/core/threaded/reactor_threaded.c @@ -50,6 +50,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
extern instant_t _lf_last_reported_unadjusted_physical_time_ns; extern tag_t current_tag; extern instant_t start_time; +extern instant_t effective_start_time; /** * Global mutex and condition variable. @@ -727,7 +728,7 @@ void _lf_initialize_start_tag() { // Get a start_time from the RTI synchronize_with_other_federates(); // Resets start_time in federated execution according to the RTI. - current_tag = (tag_t){.time = start_time, .microstep = 0u}; + current_tag = (tag_t){.time = effective_start_time, .microstep = 0u}; #endif _lf_initialize_timers(); diff --git a/include/core/federated/net_common.h b/include/core/federated/net_common.h index c3d20cf96..1fd59f482 100644 --- a/include/core/federated/net_common.h +++ b/include/core/federated/net_common.h @@ -377,12 +377,21 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /** * Byte identifying a timestamp message, which is 64 bits long. * Each federate sends its starting physical time as a message of this - * type, and the RTI broadcasts to all the federates the starting logical - * time as a message of this type. - s*/ + * type. + */ #define MSG_TYPE_TIMESTAMP 2 #define MSG_TYPE_TIMESTAMP_LENGTH (1 + sizeof(int64_t)) +/** + * As an answer to MSG_TYPE_TIMESTAMP, the RTI broadcasts to all persistent + * federates, or sends to newly joining transient federate, a message of + * MSG_TYPE_STIMESTAMP_START. It includes the starting logical time of the + * federation, together with the effective starting logical time. The latter + * is useful for transient federates. + */ +#define MSG_TYPE_TIMESTAMP_START 50 +#define MSG_TYPE_TIMESTAMP_START_LENGTH (1 + sizeof(int64_t) + sizeof(int64_t)) + /** Byte identifying a message to forward to another federate. * The next two bytes will be the ID of the destination port. * The next two bytes are the destination federate ID. 
From 82cfe8cf48a35fbf4a365f7cc6d1db5f1db7c4a5 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Fri, 5 May 2023 16:06:25 -0700 Subject: [PATCH 27/80] Better trace visualization --- core/federated/RTI/rti_lib.c | 4 ++-- core/federated/federate.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 8d4879b76..c6ee0ab7f 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -242,7 +242,7 @@ bool send_current_tag_query (federate_t* conn_fed, uint16_t transient_id) { encode_uint16(transient_id, (unsigned char *)&(buffer[1])); if (_RTI.tracing_enabled) { - tracepoint_RTI_to_federate(send_CuTAG_QR, conn_fed->id, NULL); + tracepoint_RTI_to_federate(send_CuTAG_QR, conn_fed->id, &NEVER_TAG); } // If write_to_socket fails, the consider it as soft failure and update the // federate's status. @@ -1642,7 +1642,7 @@ int32_t receive_and_check_fed_id_message(int socket_id, struct sockaddr_in* clie // Send an MSG_TYPE_ACK message. unsigned char ack_message = MSG_TYPE_ACK; if (_RTI.tracing_enabled) { - tracepoint_RTI_to_federate(send_ACK, fed_id, NULL); + tracepoint_RTI_to_federate(send_ACK, fed_id, &NEVER_TAG); } write_to_socket_errexit(socket_id, 1, &ack_message, "RTI failed to write MSG_TYPE_ACK message to federate %d.", fed_id); diff --git a/core/federated/federate.c b/core/federated/federate.c index 999dac333..4d571b6d4 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -618,7 +618,7 @@ void* handle_p2p_connections_from_federates(void* ignored) { // Send an MSG_TYPE_ACK message. 
unsigned char response = MSG_TYPE_ACK; // Trace the event when tracing is enabled - tracepoint_federate_to_federate(send_ACK, _lf_my_fed_id, remote_fed_id, NULL); + tracepoint_federate_to_federate(send_ACK, _lf_my_fed_id, remote_fed_id, &NEVER_TAG); write_to_socket_errexit(socket_id, 1, (unsigned char*)&response, "Failed to write MSG_TYPE_ACK in response to federate %d.", remote_fed_id); @@ -884,7 +884,7 @@ void connect_to_federate(uint16_t remote_federate_id) { } else { lf_print("Connected to federate %d, port %d.", remote_federate_id, port); // Trace the event when tracing is enabled - tracepoint_federate_to_federate(receive_ACK, _lf_my_fed_id, remote_federate_id, NULL); + tracepoint_federate_to_federate(receive_ACK, _lf_my_fed_id, remote_federate_id, &NEVER_TAG); } } } @@ -1158,7 +1158,7 @@ void connect_to_rti(const char* hostname, int port) { "%d. Error code: %d. Federate quits.\n", response, cause); } else if (response == MSG_TYPE_ACK) { // Trace the event when tracing is enabled - tracepoint_federate_from_RTI(receive_ACK, _lf_my_fed_id, NULL); + tracepoint_federate_from_RTI(receive_ACK, _lf_my_fed_id, &NEVER_TAG); LF_PRINT_LOG("Received acknowledgment from the RTI."); // Call a generated (external) function that sends information @@ -2415,8 +2415,8 @@ void handle_stop_request_message() { * FIXME: This function assumes the caller does hold the mutex lock? */ void handle_next_event_tag_query(){ - tracepoint_federate_from_RTI(receive_CuTAG_QR, _lf_my_fed_id, NULL); - + tracepoint_federate_from_RTI(receive_CuTAG_QR, _lf_my_fed_id, &NEVER_TAG); + // Extract the transient federate Id size_t bytes_to_read = sizeof(uint16_t); unsigned char buffer[bytes_to_read]; From 79a67ff829f7c4c63a0104c99d6206f056eb105e Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Fri, 5 May 2023 17:43:13 -0700 Subject: [PATCH 28/80] Get rid of access() of unistd.h, so that Zephyr tests pass. 
--- core/trace.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/core/trace.c b/core/trace.c index ea0886780..c8d413ec5 100644 --- a/core/trace.c +++ b/core/trace.c @@ -37,7 +37,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include #include "platform.h" @@ -276,7 +275,13 @@ void start_trace(const char* filename) { char filename_[strlen(filename) + 10]; strcpy(filename_, filename); int i = 0; - while (access(filename_, F_OK) == 0) { + FILE *test_file_exists; + while (true) { + test_file_exists = fopen(filename_, "r"); + if (test_file_exists == NULL) { + break; + } + fclose(test_file_exists); // Get the root of the original file name memset(filename_, '\0', sizeof(filename_)); strncpy(filename_, filename, strlen(filename) - 4); From c41f0ed9ee41cf5dba974c6e2f398908e453f1df Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Wed, 7 Jun 2023 10:47:28 -0700 Subject: [PATCH 29/80] Remove redundant declaration. --- core/federated/RTI/rti.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/core/federated/RTI/rti.c b/core/federated/RTI/rti.c index 38e38d788..0ce019dcb 100644 --- a/core/federated/RTI/rti.c +++ b/core/federated/RTI/rti.c @@ -63,14 +63,6 @@ extern federation_RTI_t* _F_RTI; */ unsigned int _lf_number_of_workers = 0u; -/** - * References to the federation RTI and the enclave RTI. - * They both point to the same enclaves stuctures. In the case of federation RTI, - * however, enclaves are encapsulated in federates. - */ -extern enclave_RTI_t * _E_RTI; -extern federation_RTI_t* _F_RTI; - extern lf_mutex_t rti_mutex; extern lf_cond_t received_start_times; extern lf_cond_t sent_start_time; From af093e458c451957091ee1e3fe6226e3063f53ea Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Wed, 7 Jun 2023 12:54:28 -0700 Subject: [PATCH 30/80] Pass to fix merge errors. 
--- core/federated/RTI/rti.c | 27 ++-- core/federated/RTI/rti_lib.c | 231 +++++++++++++++++------------------ core/federated/RTI/rti_lib.h | 32 +++-- 3 files changed, 147 insertions(+), 143 deletions(-) diff --git a/core/federated/RTI/rti.c b/core/federated/RTI/rti.c index 17243f0de..d49304613 100644 --- a/core/federated/RTI/rti.c +++ b/core/federated/RTI/rti.c @@ -102,24 +102,25 @@ int main(int argc, const char* argv[]) { // Processing command-line arguments failed. return -1; } + if (_F_RTI->tracing_enabled) { _lf_number_of_workers = _F_RTI->number_of_enclaves; start_trace(rti_trace_file_name); lf_print("Tracing the RTI execution in %s file.", rti_trace_file_name); - lf_print("Tracing the RTI execution in %s file.", rti_trace_file_name); } - lf_print("Starting RTI for %d persistent federates and %d transient federates in federation ID %s", \ - _RTI.number_of_federates, - _RTI.number_of_transient_federates, - _RTI.federation_id); - - // FIXME: Should number_of_federates + number_of_transient_federates be < UINT16_MAX? 
- assert(_RTI.number_of_federates < UINT16_MAX); - assert(_RTI.number_of_transient_federates < UINT16_MAX); - - _RTI.federates = (federate_t *)calloc(_RTI.number_of_federates + _RTI.number_of_transient_federates, sizeof(federate_t)); - for (uint16_t i = 0; i < _RTI.number_of_federates + _RTI.number_of_transient_federates; i++) { - initialize_federate(i); + lf_print("Starting RTI for a total of %d federates, with %d being transient, in federation ID %s", \ + _F_RTI->number_of_enclaves, + _F_RTI->number_of_transient_federates, + _F_RTI->federation_id); + + assert(_F_RTI->number_of_enclaves < UINT16_MAX); + assert(_F_RTI->number_of_transient_federates < UINT16_MAX); + + // Allocate memory for the federates + _F_RTI->enclaves = (federate_t**)calloc(_F_RTI->number_of_enclaves, sizeof(federate_t*)); + for (uint16_t i = 0; i < _F_RTI->number_of_enclaves; i++) { + _F_RTI->enclaves[i] = (federate_t *)malloc(sizeof(federate_t)); + initialize_federate(_F_RTI->enclaves[i], i); } // Initialize the RTI enclaves diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index a835809b4..2e4b7f76e 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -429,11 +429,11 @@ void handle_timed_message(federate_t* sending_federate, unsigned char* buffer) { ); return; } else { - tag_t fed_start_tag = {.time=_RTI.federates[federate_id].fed_start_time, .microstep=0}; + tag_t fed_start_tag = {.time=_F_RTI->enclaves[federate_id]->fed_start_time, .microstep=0}; if(lf_tag_compare(intended_tag, fed_start_tag) < 0) { // Do not forward the message if the federate is connected, but its // start_time is not reached yet - pthread_mutex_unlock(&_RTI.rti_mutex); + lf_mutex_unlock(&rti_mutex); return; } } @@ -807,21 +807,21 @@ void handle_timestamp(federate_t *my_fed) { my_fed->fed_start_time = timestamp; // Processing the TIMESTAMP depends on whether it is the startup phase (all // persistent federates joined) or not. 
- if (_F_RTI->num_feds_proposed_start < _RTI.number_of_federates) { + if (_F_RTI->num_feds_proposed_start < (_F_RTI->number_of_enclaves - _F_RTI->number_of_transient_federates)) { if (timestamp > _F_RTI->max_start_time) { _F_RTI->max_start_time = timestamp; } // Check that persistent federates did propose a start_time if (!my_fed->is_transient) { - _RTI.num_feds_proposed_start++; + _F_RTI->num_feds_proposed_start++; } - if (_F_RTI->num_feds_proposed_start == _F_RTI->number_of_enclaves) { + if (_F_RTI->num_feds_proposed_start == (_F_RTI->number_of_enclaves - _F_RTI->number_of_transient_federates)) { // All federates have proposed a start time. lf_cond_broadcast(&received_start_times); } else { // Some federates have not yet proposed a start time. // wait for a notification. - while (_F_RTI->num_feds_proposed_start < _F_RTI->number_of_enclaves) { + while (_F_RTI->num_feds_proposed_start < (_F_RTI->number_of_enclaves - _F_RTI->number_of_transient_federates)) { // FIXME: Should have a timeout here? 
lf_cond_wait(&received_start_times); } @@ -845,7 +845,7 @@ void handle_timestamp(federate_t *my_fed) { tracepoint_RTI_to_federate(send_TIMESTAMP, my_fed->enclave.id, &tag); } ssize_t bytes_written = write_to_socket( - my_fed->socket, MSG_TYPE_TIMESTAMP__START_LENGTH, + my_fed->socket, MSG_TYPE_TIMESTAMP_START_LENGTH, start_time_buffer ); if (bytes_written < MSG_TYPE_TIMESTAMP_LENGTH) { @@ -869,26 +869,26 @@ void handle_timestamp(federate_t *my_fed) { // the total number of connected federates (my_fed->num_of_conn_federates) // will be compared against those who already sent the query response // (my_fed->num_of_conn_federates_sent_net) - LF_PRINT_DEBUG("RTI sends current tag requests regarding transient %d.", my_fed->id); - for (int j = 0; j < my_fed->num_upstream; j++) { - federate_t* upstream = &_RTI.federates[my_fed->upstream[j]]; + LF_PRINT_DEBUG("RTI sends current tag requests regarding transient %d.", my_fed->enclave.id); + for (int j = 0; j < my_fed->enclave.num_upstream; j++) { + federate_t* upstream = _F_RTI->enclaves[my_fed->enclave.upstream[j]]; // Ignore this federate if it has resigned or if it a transient that // is absent - if (upstream->state == NOT_CONNECTED) { + if (upstream->enclave.state == NOT_CONNECTED) { continue; } - if (send_current_tag_query(upstream, my_fed->id)) { + if (send_current_tag_query(upstream, my_fed->enclave.id)) { my_fed->num_of_conn_federates++; } } // Iterate over the downstream federates to query the current event tag. - for (int j = 0; j < my_fed->num_downstream; j++) { - federate_t* downstream = &_RTI.federates[my_fed->downstream[j]]; + for (int j = 0; j < my_fed->enclave.num_downstream; j++) { + federate_t* downstream = _F_RTI->enclaves[my_fed->enclave.downstream[j]]; // Ignore this federate if it has resigned. 
- if (downstream->state == NOT_CONNECTED) { + if (downstream->enclave.state == NOT_CONNECTED) { continue; } - if (send_current_tag_query(downstream, my_fed->id)) { + if (send_current_tag_query(downstream, my_fed->enclave.id)) { my_fed->num_of_conn_federates++; } } @@ -897,9 +897,12 @@ void handle_timestamp(federate_t *my_fed) { // then do not wait for the start time if (my_fed->num_of_conn_federates == 0) { my_fed->start_time_is_set = true; - LF_PRINT_DEBUG("Transient federate %d has no upstream or downstrean federates. Its start time is: %lld", my_fed->id, my_fed->fed_start_time); + LF_PRINT_DEBUG("Transient federate %d has no upstream or downstrean federates. " + "Its start time is: " PRINTF_TIME, + my_fed->enclave.id, + my_fed->fed_start_time); } - pthread_mutex_unlock(&_RTI.rti_mutex); + lf_mutex_unlock(&rti_mutex); // Now wait until all connected federates have responded with their next // event logial time instant. @@ -915,27 +918,29 @@ void handle_timestamp(federate_t *my_fed) { encode_int64(swap_bytes_if_big_endian_int64(start_time), &start_time_buffer[1]); encode_int64(swap_bytes_if_big_endian_int64(my_fed->fed_start_time), &start_time_buffer[9]); - lf_print("Transient federate %d effective start time is set and is %lld. " - " The federation start time is %lld.", - my_fed->id, my_fed->fed_start_time, start_time); + lf_print("Transient federate %d effective start time is set and is " PRINTF_TIME "." + " The federation start time is " PRINTF_TIME ". 
", + my_fed->enclave.id, + my_fed->fed_start_time, + start_time); - if (_RTI.tracing_enabled) { + if (_F_RTI->tracing_enabled) { tag_t tag = {.time = my_fed->fed_start_time, .microstep = 0}; - tracepoint_RTI_to_federate(send_TIMESTAMP, my_fed->id, &tag); + tracepoint_RTI_to_federate(send_TIMESTAMP, my_fed->enclave.id, &tag); } ssize_t bytes_written = write_to_socket( my_fed->socket, MSG_TYPE_TIMESTAMP_START_LENGTH, start_time_buffer ); if (bytes_written < MSG_TYPE_TIMESTAMP_START_LENGTH) { - lf_print_error("Failed to send the starting time to federate %d.", my_fed->id); + lf_print_error("Failed to send the starting time to federate %d.", my_fed->enclave.id); } - pthread_mutex_lock(&_RTI.rti_mutex); - my_fed->state = GRANTED; + lf_mutex_lock(&rti_mutex); + my_fed->enclave.state = GRANTED; // tag_t tag = {.time= my_fed->fed_start_time, .microstep=0}; // send_tag_advance_grant(my_fed, tag); - LF_PRINT_LOG("RTI sent start time %lld to transient federate %d.", my_fed->fed_start_time, my_fed->id); - pthread_mutex_unlock(&_RTI.rti_mutex); + LF_PRINT_LOG("RTI sent start time " PRINTF_TIME " to transient federate %d.", my_fed->fed_start_time, my_fed->enclave.id); + lf_mutex_unlock(&rti_mutex); } } @@ -946,23 +951,23 @@ void handle_current_tag_query_response(federate_t *my_fed) { // Read bytes from the socket. We need 8 bytes. 
ssize_t bytes_read = read_from_socket(my_fed->socket, buffer_size, buffer); if (bytes_read < (ssize_t)sizeof(int64_t)) { - lf_print_error("ERROR reading next event query response from federate %d.\n", my_fed->id); + lf_print_error("ERROR reading next event query response from federate %d.\n", my_fed->enclave.id); } // Get the timestamp and the transient federate id instant_t timestamp = swap_bytes_if_big_endian_int64(*((int64_t *)(buffer))); uint16_t transient_fed_id = extract_uint16((&buffer[8])); - if (_RTI.tracing_enabled) { + if (_F_RTI->tracing_enabled) { tag_t tag = {.time = timestamp, .microstep = 0}; - tracepoint_RTI_from_federate(receive_CuTAG_QR_RES, my_fed->id, &tag); + tracepoint_RTI_from_federate(receive_CuTAG_QR_RES, my_fed->enclave.id, &tag); } - LF_PRINT_LOG("RTI received current TAG query response message: %lld.", timestamp); + LF_PRINT_LOG("RTI received current TAG query response message: " PRINTF_TIME, timestamp); // FIXME: Should the lock be inside the if statement only? - pthread_mutex_lock(&_RTI.rti_mutex); + lf_mutex_lock(&rti_mutex); // Processing the TIMESTAMP depends on whether it is the startup phase (all // persistent federates joined) or not. - federate_t* transient = &(_RTI.federates[transient_fed_id]); + federate_t* transient = _F_RTI->enclaves[transient_fed_id]; // Set the start_time of the transient federate to be the maximum among // current tag of upstreams and the physical time at which it joined . @@ -975,7 +980,7 @@ void handle_current_tag_query_response(federate_t *my_fed) { // All expected connected federates to transient have sent responses with NET to RTI transient->start_time_is_set = true; } - pthread_mutex_unlock(&_RTI.rti_mutex); + lf_mutex_unlock(&rti_mutex); } @@ -1242,15 +1247,15 @@ void* federate_thread_TCP(void* fed) { // FIXME: Should free_in_transit_message_q be called in case of persistent federates as well? 
if (my_fed->is_transient) { free_in_transit_message_q(my_fed->in_transit_message_tags); - lf_print("RTI: Transient Federate %d thread exited.", my_fed->id); + lf_print("RTI: Transient Federate %d thread exited.", my_fed->enclave.id); // Update the number of connected transient federates - pthread_mutex_lock(&_RTI.rti_mutex); - _RTI.number_of_connected_transient_federates--; + lf_mutex_lock(&rti_mutex); + _F_RTI->number_of_connected_transient_federates--; // Reset the status of the leaving federate - reset_transient_federate(my_fed->id); - pthread_mutex_unlock(&_RTI.rti_mutex); + reset_transient_federate(my_fed); + lf_mutex_unlock(&rti_mutex); } return NULL; @@ -1276,7 +1281,7 @@ int32_t receive_and_check_fed_id_message(int socket_id, struct sockaddr_in* clie // FIXME: This should not exit with error but rather should just reject the connection. read_from_socket_errexit(socket_id, length, buffer, "RTI failed to read from accepted socket."); - uint16_t fed_id = _F_RTI->number_of_enclaves + _RTI.number_of_transient_federates; // Initialize to an invalid value. + uint16_t fed_id = _F_RTI->number_of_enclaves; // Initialize to an invalid value. bool is_transient = false; // First byte received is the message type. if (buffer[0] != MSG_TYPE_FED_IDS) { @@ -1381,7 +1386,7 @@ int32_t receive_and_check_fed_id_message(int socket_id, struct sockaddr_in* clie // because it is waiting for the start time to be // sent by the RTI before beginning its execution. fed->enclave.state = PENDING; - _RTI.federates[fed_id].is_transient = is_transient; + _F_RTI->enclaves[fed_id]->is_transient = is_transient; LF_PRINT_DEBUG("RTI responding with MSG_TYPE_ACK to federate %d.", fed_id); // Send an MSG_TYPE_ACK message. 
@@ -1640,9 +1645,9 @@ void connect_to_federates(int socket_descriptor) { federate_t *fed = _F_RTI->enclaves[fed_id]; lf_thread_create(&(fed->thread_id), federate_thread_TCP, fed); - if (_RTI.federates[fed_id].is_transient) { - _RTI.number_of_connected_transient_federates++; - assert(_RTI.number_of_connected_transient_federates <= _RTI.number_of_transient_federates); + if (_F_RTI->enclaves[fed_id]->is_transient) { + _F_RTI->number_of_connected_transient_federates++; + assert(_F_RTI->number_of_connected_transient_federates <= _F_RTI->number_of_transient_federates); i--; } } else { @@ -1676,14 +1681,14 @@ void* connect_to_transient_federates_thread() { // This needs to terminate somehow... // That will be part of the while condition while (1) { - if (_RTI.number_of_connected_transient_federates < _RTI.number_of_transient_federates) { + if (_F_RTI->number_of_connected_transient_federates < _F_RTI->number_of_transient_federates) { // Continue waiting for an incoming connection requests from transients. struct sockaddr client_fd; uint32_t client_length = sizeof(client_fd); // The following blocks until a federate connects. int socket_id = -1; while(1) { - socket_id = accept(_RTI.socket_descriptor_TCP, &client_fd, &client_length); + socket_id = accept(_F_RTI->socket_descriptor_TCP, &client_fd, &client_length); if (socket_id >= 0) { // Got a socket break; @@ -1698,7 +1703,7 @@ void* connect_to_transient_federates_thread() { // Send RTI hello when RTI -a option is on. #ifdef __RTI_AUTH__ - if (_RTI.authentication_enabled) { + if (_F_RTI->authentication_enabled) { if (!authenticate_federate(socket_id)) { lf_print_warning("RTI failed to authenticate the incoming federate."); // Ignore the federate that failed authentication. @@ -1719,10 +1724,10 @@ void* connect_to_transient_federates_thread() { // This has to be done after clock synchronization is finished // or that thread may end up attempting to handle incoming clock // synchronization messages. 
- pthread_create(&(_RTI.federates[fed_id].thread_id), NULL, federate_thread_TCP, &(_RTI.federates[fed_id])); - _RTI.federates[fed_id].is_transient = true; - _RTI.number_of_connected_transient_federates++; - lf_print("Federate %d joined.", _RTI.federates[fed_id].id); + lf_thread_create(&(_F_RTI->enclaves[fed_id]->thread_id), federate_thread_TCP, &(_F_RTI->enclaves[fed_id])); + _F_RTI->enclaves[fed_id]->is_transient = true; + _F_RTI->number_of_connected_transient_federates++; + lf_print("Federate %d joined.", _F_RTI->enclaves[fed_id]->enclave.id); } } } @@ -1760,51 +1765,36 @@ void initialize_federate(federate_t* fed, uint16_t id) { fed->socket = -1; // No socket. fed->clock_synchronization_enabled = true; fed->in_transit_message_tags = initialize_in_transit_message_q(); - _RTI.federates[id].state = NOT_CONNECTED; - _RTI.federates[id].upstream = NULL; - _RTI.federates[id].upstream_delay = NULL; - _RTI.federates[id].num_upstream = 0; - _RTI.federates[id].downstream = NULL; - _RTI.federates[id].num_downstream = 0; - _RTI.federates[id].mode = REALTIME; - strncpy(_RTI.federates[id].server_hostname ,"localhost", INET_ADDRSTRLEN); - _fed->server_ip_addr.s_addr = 0; - _fed->server_port = -1; - _RTI.federates[id].requested_stop = false; - _RTI.federates[id].is_transient = true; - _RTI.federates[id].fed_start_time = 0LL; - _RTI.federates[id].num_of_conn_federates = 0; - _RTI.federates[id].num_of_conn_federates_sent_net = 0; - _RTI.federates[id].start_time_is_set = false; + strncpy(fed->server_hostname ,"localhost", INET_ADDRSTRLEN); + fed->server_ip_addr.s_addr = 0; + fed->server_port = -1; + fed->is_transient = true; + fed->fed_start_time = 0LL; + fed->num_of_conn_federates = 0; + fed->num_of_conn_federates_sent_net = 0; + fed->start_time_is_set = false; } -void reset_transient_federate(uint16_t id) { - // The commented lines highlignts the values that a transient federate needs - // to passes to its future joining one - // _RTI.federates[id].id = id; - 
_RTI.federates[id].socket = -1; // No socket. - _RTI.federates[id].clock_synchronization_enabled = true; - // _RTI.federates[id].completed = NEVER_TAG; - _RTI.federates[id].last_granted = NEVER_TAG; - _RTI.federates[id].last_provisionally_granted = NEVER_TAG; - _RTI.federates[id].next_event = NEVER_TAG; - _RTI.federates[id].in_transit_message_tags = initialize_in_transit_message_q(); - _RTI.federates[id].state = NOT_CONNECTED; - // _RTI.federates[id].upstream = NULL; - // _RTI.federates[id].upstream_delay = NULL; - // _RTI.federates[id].num_upstream = 0; - // _RTI.federates[id].downstream = NULL; - // _RTI.federates[id].num_downstream = 0; - _RTI.federates[id].mode = REALTIME; - strncpy(_RTI.federates[id].server_hostname ,"localhost", INET_ADDRSTRLEN); - _RTI.federates[id].server_ip_addr.s_addr = 0; - _RTI.federates[id].server_port = -1; - _RTI.federates[id].requested_stop = false; - _RTI.federates[id].is_transient = true; - _RTI.federates[id].fed_start_time = 0LL; - _RTI.federates[id].num_of_conn_federates = 0; - _RTI.federates[id].num_of_conn_federates_sent_net = 0; - _RTI.federates[id].start_time_is_set = false; +void reset_transient_federate(federate_t* fed) { + // Reset of the enclave-related attributes + // FIXME: Should check further what to reset in the enclave data structure + fed->enclave.last_granted = NEVER_TAG; + fed->enclave.last_provisionally_granted = NEVER_TAG; + fed->enclave.next_event = NEVER_TAG; + fed->enclave.state = NOT_CONNECTED; + // Reset of the federate-related attributes + fed->socket = -1; // No socket. 
+ fed->clock_synchronization_enabled = true; + fed->in_transit_message_tags = initialize_in_transit_message_q(); + strncpy(fed->server_hostname ,"localhost", INET_ADDRSTRLEN); + fed->server_ip_addr.s_addr = 0; + fed->server_port = -1; + fed->enclave.requested_stop = false; + fed->is_transient = true; + fed->fed_start_time = 0LL; + fed->num_of_conn_federates = 0; + fed->num_of_conn_federates_sent_net = 0; + fed->start_time_is_set = false; } int32_t start_rti_server(uint16_t port) { @@ -1831,7 +1821,7 @@ void wait_for_federates(int socket_descriptor) { // All persistent federates have connected. lf_print("RTI: All expected (persistent) federates have connected. Starting execution."); - if (_RTI.number_of_transient_federates > 0) { + if (_F_RTI->number_of_transient_federates > 0) { lf_print("RTI: Transient Federates can join and leave the federation at anytime."); } @@ -1840,22 +1830,22 @@ void wait_for_federates(int socket_descriptor) { // In case some other federation's federates are trying to join the wrong // federation, need to respond. Start a separate thread to do that. lf_thread_t responder_thread; - pthread_t transient_thread; + lf_thread_t transient_thread; // If the federation does not include transient federates, then respond to // erronous connections. Otherwise, continue to accept transients joining and // respond to duplicate joing requests. - if (_RTI.number_of_transient_federates == 0) { + if (_F_RTI->number_of_transient_federates == 0) { lf_thread_create(&responder_thread, respond_to_erroneous_connections, NULL); - } else if (_RTI.number_of_transient_federates > 0) { - pthread_create(&transient_thread, NULL, connect_to_transient_federates_thread, NULL); + } else if (_F_RTI->number_of_transient_federates > 0) { + lf_thread_create(&transient_thread, connect_to_transient_federates_thread, NULL); } // Wait for persistent federate threads to exit. 
void* thread_exit_status; - for (int i = 0; i < _F_RTI->number_of_enclaves + _RTI.number_of_transient_federates; i++) { - if (!_RTI.federates[i].is_transient) { + for (int i = 0 ; i < _F_RTI->number_of_enclaves ; i++) { + if (!_F_RTI->enclaves[i]->is_transient) { federate_t* fed = _F_RTI->enclaves[i]; - lf_print("RTI: Waiting for thread handling peristent federate %d.", fed->enclave.id); + lf_print("RTI: Waiting for thread handling peristent federate %d.", fed->enclave.id); lf_thread_join(fed->thread_id, &thread_exit_status); free_in_transit_message_q(fed->in_transit_message_tags); lf_print("RTI: Federate %d thread exited.", fed->enclave.id); @@ -1866,14 +1856,14 @@ void wait_for_federates(int socket_descriptor) { // NOTE: It is important to separate the waiting of persistent federates from // the transient federates. The reason is that if, for example, federate 0 is // transienet, and it did leave in the middle of a federation execution, then - // we will no more wait for the thread of a future joining instance to pthread_join. - if (_RTI.number_of_transient_federates > 0) { - for (int i = 0; i < _RTI.number_of_federates + _RTI.number_of_transient_federates; i++) { - if (_RTI.federates[i].is_transient) { - lf_print("RTI: Waiting for thread handling transient federate %d.", _RTI.federates[i].id); - pthread_join(_RTI.federates[i].thread_id, &thread_exit_status); - free_in_transit_message_q(_RTI.federates[i].in_transit_message_tags); - lf_print("RTI: Federate %d thread exited.", _RTI.federates[i].id); + // we will no more wait for the thread of a future joining instance to lf_thread_join. 
+ if (_F_RTI->number_of_transient_federates > 0) { + for (int i = 0 ; i < _F_RTI->number_of_enclaves ; i++) { + if (_F_RTI->enclaves[i]->is_transient) { + lf_print("RTI: Waiting for thread handling transient federate %d.", _F_RTI->enclaves[i]->enclave.id); + lf_thread_join(_F_RTI->enclaves[i]->thread_id, &thread_exit_status); + free_in_transit_message_q(_F_RTI->enclaves[i]->in_transit_message_tags); + lf_print("RTI: Federate %d thread exited.", _F_RTI->enclaves[i]->enclave.id); } } } @@ -1943,8 +1933,8 @@ void usage(int argc, const char* argv[]) { lf_print(" The ID of the federation that this RTI will control.\n"); lf_print(" -n, --number_of_federates "); lf_print(" The number of federates in the federation that this RTI will control.\n"); - printf(" -nt, --number_of_transient_federates \n"); - printf(" The number of transient federates in the federation that this RTI will control.\n\n"); + lf_print(" -nt, --number_of_transient_federates "); + lf_print(" The number of transient federates in the federation that this RTI will control.\n"); lf_print(" -p, --port "); lf_print(" The port number to use for the RTI. Must be larger than 0 and smaller than %d. 
Default is %d.\n", UINT16_MAX, STARTING_PORT); lf_print(" -c, --clock_sync [off|init|on] [period ] [exchanges-per-interval ]"); @@ -2057,19 +2047,24 @@ int process_args(int argc, const char* argv[]) { lf_print("RTI: Number of federates: %d\n", _F_RTI->number_of_enclaves); } else if (strcmp(argv[i], "-nt") == 0 || strcmp(argv[i], "--number_of_transient_federates") == 0) { if (argc < i + 2) { - fprintf(stderr, "Error: --number_of_transient_federates needs an integer argument.\n"); + lf_print_error("--number_of_transient_federates needs an integer argument."); usage(argc, argv); return 0; } i++; long num_transient_federates = strtol(argv[i], NULL, 10); if (num_transient_federates == LONG_MAX || num_transient_federates == LONG_MIN) { - fprintf(stderr, "Error: --number_of_transient_federates needs a valid positive or null integer argument.\n"); + lf_print_error("--number_of_transient_federates needs a valid positive or null integer argument."); usage(argc, argv); return 0; } - _RTI.number_of_transient_federates = (int32_t)num_transient_federates; // FIXME: Loses numbers on 64-bit machines - printf("RTI: Number of transient federates: %d\n", _RTI.number_of_transient_federates); + if (num_transient_federates > _F_RTI->number_of_enclaves) { + lf_print_error("--number_of_transient_federates cannot be higher than the number of federates."); + usage(argc, argv); + return 0; + } + _F_RTI->number_of_transient_federates = (int32_t)num_transient_federates; // FIXME: Loses numbers on 64-bit machines + lf_print("RTI: Number of transient federates: %d", _F_RTI->number_of_transient_federates); } else if (strcmp(argv[i], "-p") == 0 || strcmp(argv[i], "--port") == 0) { if (argc < i + 2) { lf_print_error( @@ -2145,4 +2140,4 @@ void initialize_RTI(){ _F_RTI->clock_sync_exchanges_per_interval = 10, _F_RTI->authentication_enabled = false, _F_RTI->tracing_enabled = false; -} \ No newline at end of file +} diff --git a/core/federated/RTI/rti_lib.h b/core/federated/RTI/rti_lib.h index 
d7d460edb..2a73ba496 100644 --- a/core/federated/RTI/rti_lib.h +++ b/core/federated/RTI/rti_lib.h @@ -61,9 +61,6 @@ typedef struct federate_t { // RTI has not been informed of the port number. struct in_addr server_ip_addr; // Information about the IP address of the socket // server of the federate. - bool requested_stop; // Indicates that the federate has requested stop or has replied - // to a request for stop from the RTI. Used to prevent double-counting - // a federate when handling lf_request_stop(). bool is_transient; // Indicates whether the federate is transient or persistent. int64_t fed_start_time; // Records the start time of the federate, which is mainly useful for transient federates int num_of_conn_federates; // Records the total number of connected federates among the upstream and @@ -109,14 +106,13 @@ typedef struct federation_RTI_t { // RTI's decided stop tag for enclaves tag_t max_stop_tag; -////////////// Federation only specific attributes ////////////// + // Number of enclaves handling stop + int num_enclaves_handling_stop; - // Number of transient federates in the federation - int32_t number_of_transient_federates; - - // Number of connected transient federates in the federation - int32_t number_of_connected_transient_federates; + // Boolean indicating that tracing is enabled. + bool tracing_enabled; + ////////////// Federation only specific attributes ////////////// // Maximum start time seen so far from the federates. int64_t max_start_time; @@ -182,6 +178,13 @@ typedef struct federation_RTI_t { * Boolean indicating that authentication is enabled. */ bool authentication_enabled; + + // Number of transient federates in the federation + int32_t number_of_transient_federates; + + // Number of connected transient federates in the federation + int32_t number_of_connected_transient_federates; + } federation_RTI_t; /** @@ -503,6 +506,7 @@ void* respond_to_erroneous_connections(void* nothing); /** * Initialize the federate with the specified ID. 
+ * @param fed A pointer to the federate * @param id The federate ID. */ void initialize_federate(federate_t* fed, uint16_t id); @@ -510,9 +514,9 @@ void initialize_federate(federate_t* fed, uint16_t id); /** * Reset the federate with the specified ID. The federate has to be transient. - * @param id The transient federate ID. + * @param fed A pointer to the federate */ -void reset_transient_federate(uint16_t id); +void reset_transient_federate(federate_t* fed); /** * Start the socket server for the runtime infrastructure (RTI) and @@ -553,11 +557,13 @@ int process_clock_sync_args(int argc, const char* argv[]); */ int process_args(int argc, const char* argv[]); -////////////////////////////////////////////////////////// /** * Initialize the _RTI instance. */ void initialize_RTI(); + +////////////////////////////////////////////////////////// + /** * Queries conn_fed for its current Tag (using MSG_TYPE_CURRENT_TAG_QUERY). * If the function fails to send the query, for example in case the federate is @@ -581,4 +587,6 @@ bool send_current_tag_query(federate_t* conn_fed, uint16_t fed_id); * @param my_fed: the federate from whom the response is received. */ void handle_current_tag_query_response(federate_t *my_fed); + +////////////////////////////////////////////////////////// #endif // RTI_LIB_H \ No newline at end of file From e61ea4d9c0899adadaeeac8f59edda0073041800 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Thu, 8 Jun 2023 10:00:36 -0700 Subject: [PATCH 31/80] More merge fixes. 
--- core/federated/RTI/rti_lib.c | 383 ++++++++++++++-------------- core/federated/RTI/rti_lib.h | 53 ++-- core/federated/federate.c | 14 +- core/tag.c | 2 +- core/threaded/reactor_threaded.c | 4 +- include/core/federated/net_common.h | 8 +- 6 files changed, 236 insertions(+), 228 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 2e4b7f76e..1a41aab5a 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -718,8 +718,6 @@ void handle_stop_request_reply(federate_t* fed) { lf_mutex_unlock(&rti_mutex); } -////////////////////////////////////////////////// - void handle_address_query(uint16_t fed_id) { federate_t *fed = _F_RTI->enclaves[fed_id]; // Use buffer both for reading and constructing the reply. @@ -799,12 +797,11 @@ void handle_timestamp(federate_t *my_fed) { tag_t tag = {.time = timestamp, .microstep = 0}; tracepoint_RTI_from_federate(receive_TIMESTAMP, my_fed->enclave.id, &tag); } - LF_PRINT_LOG("RTI received timestamp message: %ld.", timestamp); LF_PRINT_LOG("RTI received timestamp message: " PRINTF_TIME ".", timestamp); - // FIXME: Should the lock be inside the if statement only? lf_mutex_lock(&rti_mutex); my_fed->fed_start_time = timestamp; + // Processing the TIMESTAMP depends on whether it is the startup phase (all // persistent federates joined) or not. if (_F_RTI->num_feds_proposed_start < (_F_RTI->number_of_enclaves - _F_RTI->number_of_transient_federates)) { @@ -826,50 +823,22 @@ void handle_timestamp(federate_t *my_fed) { lf_cond_wait(&received_start_times); } } - lf_mutex_unlock(&rti_mutex); - // Send back to the federate the maximum time plus an offset on a TIMESTAMP_START - // message. - // In the startup phase, federates will receive identical start_time and - // effective_start_time - unsigned char start_time_buffer[MSG_TYPE_TIMESTAMP_START_LENGTH]; - start_time_buffer[0] = MSG_TYPE_TIMESTAMP_START; // Add an offset to this start time to get everyone starting together. 
start_time = _F_RTI->max_start_time + DELAY_START; - encode_int64(swap_bytes_if_big_endian_int64(start_time), &start_time_buffer[1]); - encode_int64(swap_bytes_if_big_endian_int64(start_time), &start_time_buffer[9]); - if (_F_RTI->tracing_enabled) { - tag_t tag = {.time = start_time, .microstep = 0}; - tracepoint_RTI_to_federate(send_TIMESTAMP, my_fed->enclave.id, &tag); - } - ssize_t bytes_written = write_to_socket( - my_fed->socket, MSG_TYPE_TIMESTAMP_START_LENGTH, - start_time_buffer - ); - if (bytes_written < MSG_TYPE_TIMESTAMP_LENGTH) { - lf_print_error("Failed to send the starting time to federate %d.", my_fed->enclave.id); - } - - lf_mutex_lock(&rti_mutex); - // Update state for the federate to indicate that the MSG_TYPE_TIMESTAMP - // message has been sent. That MSG_TYPE_TIMESTAMP message grants time advance to - // the federate to the start time. - my_fed->enclave.state = GRANTED; - lf_cond_broadcast(&sent_start_time); - LF_PRINT_LOG("RTI sent start time " PRINTF_TIME " to federate %d.", start_time, my_fed->enclave.id); - lf_mutex_unlock(&rti_mutex); - } else { + // Send the start_time + send_start_tag(my_fed, start_time, (tag_t){.time = start_time, .microstep = 0}); + } else { // A transient has joined after the startup phase // At this point, we already hold the mutex - // Iterate over the upstream federates to query the current tag. - // Since they may not be connected (being themselves transient, for example) - // the total number of connected federates (my_fed->num_of_conn_federates) - // will be compared against those who already sent the query response - // (my_fed->num_of_conn_federates_sent_net) - LF_PRINT_DEBUG("RTI sends current tag requests regarding transient %d.", my_fed->enclave.id); + // Iterate over the upstream federates and downstream federates to find + // get the max of the TAGs. + // FIXME: what about PTAGs? + // FIXME: Maybe we can use TAGs from a transient that left??? 
+ tag_t federate_start_tag = NEVER_TAG; for (int j = 0; j < my_fed->enclave.num_upstream; j++) { federate_t* upstream = _F_RTI->enclaves[my_fed->enclave.upstream[j]]; // Ignore this federate if it has resigned or if it a transient that @@ -877,8 +846,9 @@ void handle_timestamp(federate_t *my_fed) { if (upstream->enclave.state == NOT_CONNECTED) { continue; } - if (send_current_tag_query(upstream, my_fed->enclave.id)) { - my_fed->num_of_conn_federates++; + if (lf_tag_compare(federate_start_tag, upstream->enclave.last_granted) < 0) { + federate_start_tag = upstream->enclave.last_granted; + federate_start_tag.microstep++; } } // Iterate over the downstream federates to query the current event tag. @@ -888,99 +858,72 @@ void handle_timestamp(federate_t *my_fed) { if (downstream->enclave.state == NOT_CONNECTED) { continue; } - if (send_current_tag_query(downstream, my_fed->enclave.id)) { - my_fed->num_of_conn_federates++; + if (lf_tag_compare(federate_start_tag, downstream->enclave.last_granted) < 0) { + federate_start_tag = downstream->enclave.last_granted; + federate_start_tag.microstep++; } } // If the transient federate has no connected upstream or downstream federates, // then do not wait for the start time - if (my_fed->num_of_conn_federates == 0) { + if (lf_tag_compare(federate_start_tag, NEVER_TAG) == 0) { my_fed->start_time_is_set = true; - LF_PRINT_DEBUG("Transient federate %d has no upstream or downstrean federates. " - "Its start time is: " PRINTF_TIME, - my_fed->enclave.id, - my_fed->fed_start_time); + my_fed->fed_start_time += DELAY_START; + federate_start_tag = (tag_t){.time = my_fed->fed_start_time, .microstep = 0}; } lf_mutex_unlock(&rti_mutex); - // Now wait until all connected federates have responded with their next - // event logial time instant. 
- - LF_PRINT_DEBUG("RTI waits for transient start time to be set."); - while(!my_fed->start_time_is_set); // Once the effective start time set, sent it to the joining transient, // together with the start time of the federation. - unsigned char start_time_buffer[MSG_TYPE_TIMESTAMP_START_LENGTH]; - start_time_buffer[0] = MSG_TYPE_TIMESTAMP_START; - my_fed->fed_start_time += DELAY_START; - - encode_int64(swap_bytes_if_big_endian_int64(start_time), &start_time_buffer[1]); - encode_int64(swap_bytes_if_big_endian_int64(my_fed->fed_start_time), &start_time_buffer[9]); - - lf_print("Transient federate %d effective start time is set and is " PRINTF_TIME "." - " The federation start time is " PRINTF_TIME ". ", - my_fed->enclave.id, - my_fed->fed_start_time, - start_time); - if (_F_RTI->tracing_enabled) { - tag_t tag = {.time = my_fed->fed_start_time, .microstep = 0}; - tracepoint_RTI_to_federate(send_TIMESTAMP, my_fed->enclave.id, &tag); - } - ssize_t bytes_written = write_to_socket( - my_fed->socket, MSG_TYPE_TIMESTAMP_START_LENGTH, - start_time_buffer - ); - if (bytes_written < MSG_TYPE_TIMESTAMP_START_LENGTH) { - lf_print_error("Failed to send the starting time to federate %d.", my_fed->enclave.id); - } - lf_mutex_lock(&rti_mutex); - my_fed->enclave.state = GRANTED; - // tag_t tag = {.time= my_fed->fed_start_time, .microstep=0}; - // send_tag_advance_grant(my_fed, tag); - LF_PRINT_LOG("RTI sent start time " PRINTF_TIME " to transient federate %d.", my_fed->fed_start_time, my_fed->enclave.id); - lf_mutex_unlock(&rti_mutex); + // Send the start time + send_start_tag(my_fed, start_time, federate_start_tag); } } -void handle_current_tag_query_response(federate_t *my_fed) { - // Get the logical time instant and the transient fed_id from the socket - size_t buffer_size = sizeof(instant_t) + sizeof(uint16_t); - unsigned char buffer[buffer_size]; - // Read bytes from the socket. We need 8 bytes. 
- ssize_t bytes_read = read_from_socket(my_fed->socket, buffer_size, buffer); - if (bytes_read < (ssize_t)sizeof(int64_t)) { - lf_print_error("ERROR reading next event query response from federate %d.\n", my_fed->enclave.id); - } +void send_start_tag(federate_t* my_fed, instant_t federation_start_time, tag_t federate_start_tag) { + // Send back to the federate the maximum time plus an offset on a TIMESTAMP_START + // message. + // In the startup phase, federates will receive identical start_time and + // effective_start_tag + unsigned char start_time_buffer[MSG_TYPE_TIMESTAMP_START_LENGTH]; + start_time_buffer[0] = MSG_TYPE_TIMESTAMP_START; + + encode_int64(swap_bytes_if_big_endian_int64(federation_start_time), &start_time_buffer[1]); + encode_int64(swap_bytes_if_big_endian_int64(federate_start_tag.time), &start_time_buffer[9]); + encode_int32(swap_bytes_if_big_endian_int64(federate_start_tag.microstep), &start_time_buffer[9+8]); - // Get the timestamp and the transient federate id - instant_t timestamp = swap_bytes_if_big_endian_int64(*((int64_t *)(buffer))); - uint16_t transient_fed_id = extract_uint16((&buffer[8])); if (_F_RTI->tracing_enabled) { - tag_t tag = {.time = timestamp, .microstep = 0}; - tracepoint_RTI_from_federate(receive_CuTAG_QR_RES, my_fed->enclave.id, &tag); + tracepoint_RTI_to_federate(send_TIMESTAMP, my_fed->enclave.id, &federate_start_tag); } - LF_PRINT_LOG("RTI received current TAG query response message: " PRINTF_TIME, timestamp); - // FIXME: Should the lock be inside the if statement only? - lf_mutex_lock(&rti_mutex); - // Processing the TIMESTAMP depends on whether it is the startup phase (all - // persistent federates joined) or not. - federate_t* transient = _F_RTI->enclaves[transient_fed_id]; - - // Set the start_time of the transient federate to be the maximum among - // current tag of upstreams and the physical time at which it joined . 
- if (timestamp > transient->fed_start_time) { - transient->fed_start_time = timestamp; + ssize_t bytes_written = write_to_socket( + my_fed->socket, MSG_TYPE_TIMESTAMP_START_LENGTH, + start_time_buffer + ); + if (bytes_written < MSG_TYPE_TIMESTAMP_LENGTH) { + lf_print_error("Failed to send the starting time to federate %d.", my_fed->enclave.id); } - // Check that upstream and downstream federates of the transient did propose a start_time - transient->num_of_conn_federates_sent_net++; - if (transient->num_of_conn_federates_sent_net == transient->num_of_conn_federates) { - // All expected connected federates to transient have sent responses with NET to RTI - transient->start_time_is_set = true; + + // Update state for the federate to indicate that MSG_TYPE_TIMESTAMP_START + // message has been sent. MSG_TYPE_TIMESTAMP_START grants time advance to + // the federate to the start time. + lf_mutex_lock(&rti_mutex); + my_fed->enclave.state = GRANTED; + + // If it the startup phase, then broadcast that the start_time was sent + if (_F_RTI->num_feds_proposed_start < (_F_RTI->number_of_enclaves - _F_RTI->number_of_transient_federates)) { + lf_cond_broadcast(&sent_start_time); } + lf_mutex_unlock(&rti_mutex); + + LF_PRINT_LOG("RTI sent start time " PRINTF_TIME " to federate %d." + " The effective start tag is " PRINTF_TAG ".", + federation_start_time, + my_fed->enclave.id, + federate_start_tag.time, + federate_start_tag.microstep); } @@ -1599,7 +1542,7 @@ bool authenticate_federate(int socket) { void connect_to_federates(int socket_descriptor) { // This loop will accept both, persistent and transient federates. // For transient, however, i will be decreased - for (int i = 0; i < _F_RTI->number_of_enclaves; i++) { + for (int i = 0 ; i < _F_RTI->number_of_enclaves - _F_RTI->number_of_transient_federates ; i++) { // Wait for an incoming connection request. 
struct sockaddr client_fd; uint32_t client_length = sizeof(client_fd); @@ -1649,6 +1592,7 @@ void connect_to_federates(int socket_descriptor) { _F_RTI->number_of_connected_transient_federates++; assert(_F_RTI->number_of_connected_transient_federates <= _F_RTI->number_of_transient_federates); i--; + lf_print("RTI: Transient federate %d joined.", fed->enclave.id); } } else { // Received message was rejected. Try again. @@ -1675,64 +1619,6 @@ void connect_to_federates(int socket_descriptor) { } } -void* connect_to_transient_federates_thread() { - // This loop will continue to accept connections of transient federates, as - // soon as there is room - // This needs to terminate somehow... - // That will be part of the while condition - while (1) { - if (_F_RTI->number_of_connected_transient_federates < _F_RTI->number_of_transient_federates) { - // Continue waiting for an incoming connection requests from transients. - struct sockaddr client_fd; - uint32_t client_length = sizeof(client_fd); - // The following blocks until a federate connects. - int socket_id = -1; - while(1) { - socket_id = accept(_F_RTI->socket_descriptor_TCP, &client_fd, &client_length); - if (socket_id >= 0) { - // Got a socket - break; - } else if (socket_id < 0 && (errno != EAGAIN || errno != EWOULDBLOCK)) { - lf_print_error_and_exit("RTI failed to accept the socket. %s.", strerror(errno)); - } else { - // Try again - lf_print_warning("RTI failed to accept the socket. %s. Trying again.", strerror(errno)); - continue; - } - } - - // Send RTI hello when RTI -a option is on. - #ifdef __RTI_AUTH__ - if (_F_RTI->authentication_enabled) { - if (!authenticate_federate(socket_id)) { - lf_print_warning("RTI failed to authenticate the incoming federate."); - // Ignore the federate that failed authentication. - continue; - } - } - #endif - - // The first message from the federate should contain its ID and the federation ID. 
- int32_t fed_id = receive_and_check_fed_id_message(socket_id, (struct sockaddr_in*)&client_fd); - // FIXME: THIS IS A TEMPORARY HACK THAT MAKES FEDERATES WITH EVEN IDs PERSISTENT - // AND THOSE WITH ODD IDs TRANSIENT!!! - if (fed_id >= 0 - && receive_connection_information(socket_id, (uint16_t)fed_id) - && receive_udp_message_and_set_up_clock_sync(socket_id, (uint16_t)fed_id)) { - - // Create a thread to communicate with the federate. - // This has to be done after clock synchronization is finished - // or that thread may end up attempting to handle incoming clock - // synchronization messages. - lf_thread_create(&(_F_RTI->enclaves[fed_id]->thread_id), federate_thread_TCP, &(_F_RTI->enclaves[fed_id])); - _F_RTI->enclaves[fed_id]->is_transient = true; - _F_RTI->number_of_connected_transient_federates++; - lf_print("Federate %d joined.", _F_RTI->enclaves[fed_id]->enclave.id); - } - } - } -} - void* respond_to_erroneous_connections(void* nothing) { while (true) { // Wait for an incoming connection request. @@ -1775,28 +1661,6 @@ void initialize_federate(federate_t* fed, uint16_t id) { fed->start_time_is_set = false; } -void reset_transient_federate(federate_t* fed) { - // Reset of the enclave-related attributes - // FIXME: Should check further what to reset in the enclave data structure - fed->enclave.last_granted = NEVER_TAG; - fed->enclave.last_provisionally_granted = NEVER_TAG; - fed->enclave.next_event = NEVER_TAG; - fed->enclave.state = NOT_CONNECTED; - // Reset of the federate-related attributes - fed->socket = -1; // No socket. 
- fed->clock_synchronization_enabled = true; - fed->in_transit_message_tags = initialize_in_transit_message_q(); - strncpy(fed->server_hostname ,"localhost", INET_ADDRSTRLEN); - fed->server_ip_addr.s_addr = 0; - fed->server_port = -1; - fed->enclave.requested_stop = false; - fed->is_transient = true; - fed->fed_start_time = 0LL; - fed->num_of_conn_federates = 0; - fed->num_of_conn_federates_sent_net = 0; - fed->start_time_is_set = false; -} - int32_t start_rti_server(uint16_t port) { int32_t specified_port = port; if (port == 0) { @@ -1852,6 +1716,8 @@ void wait_for_federates(int socket_descriptor) { } } + lf_print("All persistent threads exited."); + // Wait for transient federate threads to exit. // NOTE: It is important to separate the waiting of persistent federates from // the transient federates. The reason is that if, for example, federate 0 is @@ -1869,7 +1735,14 @@ void wait_for_federates(int socket_descriptor) { } _F_RTI->all_federates_exited = true; + lf_print("All transient threads exited."); + if (_F_RTI->number_of_transient_federates == 0) { + lf_thread_join(&responder_thread, &thread_exit_status); + } else if (_F_RTI->number_of_transient_federates > 0) { + lf_thread_join(&transient_thread, &thread_exit_status); + } + // Shutdown and close the socket so that the accept() call in // respond_to_erroneous_connections returns. That thread should then // check _F_RTI->all_federates_exited and it should exit. @@ -2141,3 +2014,125 @@ void initialize_RTI(){ _F_RTI->authentication_enabled = false, _F_RTI->tracing_enabled = false; } + +////////////////////////////////////////////////////////// + +void* connect_to_transient_federates_thread() { + // This loop will continue to accept connections of transient federates, as + // soon as there is room + while (!_F_RTI->all_federates_exited) { + if (_F_RTI->number_of_connected_transient_federates < _F_RTI->number_of_transient_federates) { + // Continue waiting for an incoming connection requests from transients. 
+ struct sockaddr client_fd; + uint32_t client_length = sizeof(client_fd); + // The following blocks until a federate connects. + int socket_id = -1; + while(1) { + if (!_F_RTI->all_federates_exited) { + return; + } + socket_id = accept(_F_RTI->socket_descriptor_TCP, &client_fd, &client_length); + if (socket_id >= 0) { + // Got a socket + break; + } else if (socket_id < 0 && (errno != EAGAIN || errno != EWOULDBLOCK)) { + lf_print_error_and_exit("RTI failed to accept the socket. %s.", strerror(errno)); + } else { + // Try again + lf_print_warning("RTI failed to accept the socket. %s. Trying again.", strerror(errno)); + continue; + } + } + + // Send RTI hello when RTI -a option is on. + #ifdef __RTI_AUTH__ + if (_F_RTI->authentication_enabled) { + if (!authenticate_federate(socket_id)) { + lf_print_warning("RTI failed to authenticate the incoming federate."); + // Ignore the federate that failed authentication. + continue; + } + } + #endif + + // The first message from the federate should contain its ID and the federation ID. + int32_t fed_id = receive_and_check_fed_id_message(socket_id, (struct sockaddr_in*)&client_fd); + // FIXME: THIS IS A TEMPORARY HACK THAT MAKES FEDERATES WITH EVEN IDs PERSISTENT + // AND THOSE WITH ODD IDs TRANSIENT!!! + if (fed_id >= 0 + && receive_connection_information(socket_id, (uint16_t)fed_id) + && receive_udp_message_and_set_up_clock_sync(socket_id, (uint16_t)fed_id)) { + + // Create a thread to communicate with the federate. + // This has to be done after clock synchronization is finished + // or that thread may end up attempting to handle incoming clock + // synchronization messages. 
+ lf_thread_create(&(_F_RTI->enclaves[fed_id]->thread_id), federate_thread_TCP, &(_F_RTI->enclaves[fed_id])); + _F_RTI->enclaves[fed_id]->is_transient = true; + _F_RTI->number_of_connected_transient_federates++; + lf_print("RTI: Transient federate %d joined.", fed_id); + } + } + } +} + +void reset_transient_federate(federate_t* fed) { + // Reset of the enclave-related attributes + // FIXME: Should check further what to reset in the enclave data structure + fed->enclave.last_granted = NEVER_TAG; + fed->enclave.last_provisionally_granted = NEVER_TAG; + fed->enclave.next_event = NEVER_TAG; + fed->enclave.state = NOT_CONNECTED; + // Reset of the federate-related attributes + fed->socket = -1; // No socket. + fed->clock_synchronization_enabled = true; + fed->in_transit_message_tags = initialize_in_transit_message_q(); + strncpy(fed->server_hostname ,"localhost", INET_ADDRSTRLEN); + fed->server_ip_addr.s_addr = 0; + fed->server_port = -1; + fed->enclave.requested_stop = false; + fed->is_transient = true; + fed->fed_start_time = 0LL; + fed->num_of_conn_federates = 0; + fed->num_of_conn_federates_sent_net = 0; + fed->start_time_is_set = false; +} + +void handle_current_tag_query_response(federate_t *my_fed) { + // Get the logical time instant and the transient fed_id from the socket + size_t buffer_size = sizeof(instant_t) + sizeof(uint16_t); + unsigned char buffer[buffer_size]; + // Read bytes from the socket. We need 8 bytes. 
+ ssize_t bytes_read = read_from_socket(my_fed->socket, buffer_size, buffer); + if (bytes_read < (ssize_t)sizeof(int64_t)) { + lf_print_error("ERROR reading next event query response from federate %d.\n", my_fed->enclave.id); + } + + // Get the timestamp and the transient federate id + instant_t timestamp = swap_bytes_if_big_endian_int64(*((int64_t *)(buffer))); + uint16_t transient_fed_id = extract_uint16((&buffer[8])); + if (_F_RTI->tracing_enabled) { + tag_t tag = {.time = timestamp, .microstep = 0}; + tracepoint_RTI_from_federate(receive_CuTAG_QR_RES, my_fed->enclave.id, &tag); + } + LF_PRINT_LOG("RTI received current TAG query response message: " PRINTF_TIME, timestamp); + + // FIXME: Should the lock be inside the if statement only? + lf_mutex_lock(&rti_mutex); + // Processing the TIMESTAMP depends on whether it is the startup phase (all + // persistent federates joined) or not. + federate_t* transient = _F_RTI->enclaves[transient_fed_id]; + + // Set the start_time of the transient federate to be the maximum among + // current tag of upstreams and the physical time at which it joined . + if (timestamp > transient->fed_start_time) { + transient->fed_start_time = timestamp; + } + // Check that upstream and downstream federates of the transient did propose a start_time + transient->num_of_conn_federates_sent_net++; + if (transient->num_of_conn_federates_sent_net == transient->num_of_conn_federates) { + // All expected connected federates to transient have sent responses with NET to RTI + transient->start_time_is_set = true; + } + lf_mutex_unlock(&rti_mutex); +} diff --git a/core/federated/RTI/rti_lib.h b/core/federated/RTI/rti_lib.h index 2a73ba496..23e92ee52 100644 --- a/core/federated/RTI/rti_lib.h +++ b/core/federated/RTI/rti_lib.h @@ -349,13 +349,27 @@ void handle_address_ad(uint16_t federate_id); * The behavior here depends on whether the message is received within the * startup phase or not. 
By startup phase, it is menat that all persistent federates * have their start_time set (already started or about to start). - * If all persistent federates have started, then a TIMESTAMP message will be - * received from a transient. In such case, the start_time of the newly joined - * transient federate will depend on the NET of his updtream and downstream - * federates. + * + * @param my_fed the federate that sent a MSG_TYPE_TIMESTAMP message. */ void handle_timestamp(federate_t *my_fed); +/** + * Send to the start time to the federate my_fed. + * This function assumes the caller does not hold the mutex. + * + * If it is the startup phase, the start_time will be the maximum received timestamps + * plus an offset. The federate will then receive identical federation_start_time + * and federate_start_tag.time (the federate_start_tag.microstep will be 0). + * If, however, the startup phase is passed, the federate will receive different + * values than sateted above. + * + * @param my_fed the federate to send the start time to. + * @param federation_start_time the federation start_time + * @param federate_start_tag the federate effective start tag + */ +void send_start_tag(federate_t* my_fed, instant_t federation_start_time, tag_t federate_start_tag); + /** * Take a snapshot of the physical clock time and send * it to federate fed_id. @@ -488,15 +502,6 @@ bool authenticate_federate(int socket); */ void connect_to_federates(int socket_descriptor); -/** - * Once all persistent federates have connected, continue to wait for incoming - * connection requests from transient federates. - * Upon receiving it, it creates a thread to communicate with that federate. - * This thread continues to check whether the communication thread with a transient - * federate has exited, in which case it accepts other connections. 
- */ -void* connect_to_transient_federates_thread(); - /** * Thread to respond to new connections, which could be federates of other * federations who are attempting to join the wrong federation. @@ -511,13 +516,6 @@ void* respond_to_erroneous_connections(void* nothing); */ void initialize_federate(federate_t* fed, uint16_t id); - -/** - * Reset the federate with the specified ID. The federate has to be transient. - * @param fed A pointer to the federate - */ -void reset_transient_federate(federate_t* fed); - /** * Start the socket server for the runtime infrastructure (RTI) and * return the socket descriptor. @@ -564,6 +562,21 @@ void initialize_RTI(); ////////////////////////////////////////////////////////// +/** + * Once all persistent federates have connected, continue to wait for incoming + * connection requests from transient federates. + * Upon receiving it, it creates a thread to communicate with that federate. + * This thread continues to check whether the communication thread with a transient + * federate has exited, in which case it accepts other connections. + */ +void* connect_to_transient_federates_thread(); + +/** + * Reset the federate with the specified ID. The federate has to be transient. + * @param fed A pointer to the federate + */ +void reset_transient_federate(federate_t* fed); + /** * Queries conn_fed for its current Tag (using MSG_TYPE_CURRENT_TAG_QUERY). * If the function fails to send the query, for example in case the federate is diff --git a/core/federated/federate.c b/core/federated/federate.c index 4172e5114..0699fb2da 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -70,7 +70,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. extern instant_t _lf_last_reported_unadjusted_physical_time_ns; extern tag_t current_tag; extern instant_t start_time; -extern instant_t effective_start_time; +extern tag_t effective_start_tag; // Error messages. 
char* ERROR_SENDING_HEADER = "ERROR sending header information to federate via RTI"; @@ -1208,13 +1208,13 @@ instant_t get_start_time_from_rti(instant_t my_physical_time) { // Read the federation start_time first, then the effective start_time after instant_t timestamp = extract_int64(&(buffer[1])); - effective_start_time = extract_int64(&(buffer[9])); - - tag_t tag = {.time = effective_start_time, .microstep = 0}; + effective_start_tag.time = extract_int64(&(buffer[9])); + effective_start_tag.microstep = extract_int32(&(buffer[9+8])); + // Trace the event when tracing is enabled. - // Note that we report in the trace the effective_start_time. + // Note that we report in the trace the effective_start_tag. // This is rather a choice. To be changed, if needed, of course. - tracepoint_federate_from_RTI(receive_TIMESTAMP, _lf_my_fed_id, &tag); + tracepoint_federate_from_RTI(receive_TIMESTAMP, _lf_my_fed_id, &effective_start_tag); LF_PRINT_LOG("Current physical time is: " PRINTF_TIME ".", lf_time_physical()); return timestamp; @@ -2723,7 +2723,7 @@ void synchronize_with_other_federates() { } lf_print_log("Start time of the federation is " PRINTF_TIME ".", start_time); - lf_print_log("Effective start time of federate %d is: " PRINTF_TIME ".", _lf_my_fed_id, effective_start_time); + lf_print_log("Effective start time of federate %d is: " PRINTF_TIME ".", _lf_my_fed_id, effective_start_tag.time); // Start a thread to listen for incoming TCP messages from the RTI. // @note Up until this point, the federate has been listening for messages diff --git a/core/tag.c b/core/tag.c index ae1f9277a..8f1f48ef0 100644 --- a/core/tag.c +++ b/core/tag.c @@ -37,7 +37,7 @@ instant_t start_time = NEVER; * Only useful for transient federates. It records the effective start time, to * be used at startup. Elapsed logical time calculations will use start_time. 
*/ -instant_t effective_start_time = NEVER; +instant_t effective_start_tag = NEVER; //////////////// Global variables not declared in tag.h (must be declared extern if used elsewhere): diff --git a/core/threaded/reactor_threaded.c b/core/threaded/reactor_threaded.c index 5c6ae75fc..5e5123d74 100644 --- a/core/threaded/reactor_threaded.c +++ b/core/threaded/reactor_threaded.c @@ -50,7 +50,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. extern instant_t _lf_last_reported_unadjusted_physical_time_ns; extern tag_t current_tag; extern instant_t start_time; -extern instant_t effective_start_time; +extern tag_t effective_start_tag; /** * Global mutex and condition variable. @@ -728,7 +728,7 @@ void _lf_initialize_start_tag() { // Get a start_time from the RTI synchronize_with_other_federates(); // Resets start_time in federated execution according to the RTI. - current_tag = (tag_t){.time = effective_start_time, .microstep = 0u}; + current_tag = effective_start_tag; #endif _lf_initialize_timers(); diff --git a/include/core/federated/net_common.h b/include/core/federated/net_common.h index 71280276b..9abbb7437 100644 --- a/include/core/federated/net_common.h +++ b/include/core/federated/net_common.h @@ -385,12 +385,12 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /** * As an answer to MSG_TYPE_TIMESTAMP, the RTI broadcasts to all persistent * federates, or sends to newly joining transient federate, a message of - * MSG_TYPE_STIMESTAMP_START. It includes the starting logical time of the - * federation, together with the effective starting logical time. The latter - * is useful for transient federates. + * MSG_TYPE_STIMESTAMP_START. It includes the starting time of the federation, + * together with the effective starting logical tag. The latter is useful for + * transient federates. 
*/ #define MSG_TYPE_TIMESTAMP_START 50 -#define MSG_TYPE_TIMESTAMP_START_LENGTH (1 + sizeof(int64_t) + sizeof(int64_t)) +#define MSG_TYPE_TIMESTAMP_START_LENGTH (1 + sizeof(instant_t) + sizeof(tag_t)) /** Byte identifying a message to forward to another federate. * The next two bytes will be the ID of the destination port. From 64c34bd29b38751568c68b636dad4062efa481ed Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Thu, 8 Jun 2023 13:37:06 -0700 Subject: [PATCH 32/80] Fix merge conflicts. --- core/federated/RTI/rti.c | 12 ++--- core/federated/RTI/rti_lib.c | 102 +++++++++++++++++------------------ 2 files changed, 57 insertions(+), 57 deletions(-) diff --git a/core/federated/RTI/rti.c b/core/federated/RTI/rti.c index 9aebc354b..2d0b0f4e0 100644 --- a/core/federated/RTI/rti.c +++ b/core/federated/RTI/rti.c @@ -108,13 +108,13 @@ int main(int argc, const char* argv[]) { lf_print("Tracing the RTI execution in %s file.", rti_trace_file_name); } lf_print("Starting RTI for a total of %d federates, with %d being transient, in federation ID %s", \ - _F_RTI->number_of_enclaves, - _F_RTI->number_of_transient_federates, - _F_RTI->federation_id); + _f_rti->number_of_enclaves, + _f_rti->number_of_transient_federates, + _f_rti->federation_id); - lf_print("Starting RTI for %d federates in federation ID %s.", _F_RTI->number_of_enclaves, _F_RTI->federation_id); - assert(_F_RTI->number_of_enclaves < UINT16_MAX); - assert(_F_RTI->number_of_transient_federates < UINT16_MAX); + lf_print("Starting RTI for %d federates in federation ID %s.", _f_rti->number_of_enclaves, _f_rti->federation_id); + assert(_f_rti->number_of_enclaves < UINT16_MAX); + assert(_f_rti->number_of_transient_federates < UINT16_MAX); // Allocate memory for the federates _f_rti->enclaves = (federate_t**)calloc(_f_rti->number_of_enclaves, sizeof(federate_t*)); for (uint16_t i = 0; i < _f_rti->number_of_enclaves; i++) { diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 153af16a7..cfe4faecb 
100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -429,7 +429,7 @@ void handle_timed_message(federate_t* sending_federate, unsigned char* buffer) { ); return; } else { - tag_t fed_start_tag = {.time=_F_RTI->enclaves[federate_id]->fed_start_time, .microstep=0}; + tag_t fed_start_tag = {.time=_f_rti->enclaves[federate_id]->fed_start_time, .microstep=0}; if(lf_tag_compare(intended_tag, fed_start_tag) < 0) { // Do not forward the message if the federate is connected, but its // start_time is not reached yet @@ -804,21 +804,21 @@ void handle_timestamp(federate_t *my_fed) { // Processing the TIMESTAMP depends on whether it is the startup phase (all // persistent federates joined) or not. - if (_F_RTI->num_feds_proposed_start < (_F_RTI->number_of_enclaves - _F_RTI->number_of_transient_federates)) { - if (timestamp > _F_RTI->max_start_time) { - _F_RTI->max_start_time = timestamp; + if (_f_rti->num_feds_proposed_start < (_f_rti->number_of_enclaves - _f_rti->number_of_transient_federates)) { + if (timestamp > _f_rti->max_start_time) { + _f_rti->max_start_time = timestamp; } // Check that persistent federates did propose a start_time if (!my_fed->is_transient) { - _F_RTI->num_feds_proposed_start++; + _f_rti->num_feds_proposed_start++; } - if (_F_RTI->num_feds_proposed_start == (_F_RTI->number_of_enclaves - _F_RTI->number_of_transient_federates)) { + if (_f_rti->num_feds_proposed_start == (_f_rti->number_of_enclaves - _f_rti->number_of_transient_federates)) { // All federates have proposed a start time. lf_cond_broadcast(&received_start_times); } else { // Some federates have not yet proposed a start time. // wait for a notification. - while (_F_RTI->num_feds_proposed_start < (_F_RTI->number_of_enclaves - _F_RTI->number_of_transient_federates)) { + while (_f_rti->num_feds_proposed_start < (_f_rti->number_of_enclaves - _f_rti->number_of_transient_federates)) { // FIXME: Should have a timeout here? 
lf_cond_wait(&received_start_times); } @@ -826,7 +826,7 @@ void handle_timestamp(federate_t *my_fed) { lf_mutex_unlock(&rti_mutex); // Add an offset to this start time to get everyone starting together. - start_time = _F_RTI->max_start_time + DELAY_START; + start_time = _f_rti->max_start_time + DELAY_START; // Send the start_time send_start_tag(my_fed, start_time, (tag_t){.time = start_time, .microstep = 0}); @@ -840,7 +840,7 @@ void handle_timestamp(federate_t *my_fed) { // FIXME: Maybe we can use TAGs from a transient that left??? tag_t federate_start_tag = NEVER_TAG; for (int j = 0; j < my_fed->enclave.num_upstream; j++) { - federate_t* upstream = _F_RTI->enclaves[my_fed->enclave.upstream[j]]; + federate_t* upstream = _f_rti->enclaves[my_fed->enclave.upstream[j]]; // Ignore this federate if it has resigned or if it a transient that // is absent if (upstream->enclave.state == NOT_CONNECTED) { @@ -853,7 +853,7 @@ void handle_timestamp(federate_t *my_fed) { } // Iterate over the downstream federates to query the current event tag. for (int j = 0; j < my_fed->enclave.num_downstream; j++) { - federate_t* downstream = _F_RTI->enclaves[my_fed->enclave.downstream[j]]; + federate_t* downstream = _f_rti->enclaves[my_fed->enclave.downstream[j]]; // Ignore this federate if it has resigned. 
if (downstream->enclave.state == NOT_CONNECTED) { continue; @@ -893,7 +893,7 @@ void send_start_tag(federate_t* my_fed, instant_t federation_start_time, tag_t f encode_int64(swap_bytes_if_big_endian_int64(federate_start_tag.time), &start_time_buffer[9]); encode_int32(swap_bytes_if_big_endian_int64(federate_start_tag.microstep), &start_time_buffer[9+8]); - if (_F_RTI->tracing_enabled) { + if (_f_rti->tracing_enabled) { tracepoint_RTI_to_federate(send_TIMESTAMP, my_fed->enclave.id, &federate_start_tag); } @@ -912,7 +912,7 @@ void send_start_tag(federate_t* my_fed, instant_t federation_start_time, tag_t f my_fed->enclave.state = GRANTED; // If it the startup phase, then broadcast that the start_time was sent - if (_F_RTI->num_feds_proposed_start < (_F_RTI->number_of_enclaves - _F_RTI->number_of_transient_federates)) { + if (_f_rti->num_feds_proposed_start < (_f_rti->number_of_enclaves - _f_rti->number_of_transient_federates)) { lf_cond_broadcast(&sent_start_time); } @@ -1194,7 +1194,7 @@ void* federate_thread_TCP(void* fed) { // Update the number of connected transient federates lf_mutex_lock(&rti_mutex); - _F_RTI->number_of_connected_transient_federates--; + _f_rti->number_of_connected_transient_federates--; // Reset the status of the leaving federate reset_transient_federate(my_fed); @@ -1224,7 +1224,7 @@ int32_t receive_and_check_fed_id_message(int socket_id, struct sockaddr_in* clie // FIXME: This should not exit with error but rather should just reject the connection. read_from_socket_errexit(socket_id, length, buffer, "RTI failed to read from accepted socket."); - uint16_t fed_id = _F_RTI->number_of_enclaves; // Initialize to an invalid value. + uint16_t fed_id = _f_rti->number_of_enclaves; // Initialize to an invalid value. bool is_transient = false; // First byte received is the message type. 
if (buffer[0] != MSG_TYPE_FED_IDS) { @@ -1329,7 +1329,7 @@ int32_t receive_and_check_fed_id_message(int socket_id, struct sockaddr_in* clie // because it is waiting for the start time to be // sent by the RTI before beginning its execution. fed->enclave.state = PENDING; - _F_RTI->enclaves[fed_id]->is_transient = is_transient; + _f_rti->enclaves[fed_id]->is_transient = is_transient; LF_PRINT_DEBUG("RTI responding with MSG_TYPE_ACK to federate %d.", fed_id); // Send an MSG_TYPE_ACK message. @@ -1542,7 +1542,7 @@ bool authenticate_federate(int socket) { void connect_to_federates(int socket_descriptor) { // This loop will accept both, persistent and transient federates. // For transient, however, i will be decreased - for (int i = 0 ; i < _F_RTI->number_of_enclaves - _F_RTI->number_of_transient_federates ; i++) { + for (int i = 0 ; i < _f_rti->number_of_enclaves - _f_rti->number_of_transient_federates ; i++) { // Wait for an incoming connection request. struct sockaddr client_fd; uint32_t client_length = sizeof(client_fd); @@ -1588,9 +1588,9 @@ void connect_to_federates(int socket_descriptor) { federate_t *fed = _f_rti->enclaves[fed_id]; lf_thread_create(&(fed->thread_id), federate_thread_TCP, fed); - if (_F_RTI->enclaves[fed_id]->is_transient) { - _F_RTI->number_of_connected_transient_federates++; - assert(_F_RTI->number_of_connected_transient_federates <= _F_RTI->number_of_transient_federates); + if (_f_rti->enclaves[fed_id]->is_transient) { + _f_rti->number_of_connected_transient_federates++; + assert(_f_rti->number_of_connected_transient_federates <= _f_rti->number_of_transient_federates); i--; lf_print("RTI: Transient federate %d joined.", fed->enclave.id); } @@ -1685,7 +1685,7 @@ void wait_for_federates(int socket_descriptor) { // All persistent federates have connected. lf_print("RTI: All expected (persistent) federates have connected. 
Starting execution."); - if (_F_RTI->number_of_transient_federates > 0) { + if (_f_rti->number_of_transient_federates > 0) { lf_print("RTI: Transient Federates can join and leave the federation at anytime."); } @@ -1698,17 +1698,17 @@ void wait_for_federates(int socket_descriptor) { // If the federation does not include transient federates, then respond to // erronous connections. Otherwise, continue to accept transients joining and // respond to duplicate joing requests. - if (_F_RTI->number_of_transient_federates == 0) { + if (_f_rti->number_of_transient_federates == 0) { lf_thread_create(&responder_thread, respond_to_erroneous_connections, NULL); - } else if (_F_RTI->number_of_transient_federates > 0) { + } else if (_f_rti->number_of_transient_federates > 0) { lf_thread_create(&transient_thread, connect_to_transient_federates_thread, NULL); } // Wait for persistent federate threads to exit. void* thread_exit_status; - for (int i = 0 ; i < _F_RTI->number_of_enclaves ; i++) { - if (!_F_RTI->enclaves[i]->is_transient) { - federate_t* fed = _F_RTI->enclaves[i]; + for (int i = 0 ; i < _f_rti->number_of_enclaves ; i++) { + if (!_f_rti->enclaves[i]->is_transient) { + federate_t* fed = _f_rti->enclaves[i]; lf_print("RTI: Waiting for thread handling peristent federate %d.", fed->enclave.id); lf_thread_join(fed->thread_id, &thread_exit_status); free_in_transit_message_q(fed->in_transit_message_tags); @@ -1723,23 +1723,23 @@ void wait_for_federates(int socket_descriptor) { // the transient federates. The reason is that if, for example, federate 0 is // transienet, and it did leave in the middle of a federation execution, then // we will no more wait for the thread of a future joining instance to lf_thread_join. 
- if (_F_RTI->number_of_transient_federates > 0) { - for (int i = 0 ; i < _F_RTI->number_of_enclaves ; i++) { - if (_F_RTI->enclaves[i]->is_transient) { - lf_print("RTI: Waiting for thread handling transient federate %d.", _F_RTI->enclaves[i]->enclave.id); - lf_thread_join(_F_RTI->enclaves[i]->thread_id, &thread_exit_status); - free_in_transit_message_q(_F_RTI->enclaves[i]->in_transit_message_tags); - lf_print("RTI: Federate %d thread exited.", _F_RTI->enclaves[i]->enclave.id); + if (_f_rti->number_of_transient_federates > 0) { + for (int i = 0 ; i < _f_rti->number_of_enclaves ; i++) { + if (_f_rti->enclaves[i]->is_transient) { + lf_print("RTI: Waiting for thread handling transient federate %d.", _f_rti->enclaves[i]->enclave.id); + lf_thread_join(_f_rti->enclaves[i]->thread_id, &thread_exit_status); + free_in_transit_message_q(_f_rti->enclaves[i]->in_transit_message_tags); + lf_print("RTI: Federate %d thread exited.", _f_rti->enclaves[i]->enclave.id); } } } - _F_RTI->all_federates_exited = true; + _f_rti->all_federates_exited = true; lf_print("All transient threads exited."); - if (_F_RTI->number_of_transient_federates == 0) { + if (_f_rti->number_of_transient_federates == 0) { lf_thread_join(&responder_thread, &thread_exit_status); - } else if (_F_RTI->number_of_transient_federates > 0) { + } else if (_f_rti->number_of_transient_federates > 0) { lf_thread_join(&transient_thread, &thread_exit_status); } @@ -1916,8 +1916,8 @@ int process_args(int argc, const char* argv[]) { usage(argc, argv); return 0; } - _F_RTI->number_of_enclaves = (int32_t)num_federates; // FIXME: Loses numbers on 64-bit machines - lf_print("RTI: Number of federates: %d\n", _F_RTI->number_of_enclaves); + _f_rti->number_of_enclaves = (int32_t)num_federates; // FIXME: Loses numbers on 64-bit machines + lf_print("RTI: Number of federates: %d\n", _f_rti->number_of_enclaves); } else if (strcmp(argv[i], "-nt") == 0 || strcmp(argv[i], "--number_of_transient_federates") == 0) { if (argc < i + 2) { 
lf_print_error("--number_of_transient_federates needs an integer argument."); @@ -1931,13 +1931,13 @@ int process_args(int argc, const char* argv[]) { usage(argc, argv); return 0; } - if (num_transient_federates > _F_RTI->number_of_enclaves) { + if (num_transient_federates > _f_rti->number_of_enclaves) { lf_print_error("--number_of_transient_federates cannot be higher than the number of federates."); usage(argc, argv); return 0; } - _F_RTI->number_of_transient_federates = (int32_t)num_transient_federates; // FIXME: Loses numbers on 64-bit machines - lf_print("RTI: Number of transient federates: %d", _F_RTI->number_of_transient_federates); + _f_rti->number_of_transient_federates = (int32_t)num_transient_federates; // FIXME: Loses numbers on 64-bit machines + lf_print("RTI: Number of transient federates: %d", _f_rti->number_of_transient_federates); } else if (strcmp(argv[i], "-p") == 0 || strcmp(argv[i], "--port") == 0) { if (argc < i + 2) { lf_print_error( @@ -2020,18 +2020,18 @@ void initialize_RTI(){ void* connect_to_transient_federates_thread() { // This loop will continue to accept connections of transient federates, as // soon as there is room - while (!_F_RTI->all_federates_exited) { - if (_F_RTI->number_of_connected_transient_federates < _F_RTI->number_of_transient_federates) { + while (!_f_rti->all_federates_exited) { + if (_f_rti->number_of_connected_transient_federates < _f_rti->number_of_transient_federates) { // Continue waiting for an incoming connection requests from transients. struct sockaddr client_fd; uint32_t client_length = sizeof(client_fd); // The following blocks until a federate connects. 
int socket_id = -1; while(1) { - if (!_F_RTI->all_federates_exited) { - return; + if (!_f_rti->all_federates_exited) { + return NULL; } - socket_id = accept(_F_RTI->socket_descriptor_TCP, &client_fd, &client_length); + socket_id = accept(_f_rti->socket_descriptor_TCP, &client_fd, &client_length); if (socket_id >= 0) { // Got a socket break; @@ -2046,7 +2046,7 @@ void* connect_to_transient_federates_thread() { // Send RTI hello when RTI -a option is on. #ifdef __RTI_AUTH__ - if (_F_RTI->authentication_enabled) { + if (_f_rti->authentication_enabled) { if (!authenticate_federate(socket_id)) { lf_print_warning("RTI failed to authenticate the incoming federate."); // Ignore the federate that failed authentication. @@ -2067,9 +2067,9 @@ void* connect_to_transient_federates_thread() { // This has to be done after clock synchronization is finished // or that thread may end up attempting to handle incoming clock // synchronization messages. - lf_thread_create(&(_F_RTI->enclaves[fed_id]->thread_id), federate_thread_TCP, &(_F_RTI->enclaves[fed_id])); - _F_RTI->enclaves[fed_id]->is_transient = true; - _F_RTI->number_of_connected_transient_federates++; + lf_thread_create(&(_f_rti->enclaves[fed_id]->thread_id), federate_thread_TCP, &(_f_rti->enclaves[fed_id])); + _f_rti->enclaves[fed_id]->is_transient = true; + _f_rti->number_of_connected_transient_federates++; lf_print("RTI: Transient federate %d joined.", fed_id); } } @@ -2111,7 +2111,7 @@ void handle_current_tag_query_response(federate_t *my_fed) { // Get the timestamp and the transient federate id instant_t timestamp = swap_bytes_if_big_endian_int64(*((int64_t *)(buffer))); uint16_t transient_fed_id = extract_uint16((&buffer[8])); - if (_F_RTI->tracing_enabled) { + if (_f_rti->tracing_enabled) { tag_t tag = {.time = timestamp, .microstep = 0}; tracepoint_RTI_from_federate(receive_CuTAG_QR_RES, my_fed->enclave.id, &tag); } @@ -2121,7 +2121,7 @@ void handle_current_tag_query_response(federate_t *my_fed) { 
lf_mutex_lock(&rti_mutex); // Processing the TIMESTAMP depends on whether it is the startup phase (all // persistent federates joined) or not. - federate_t* transient = _F_RTI->enclaves[transient_fed_id]; + federate_t* transient = _f_rti->enclaves[transient_fed_id]; // Set the start_time of the transient federate to be the maximum among // current tag of upstreams and the physical time at which it joined . From cd0a9925ae135435b92f6138a7036767d4901906 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Thu, 8 Jun 2023 15:58:30 -0700 Subject: [PATCH 33/80] Fix the call to lf_thread_join() --- core/federated/RTI/rti_lib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index cfe4faecb..70b0ead97 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -1738,9 +1738,9 @@ void wait_for_federates(int socket_descriptor) { lf_print("All transient threads exited."); if (_f_rti->number_of_transient_federates == 0) { - lf_thread_join(&responder_thread, &thread_exit_status); + lf_thread_join(responder_thread, &thread_exit_status); } else if (_f_rti->number_of_transient_federates > 0) { - lf_thread_join(&transient_thread, &thread_exit_status); + lf_thread_join(transient_thread, &thread_exit_status); } // Shutdown and close the socket so that the accept() call in From f6970198ad6b61bc5288c9110896b85fabc6abe5 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Fri, 9 Jun 2023 09:46:39 -0700 Subject: [PATCH 34/80] Minor fixes and Make the effective start as a tag rather than a time --- core/federated/RTI/rti_lib.c | 48 ++++++++++++++--------------- core/federated/RTI/rti_lib.h | 2 +- core/federated/federate.c | 6 +--- core/tag.c | 4 +-- include/core/federated/net_common.h | 2 +- 5 files changed, 29 insertions(+), 33 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 70b0ead97..3ed9572eb 100644 --- a/core/federated/RTI/rti_lib.c +++ 
b/core/federated/RTI/rti_lib.c @@ -428,9 +428,8 @@ void handle_timed_message(federate_t* sending_federate, unsigned char* buffer) { fed->enclave.last_provisionally_granted.microstep ); return; - } else { - tag_t fed_start_tag = {.time=_f_rti->enclaves[federate_id]->fed_start_time, .microstep=0}; - if(lf_tag_compare(intended_tag, fed_start_tag) < 0) { + } else { + if(lf_tag_compare(intended_tag, fed->effective_start_tag) < 0) { // Do not forward the message if the federate is connected, but its // start_time is not reached yet lf_mutex_unlock(&rti_mutex); @@ -601,15 +600,14 @@ void _lf_rti_broadcast_stop_time_to_federates_already_locked() { void mark_federate_requesting_stop(federate_t* fed) { if (!fed->enclave.requested_stop) { - // Assume that the federate - // has requested stop - // FIXME: Inc only if it is a persistent federate + // Assume that the federate has requested stop + // Increment the number of federates handling stop only if it is persistent if (fed->is_transient == false) { _f_rti->num_enclaves_handling_stop++; } fed->enclave.requested_stop = true; } - if (_f_rti->num_enclaves_handling_stop == _f_rti->number_of_enclaves) { + if (_f_rti->num_enclaves_handling_stop == _f_rti->number_of_enclaves - _f_rti->number_of_transient_federates) { // We now have information about the stop time of all // federates. _lf_rti_broadcast_stop_time_to_federates_already_locked(); @@ -655,7 +653,7 @@ void handle_stop_request_message(federate_t* fed) { // for a stop, add it to the tally. mark_federate_requesting_stop(fed); - if (_f_rti->num_enclaves_handling_stop == _f_rti->number_of_enclaves) { + if (_f_rti->num_enclaves_handling_stop == _f_rti->number_of_enclaves - _f_rti->number_of_transient_federates) { // We now have information about the stop time of all // federates. This is extremely unlikely, but it can occur // all federates call lf_request_stop() at the same tag. 
@@ -800,7 +798,7 @@ void handle_timestamp(federate_t *my_fed) { LF_PRINT_LOG("RTI received timestamp message: " PRINTF_TIME ".", timestamp); lf_mutex_lock(&rti_mutex); - my_fed->fed_start_time = timestamp; + my_fed->effective_start_tag.time = timestamp; // Processing the TIMESTAMP depends on whether it is the startup phase (all // persistent federates joined) or not. @@ -829,7 +827,8 @@ void handle_timestamp(federate_t *my_fed) { start_time = _f_rti->max_start_time + DELAY_START; // Send the start_time - send_start_tag(my_fed, start_time, (tag_t){.time = start_time, .microstep = 0}); + my_fed->effective_start_tag = (tag_t){.time = start_time, .microstep = 0u}; + send_start_tag(my_fed, start_time, my_fed->effective_start_tag); } else { // A transient has joined after the startup phase // At this point, we already hold the mutex @@ -868,8 +867,9 @@ void handle_timestamp(federate_t *my_fed) { // then do not wait for the start time if (lf_tag_compare(federate_start_tag, NEVER_TAG) == 0) { my_fed->start_time_is_set = true; - my_fed->fed_start_time += DELAY_START; - federate_start_tag = (tag_t){.time = my_fed->fed_start_time, .microstep = 0}; + my_fed->effective_start_tag.time += DELAY_START; + my_fed->effective_start_tag.microstep = 0u; + federate_start_tag = my_fed->effective_start_tag; } lf_mutex_unlock(&rti_mutex); @@ -890,9 +890,8 @@ void send_start_tag(federate_t* my_fed, instant_t federation_start_time, tag_t f start_time_buffer[0] = MSG_TYPE_TIMESTAMP_START; encode_int64(swap_bytes_if_big_endian_int64(federation_start_time), &start_time_buffer[1]); - encode_int64(swap_bytes_if_big_endian_int64(federate_start_tag.time), &start_time_buffer[9]); - encode_int32(swap_bytes_if_big_endian_int64(federate_start_tag.microstep), &start_time_buffer[9+8]); - + encode_tag(&(start_time_buffer[1 + sizeof(instant_t)]), federate_start_tag); + if (_f_rti->tracing_enabled) { tracepoint_RTI_to_federate(send_TIMESTAMP, my_fed->enclave.id, &federate_start_tag); } @@ -1655,7 +1654,7 @@ 
void initialize_federate(federate_t* fed, uint16_t id) { fed->server_ip_addr.s_addr = 0; fed->server_port = -1; fed->is_transient = true; - fed->fed_start_time = 0LL; + fed->effective_start_tag = NEVER_TAG; fed->num_of_conn_federates = 0; fed->num_of_conn_federates_sent_net = 0; fed->start_time_is_set = false; @@ -1931,11 +1930,6 @@ int process_args(int argc, const char* argv[]) { usage(argc, argv); return 0; } - if (num_transient_federates > _f_rti->number_of_enclaves) { - lf_print_error("--number_of_transient_federates cannot be higher than the number of federates."); - usage(argc, argv); - return 0; - } _f_rti->number_of_transient_federates = (int32_t)num_transient_federates; // FIXME: Loses numbers on 64-bit machines lf_print("RTI: Number of transient federates: %d", _f_rti->number_of_transient_federates); } else if (strcmp(argv[i], "-p") == 0 || strcmp(argv[i], "--port") == 0) { @@ -1989,6 +1983,11 @@ int process_args(int argc, const char* argv[]) { usage(argc, argv); return 0; } + if (_f_rti->number_of_transient_federates > _f_rti->number_of_enclaves) { + lf_print_error("--number_of_transient_federates cannot be higher than the number of federates."); + usage(argc, argv); + return 0; + } return 1; } @@ -2092,7 +2091,8 @@ void reset_transient_federate(federate_t* fed) { fed->server_port = -1; fed->enclave.requested_stop = false; fed->is_transient = true; - fed->fed_start_time = 0LL; + // FIXME: Should it be reset to the NEVER_TAG? + fed->effective_start_tag = NEVER_TAG; fed->num_of_conn_federates = 0; fed->num_of_conn_federates_sent_net = 0; fed->start_time_is_set = false; @@ -2125,8 +2125,8 @@ void handle_current_tag_query_response(federate_t *my_fed) { // Set the start_time of the transient federate to be the maximum among // current tag of upstreams and the physical time at which it joined . 
- if (timestamp > transient->fed_start_time) { - transient->fed_start_time = timestamp; + if (timestamp > transient->effective_start_tag.time) { + transient->effective_start_tag.time = timestamp; } // Check that upstream and downstream federates of the transient did propose a start_time transient->num_of_conn_federates_sent_net++; diff --git a/core/federated/RTI/rti_lib.h b/core/federated/RTI/rti_lib.h index 9e2bb7882..2680c18cc 100644 --- a/core/federated/RTI/rti_lib.h +++ b/core/federated/RTI/rti_lib.h @@ -62,7 +62,7 @@ typedef struct federate_t { struct in_addr server_ip_addr; // Information about the IP address of the socket // server of the federate. bool is_transient; // Indicates whether the federate is transient or persistent. - int64_t fed_start_time; // Records the start time of the federate, which is mainly useful for transient federates + tag_t effective_start_tag; // Records the start time of the federate, which is mainly useful for transient federates int num_of_conn_federates; // Records the total number of connected federates among the upstream and // downstream federates. This is used only in the case of transient federate, for // computing the start time. diff --git a/core/federated/federate.c b/core/federated/federate.c index 0699fb2da..261be45d2 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -1208,8 +1208,7 @@ instant_t get_start_time_from_rti(instant_t my_physical_time) { // Read the federation start_time first, then the effective start_time after instant_t timestamp = extract_int64(&(buffer[1])); - effective_start_tag.time = extract_int64(&(buffer[9])); - effective_start_tag.microstep = extract_int32(&(buffer[9+8])); + effective_start_tag = extract_tag(&(buffer[9])); // Trace the event when tracing is enabled. // Note that we report in the trace the effective_start_tag. 
@@ -2722,9 +2721,6 @@ void synchronize_with_other_federates() { stop_tag = ((tag_t) {.time = start_time + duration, .microstep = 0}); } - lf_print_log("Start time of the federation is " PRINTF_TIME ".", start_time); - lf_print_log("Effective start time of federate %d is: " PRINTF_TIME ".", _lf_my_fed_id, effective_start_tag.time); - // Start a thread to listen for incoming TCP messages from the RTI. // @note Up until this point, the federate has been listening for messages // from the RTI in a sequential manner in the main thread. From now on, a diff --git a/core/tag.c b/core/tag.c index 8f1f48ef0..8a677a461 100644 --- a/core/tag.c +++ b/core/tag.c @@ -34,10 +34,10 @@ typedef enum _lf_time_type { instant_t start_time = NEVER; /** - * Only useful for transient federates. It records the effective start time, to + * Only useful for transient federates. It records the effective start tag, to * be used at startup. Elapsed logical time calculations will use start_time. */ -instant_t effective_start_tag = NEVER; +tag_t effective_start_tag = NEVER_TAG; //////////////// Global variables not declared in tag.h (must be declared extern if used elsewhere): diff --git a/include/core/federated/net_common.h b/include/core/federated/net_common.h index 9abbb7437..aa1d7f6e8 100644 --- a/include/core/federated/net_common.h +++ b/include/core/federated/net_common.h @@ -390,7 +390,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * transient federates. */ #define MSG_TYPE_TIMESTAMP_START 50 -#define MSG_TYPE_TIMESTAMP_START_LENGTH (1 + sizeof(instant_t) + sizeof(tag_t)) +#define MSG_TYPE_TIMESTAMP_START_LENGTH (1 + sizeof(instant_t) + sizeof(instant_t) + sizeof(microstep_t)) /** Byte identifying a message to forward to another federate. * The next two bytes will be the ID of the destination port. 
From 18f75c6d8ecfb90ae700c1b4079af96541017daf Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Fri, 9 Jun 2023 15:27:19 -0700 Subject: [PATCH 35/80] Attempt to make tests pass on Windows --- core/tag.c | 2 +- core/trace.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/tag.c b/core/tag.c index 8a677a461..b60c3fa05 100644 --- a/core/tag.c +++ b/core/tag.c @@ -37,7 +37,7 @@ instant_t start_time = NEVER; * Only useful for transient federates. It records the effective start tag, to * be used at startup. Elapsed logical time calculations will use start_time. */ -tag_t effective_start_tag = NEVER_TAG; +tag_t effective_start_tag = {.time = 0LL, .microstep = 0}; //////////////// Global variables not declared in tag.h (must be declared extern if used elsewhere): diff --git a/core/trace.c b/core/trace.c index c8d413ec5..7bd193c9f 100644 --- a/core/trace.c +++ b/core/trace.c @@ -272,7 +272,7 @@ void start_trace(const char* filename) { // FIXME: location of trace file should be customizable. // If a file already exists with the same file name, then derive another one. - char filename_[strlen(filename) + 10]; + char filename_[100]; strcpy(filename_, filename); int i = 0; FILE *test_file_exists; From 0da38c512d6fe7488ec64250306d516689611e92 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Sat, 10 Jun 2023 09:32:18 -0700 Subject: [PATCH 36/80] Attempt to make MacOs tests pass. 
--- core/federated/RTI/rti_lib.c | 80 ++++++++++----------- core/federated/RTI/rti_lib.h | 4 +- core/federated/federate.c | 130 +++++++++++++++++------------------ 3 files changed, 107 insertions(+), 107 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 3ed9572eb..f896f5342 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -1141,9 +1141,9 @@ void* federate_thread_TCP(void* fed) { case MSG_TYPE_TIMESTAMP: handle_timestamp(my_fed); break; - case MSG_TYPE_CURRENT_TAG_QUERY_RESPONSE: - handle_current_tag_query_response(my_fed); - break; + // case MSG_TYPE_CURRENT_TAG_QUERY_RESPONSE: + // handle_current_tag_query_response(my_fed); + // break; case MSG_TYPE_ADDRESS_QUERY: handle_address_query(my_fed->enclave.id); break; @@ -2098,41 +2098,41 @@ void reset_transient_federate(federate_t* fed) { fed->start_time_is_set = false; } -void handle_current_tag_query_response(federate_t *my_fed) { - // Get the logical time instant and the transient fed_id from the socket - size_t buffer_size = sizeof(instant_t) + sizeof(uint16_t); - unsigned char buffer[buffer_size]; - // Read bytes from the socket. We need 8 bytes. - ssize_t bytes_read = read_from_socket(my_fed->socket, buffer_size, buffer); - if (bytes_read < (ssize_t)sizeof(int64_t)) { - lf_print_error("ERROR reading next event query response from federate %d.\n", my_fed->enclave.id); - } - - // Get the timestamp and the transient federate id - instant_t timestamp = swap_bytes_if_big_endian_int64(*((int64_t *)(buffer))); - uint16_t transient_fed_id = extract_uint16((&buffer[8])); - if (_f_rti->tracing_enabled) { - tag_t tag = {.time = timestamp, .microstep = 0}; - tracepoint_RTI_from_federate(receive_CuTAG_QR_RES, my_fed->enclave.id, &tag); - } - LF_PRINT_LOG("RTI received current TAG query response message: " PRINTF_TIME, timestamp); - - // FIXME: Should the lock be inside the if statement only? 
- lf_mutex_lock(&rti_mutex); - // Processing the TIMESTAMP depends on whether it is the startup phase (all - // persistent federates joined) or not. - federate_t* transient = _f_rti->enclaves[transient_fed_id]; +// void handle_current_tag_query_response(federate_t *my_fed) { +// // Get the logical time instant and the transient fed_id from the socket +// size_t buffer_size = sizeof(instant_t) + sizeof(uint16_t); +// unsigned char buffer[buffer_size]; +// // Read bytes from the socket. We need 8 bytes. +// ssize_t bytes_read = read_from_socket(my_fed->socket, buffer_size, buffer); +// if (bytes_read < (ssize_t)sizeof(int64_t)) { +// lf_print_error("ERROR reading next event query response from federate %d.\n", my_fed->enclave.id); +// } + +// // Get the timestamp and the transient federate id +// instant_t timestamp = swap_bytes_if_big_endian_int64(*((int64_t *)(buffer))); +// uint16_t transient_fed_id = extract_uint16((&buffer[8])); +// if (_f_rti->tracing_enabled) { +// tag_t tag = {.time = timestamp, .microstep = 0}; +// tracepoint_RTI_from_federate(receive_CuTAG_QR_RES, my_fed->enclave.id, &tag); +// } +// LF_PRINT_LOG("RTI received current TAG query response message: " PRINTF_TIME, timestamp); + +// // FIXME: Should the lock be inside the if statement only? +// lf_mutex_lock(&rti_mutex); +// // Processing the TIMESTAMP depends on whether it is the startup phase (all +// // persistent federates joined) or not. +// federate_t* transient = _f_rti->enclaves[transient_fed_id]; - // Set the start_time of the transient federate to be the maximum among - // current tag of upstreams and the physical time at which it joined . 
- if (timestamp > transient->effective_start_tag.time) { - transient->effective_start_tag.time = timestamp; - } - // Check that upstream and downstream federates of the transient did propose a start_time - transient->num_of_conn_federates_sent_net++; - if (transient->num_of_conn_federates_sent_net == transient->num_of_conn_federates) { - // All expected connected federates to transient have sent responses with NET to RTI - transient->start_time_is_set = true; - } - lf_mutex_unlock(&rti_mutex); -} +// // Set the start_time of the transient federate to be the maximum among +// // current tag of upstreams and the physical time at which it joined . +// if (timestamp > transient->effective_start_tag.time) { +// transient->effective_start_tag.time = timestamp; +// } +// // Check that upstream and downstream federates of the transient did propose a start_time +// transient->num_of_conn_federates_sent_net++; +// if (transient->num_of_conn_federates_sent_net == transient->num_of_conn_federates) { +// // All expected connected federates to transient have sent responses with NET to RTI +// transient->start_time_is_set = true; +// } +// lf_mutex_unlock(&rti_mutex); +// } diff --git a/core/federated/RTI/rti_lib.h b/core/federated/RTI/rti_lib.h index 2680c18cc..a41d94199 100644 --- a/core/federated/RTI/rti_lib.h +++ b/core/federated/RTI/rti_lib.h @@ -592,14 +592,14 @@ void reset_transient_federate(federate_t* fed); * @param fed_id: The ID of the transient joining federate * @return true, if successfully sent, false otherwise. */ -bool send_current_tag_query(federate_t* conn_fed, uint16_t fed_id); +// bool send_current_tag_query(federate_t* conn_fed, uint16_t fed_id); /** * Handles current tag query response received form my_fed. * * @param my_fed: the federate from whom the response is received. 
*/ -void handle_current_tag_query_response(federate_t *my_fed); +// void handle_current_tag_query_response(federate_t *my_fed); ////////////////////////////////////////////////////////// #endif // RTI_LIB_H diff --git a/core/federated/federate.c b/core/federated/federate.c index bb0ac6d35..cbfb91e6b 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -2416,68 +2416,68 @@ void handle_stop_request_message() { * * FIXME: This function assumes the caller does hold the mutex lock? */ -void handle_next_event_tag_query(){ - tracepoint_federate_from_RTI(receive_CuTAG_QR, _lf_my_fed_id, &NEVER_TAG); - - // Extract the transient federate Id - size_t bytes_to_read = sizeof(uint16_t); - unsigned char buffer[bytes_to_read]; - read_from_socket_errexit(_fed.socket_TCP_RTI, bytes_to_read, buffer, - "Failed to read the transient federate ID."); - - // Read it - uint16_t transient_id = extract_uint16(buffer); - // Check if the message is intended for this federate - assert(_lf_my_fed_id != transient_id); - LF_PRINT_DEBUG("Receiving NET query message regarding transient federate %d.", transient_id); - - // Get the next event tag in the reactions queue - // tag_t next_tag = _fed.last_sent_LTC; - // tag_t next_tag = get_next_event_tag(); - tag_t next_tag = lf_tag(); - - instant_t logical_time = next_tag.time; - - // Answer with the time instant of the next event tag - send_current_tag_query_response(logical_time, transient_id); -} - -/** - * Send to RTI the answer to current tag query. - * - * @param time The time. 
- * @param transient_id The transient federate id to send back - * Print a soft error message otherwise - */ -void send_current_tag_query_response(instant_t time, uint16_t transient_id) { - LF_PRINT_DEBUG("Sending logical time " PRINTF_TIME " to the RTI regarding NET QR RES of trabsient %d.", time, transient_id); - size_t bytes_to_write = 1 + sizeof(instant_t) + sizeof(uint16_t); - unsigned char buffer[bytes_to_write]; - buffer[0] = MSG_TYPE_CURRENT_TAG_QUERY_RESPONSE; - encode_int64(time, &(buffer[1])); - encode_uint16(transient_id, &(buffer[9])); - lf_mutex_lock(&outbound_socket_mutex); - if (_fed.socket_TCP_RTI < 0) { - lf_print_warning("Socket is no longer connected. Dropping message."); - lf_mutex_unlock(&outbound_socket_mutex); - return; - } - - tag_t tag = {.time = time, .microstep = 0}; - // Trace the event when tracing is enabled - tracepoint_federate_to_RTI(send_CuTAG_QR_RES, _lf_my_fed_id, &tag); - - ssize_t bytes_written = write_to_socket(_fed.socket_TCP_RTI, bytes_to_write, buffer); - if (bytes_written < (ssize_t)bytes_to_write) { - lf_print_error_and_exit("Failed to send time " PRINTF_TIME " to the RTI." 
- " Error code %d: %s", - time - start_time, - errno, - strerror(errno) - ); - } - lf_mutex_unlock(&outbound_socket_mutex); -} +// void handle_next_event_tag_query(){ +// tracepoint_federate_from_RTI(receive_CuTAG_QR, _lf_my_fed_id, &NEVER_TAG); + +// // Extract the transient federate Id +// size_t bytes_to_read = sizeof(uint16_t); +// unsigned char buffer[bytes_to_read]; +// read_from_socket_errexit(_fed.socket_TCP_RTI, bytes_to_read, buffer, +// "Failed to read the transient federate ID."); + +// // Read it +// uint16_t transient_id = extract_uint16(buffer); +// // Check if the message is intended for this federate +// assert(_lf_my_fed_id != transient_id); +// LF_PRINT_DEBUG("Receiving NET query message regarding transient federate %d.", transient_id); + +// // Get the next event tag in the reactions queue +// // tag_t next_tag = _fed.last_sent_LTC; +// // tag_t next_tag = get_next_event_tag(); +// tag_t next_tag = lf_tag(); + +// instant_t logical_time = next_tag.time; + +// // Answer with the time instant of the next event tag +// send_current_tag_query_response(logical_time, transient_id); +// } + +// /** +// * Send to RTI the answer to current tag query. +// * +// * @param time The time. +// * @param transient_id The transient federate id to send back +// * Print a soft error message otherwise +// */ +// void send_current_tag_query_response(instant_t time, uint16_t transient_id) { +// LF_PRINT_DEBUG("Sending logical time " PRINTF_TIME " to the RTI regarding NET QR RES of trabsient %d.", time, transient_id); +// size_t bytes_to_write = 1 + sizeof(instant_t) + sizeof(uint16_t); +// unsigned char buffer[bytes_to_write]; +// buffer[0] = MSG_TYPE_CURRENT_TAG_QUERY_RESPONSE; +// encode_int64(time, &(buffer[1])); +// encode_uint16(transient_id, &(buffer[9])); +// lf_mutex_lock(&outbound_socket_mutex); +// if (_fed.socket_TCP_RTI < 0) { +// lf_print_warning("Socket is no longer connected. 
Dropping message."); +// lf_mutex_unlock(&outbound_socket_mutex); +// return; +// } + +// tag_t tag = {.time = time, .microstep = 0}; +// // Trace the event when tracing is enabled +// tracepoint_federate_to_RTI(send_CuTAG_QR_RES, _lf_my_fed_id, &tag); + +// ssize_t bytes_written = write_to_socket(_fed.socket_TCP_RTI, bytes_to_write, buffer); +// if (bytes_written < (ssize_t)bytes_to_write) { +// lf_print_error_and_exit("Failed to send time " PRINTF_TIME " to the RTI." +// " Error code %d: %s", +// time - start_time, +// errno, +// strerror(errno) +// ); +// } +// lf_mutex_unlock(&outbound_socket_mutex); +// } /////////////////// End of transient time coordination ///////////////////////// @@ -2683,9 +2683,9 @@ void* listen_to_rti_TCP(void* args) { case MSG_TYPE_PORT_ABSENT: handle_port_absent_message(_fed.socket_TCP_RTI, -1); break; - case MSG_TYPE_CURRENT_TAG_QUERY: - handle_next_event_tag_query(); - break; + // case MSG_TYPE_CURRENT_TAG_QUERY: + // handle_next_event_tag_query(); + // break; case MSG_TYPE_CLOCK_SYNC_T1: case MSG_TYPE_CLOCK_SYNC_T4: lf_print_error("Federate %d received unexpected clock sync message from RTI on TCP socket.", From 62c9fe71ba2a4b8828e502e2f1cad04c79049d69 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Thu, 15 Jun 2023 11:21:06 -0700 Subject: [PATCH 37/80] Fix after merge --- core/threaded/reactor_threaded.c | 2 +- core/trace.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/threaded/reactor_threaded.c b/core/threaded/reactor_threaded.c index e28355050..19079ab4f 100644 --- a/core/threaded/reactor_threaded.c +++ b/core/threaded/reactor_threaded.c @@ -702,7 +702,7 @@ void _lf_initialize_start_tag(environment_t *env) { // Get a start_time from the RTI synchronize_with_other_federates(env); // Resets start_time in federated execution according to the RTI. 
- env->current_tag = env->effective_start_tag; + env->current_tag = effective_start_tag; #endif _lf_initialize_timers(env); diff --git a/core/trace.c b/core/trace.c index 6f56b89b3..afaf2ef35 100644 --- a/core/trace.c +++ b/core/trace.c @@ -286,7 +286,7 @@ void start_trace(trace_t* trace) { fclose(test_file_exists); // Get the root of the original file name memset(filename_, '\0', sizeof(filename_)); - strncpy(filename_, filename, strlen(filename) - 4); + strncpy(filename_, trace->filename, strlen(trace->filename) - 4); // Add an index char *ind = convert_int_to_string(i++); strcat(filename_, ind); From 2f6ae6cc3c4d5f2c8a74fea44acc7c6f6b8f3146 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Fri, 23 Jun 2023 15:18:49 -0700 Subject: [PATCH 38/80] Fix redundant messages --- core/federated/RTI/rti.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/federated/RTI/rti.c b/core/federated/RTI/rti.c index e47d921b5..6c3b771c5 100644 --- a/core/federated/RTI/rti.c +++ b/core/federated/RTI/rti.c @@ -118,7 +118,6 @@ int main(int argc, const char* argv[]) { _f_rti->number_of_transient_federates, _f_rti->federation_id); - lf_print("Starting RTI for %d federates in federation ID %s.", _f_rti->number_of_enclaves, _f_rti->federation_id); assert(_f_rti->number_of_enclaves < UINT16_MAX); assert(_f_rti->number_of_transient_federates < UINT16_MAX); // Allocate memory for the federates @@ -133,6 +132,5 @@ int main(int argc, const char* argv[]) { int socket_descriptor = start_rti_server(_f_rti->user_specified_port); wait_for_federates(socket_descriptor); - lf_print("RTI is exiting."); return 0; } From b5fb4a16a8991c45488675f26e7ca955cf81ad64 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Fri, 23 Jun 2023 15:37:54 -0700 Subject: [PATCH 39/80] Monor fixes --- core/federated/RTI/rti_lib.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index c127b5bc8..72ef68e64 100644 --- 
a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -2026,7 +2026,7 @@ void* connect_to_transient_federates_thread() { // The following blocks until a federate connects. int socket_id = -1; while(1) { - if (!_f_rti->all_federates_exited) { + if (_f_rti->all_federates_exited) { return NULL; } socket_id = accept(_f_rti->socket_descriptor_TCP, &client_fd, &client_length); @@ -2065,8 +2065,9 @@ void* connect_to_transient_federates_thread() { // This has to be done after clock synchronization is finished // or that thread may end up attempting to handle incoming clock // synchronization messages. - lf_thread_create(&(_f_rti->enclaves[fed_id]->thread_id), federate_thread_TCP, &(_f_rti->enclaves[fed_id])); - _f_rti->enclaves[fed_id]->is_transient = true; + federate_t *fed = _f_rti->enclaves[fed_id]; + lf_thread_create(&(fed->thread_id), federate_thread_TCP, fed); + fed->is_transient = true; _f_rti->number_of_connected_transient_federates++; lf_print("RTI: Transient federate %d joined.", fed_id); } From 4eb1b1d530ec74e51fd5f4ee159ca5c8765ecbc4 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Fri, 23 Jun 2023 16:14:08 -0700 Subject: [PATCH 40/80] Another minor fix --- core/federated/RTI/rti_lib.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 72ef68e64..856b9c181 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -846,7 +846,8 @@ void handle_timestamp(federate_t *my_fed) { continue; } if (lf_tag_compare(federate_start_tag, upstream->enclave.last_granted) < 0) { - federate_start_tag = upstream->enclave.last_granted; + federate_start_tag.time = upstream->enclave.last_granted.time; + federate_start_tag.microstep = upstream->enclave.last_granted.microstep; federate_start_tag.microstep++; } } @@ -858,7 +859,8 @@ void handle_timestamp(federate_t *my_fed) { continue; } if (lf_tag_compare(federate_start_tag, 
downstream->enclave.last_granted) < 0) { - federate_start_tag = downstream->enclave.last_granted; + federate_start_tag.time = downstream->enclave.last_granted.time; + federate_start_tag.microstep = downstream->enclave.last_granted.microstep; federate_start_tag.microstep++; } } @@ -869,15 +871,14 @@ void handle_timestamp(federate_t *my_fed) { my_fed->start_time_is_set = true; my_fed->effective_start_tag.time += DELAY_START; my_fed->effective_start_tag.microstep = 0u; - federate_start_tag = my_fed->effective_start_tag; } lf_mutex_unlock(&rti_mutex); - + // Once the effective start time set, sent it to the joining transient, // together with the start time of the federation. // Send the start time - send_start_tag(my_fed, start_time, federate_start_tag); + send_start_tag(my_fed, start_time, my_fed->effective_start_tag); } } From 05a8636d1da12c06a3822c8fdd0024434f59fccc Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Wed, 5 Jul 2023 15:13:13 -0700 Subject: [PATCH 41/80] Fix wrong fed id in tracing absent messages --- core/federated/RTI/rti_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 80d1a7585..252c35d00 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -306,7 +306,7 @@ void handle_port_absent_message(federate_t* sending_federate, unsigned char* buf tag_t tag = extract_tag(&(buffer[1 + 2 * sizeof(uint16_t)])); if (_f_rti->tracing_enabled) { - tracepoint_rti_from_federate(_f_rti->trace, receive_PORT_ABS, federate_id, &tag); + tracepoint_rti_from_federate(_f_rti->trace, receive_PORT_ABS, sending_federate, &tag); } // Need to acquire the mutex lock to ensure that the thread handling From dc1728e0360f4d502f8fdf04b0cfb1c319a03d54 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Wed, 5 Jul 2023 15:18:01 -0700 Subject: [PATCH 42/80] Access the id, not the federate --- core/federated/RTI/rti_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 252c35d00..cf1300e28 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -306,7 +306,7 @@ void handle_port_absent_message(federate_t* sending_federate, unsigned char* buf tag_t tag = extract_tag(&(buffer[1 + 2 * sizeof(uint16_t)])); if (_f_rti->tracing_enabled) { - tracepoint_rti_from_federate(_f_rti->trace, receive_PORT_ABS, sending_federate, &tag); + tracepoint_rti_from_federate(_f_rti->trace, receive_PORT_ABS, sending_federate->enclave.id, &tag); } // Need to acquire the mutex lock to ensure that the thread handling From b0bea8d8f6822a2c25ee12b73725a435ed39ac43 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Wed, 5 Jul 2023 16:20:18 -0700 Subject: [PATCH 43/80] Fix the first waiting time of a transient --- core/threaded/reactor_threaded.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/threaded/reactor_threaded.c b/core/threaded/reactor_threaded.c index 82881a87e..0b72ac48e 100644 --- a/core/threaded/reactor_threaded.c +++ b/core/threaded/reactor_threaded.c @@ -710,7 +710,7 @@ void _lf_initialize_start_tag(environment_t *env) { start_time, _lf_fed_STA_offset); // Ignore interrupts to this wait. We don't want to start executing until // physical time matches or exceeds the logical start time. - while (!wait_until(env, start_time, &env->event_q_changed)) {} + while (!wait_until(env, effective_start_tag.time, &env->event_q_changed)) {} LF_PRINT_DEBUG("Done waiting for start time " PRINTF_TIME ".", start_time); LF_PRINT_DEBUG("Physical time is ahead of current time by " PRINTF_TIME ". 
This should be small.", lf_time_physical() - start_time); From e7146d96aaed7b0e185c6008bfe98b918109a58f Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Mon, 17 Jul 2023 10:36:03 -0700 Subject: [PATCH 44/80] Compute TAG of transients' downstream, and assign a TAG even when a federate is not connected --- core/federated/RTI/enclave.c | 45 ++++++++++++-- core/federated/RTI/rti_lib.c | 117 +++++++++++++++++++++++++---------- 2 files changed, 122 insertions(+), 40 deletions(-) diff --git a/core/federated/RTI/enclave.c b/core/federated/RTI/enclave.c index 563c29bc9..caecbfd53 100644 --- a/core/federated/RTI/enclave.c +++ b/core/federated/RTI/enclave.c @@ -63,30 +63,62 @@ void logical_tag_complete(enclave_t* enclave, tag_t completed) { tag_advance_grant_t tag_advance_grant_if_safe(enclave_t* e) { tag_advance_grant_t result = {.tag = NEVER_TAG, .is_provisional = false}; + // Check how many upstream federates are connected + int num_connected_upstream = 0; + // Find the earliest LTC of upstream enclaves (M). tag_t min_upstream_completed = FOREVER_TAG; for (int j = 0; j < e->num_upstream; j++) { enclave_t *upstream = _e_rti->enclaves[e->upstream[j]]; - // Ignore this enclave if it no longer connected. - if (upstream->state == NOT_CONNECTED) continue; - + // Do Ignore this enclave if it no longer connected. 
+ if (upstream->state != NOT_CONNECTED) { + num_connected_upstream++; + // continue; + } + tag_t candidate = lf_delay_tag(upstream->completed, e->upstream_delay[j]); if (lf_tag_compare(candidate, min_upstream_completed) < 0) { min_upstream_completed = candidate; } } + LF_PRINT_LOG("Minimum upstream LTC for federate/enclave %d is " PRINTF_TAG "(adjusted by after delay).", e->id, min_upstream_completed.time - start_time, min_upstream_completed.microstep); - if (lf_tag_compare(min_upstream_completed, e->last_granted) > 0 + if ( + num_connected_upstream != 0 + && lf_tag_compare(min_upstream_completed, e->last_granted) > 0 && lf_tag_compare(min_upstream_completed, e->next_event) >= 0 // The enclave has to advance its tag ) { result.tag = min_upstream_completed; return result; + } else if (num_connected_upstream == 0) { + // When none of the upstream federates is connected (case of transients), + // check their TAG + // Find the earliest TAG of upstream enclaves (M). + tag_t min_upstream_granted = FOREVER_TAG; + + for (int j = 0; j < e->num_upstream; j++) { + enclave_t *upstream = _e_rti->enclaves[e->upstream[j]]; + tag_t candidate = e->last_granted; + if (lf_tag_compare(candidate, min_upstream_granted) < 0) { + min_upstream_granted = candidate; + } + } + + if (lf_tag_compare(min_upstream_granted, FOREVER_TAG) != 0) { + min_upstream_granted = e->next_event; + } + + if (lf_tag_compare(min_upstream_granted, FOREVER_TAG) != 0) { + min_upstream_granted = e->next_event; + result.tag = min_upstream_granted; + return result; + } } // Can't make progress based only on upstream LTCs. @@ -111,7 +143,7 @@ tag_advance_grant_t tag_advance_grant_if_safe(enclave_t* e) { enclave_t *upstream = _e_rti->enclaves[e->upstream[j]]; // Ignore this enclave if it is no longer connected. - if (upstream->state == NOT_CONNECTED) continue; + // if (upstream->state == NOT_CONNECTED) continue; // Find the (transitive) next event tag upstream. 
tag_t upstream_next_event = transitive_next_event( @@ -239,6 +271,7 @@ tag_advance_grant_t next_event_tag(enclave_t* e, tag_t next_event_tag) { void notify_advance_grant_if_safe(enclave_t* e) { tag_advance_grant_t grant = tag_advance_grant_if_safe(e); + if (lf_tag_compare(grant.tag, NEVER_TAG) != 0) { if (grant.is_provisional) { notify_provisional_tag_advance_grant(e, grant.tag); @@ -249,7 +282,7 @@ void notify_advance_grant_if_safe(enclave_t* e) { } tag_t transitive_next_event(enclave_t* e, tag_t candidate, bool visited[]) { - if (visited[e->id] || e->state == NOT_CONNECTED) { + if (visited[e->id] /*|| e->state == NOT_CONNECTED*/) { // Enclave has stopped executing or we have visited it before. // No point in checking upstream enclaves. return candidate; diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index bb7562d79..2e7eacfec 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -174,8 +174,8 @@ int create_server(int32_t specified_port, uint16_t port, socket_type_t socket_ty } void notify_tag_advance_grant(enclave_t* e, tag_t tag) { - if (e->state == NOT_CONNECTED - || lf_tag_compare(tag, e->last_granted) <= 0 + if (/*e->state == NOT_CONNECTED + || */lf_tag_compare(tag, e->last_granted) <= 0 || lf_tag_compare(tag, e->last_provisionally_granted) < 0 ) { return; @@ -199,6 +199,7 @@ void notify_tag_advance_grant(enclave_t* e, tag_t tag) { // function. During this call, the socket might close, causing the following write_to_socket // to fail. Consider a failure here a soft failure and update the federate's status. 
ssize_t bytes_written = write_to_socket(((federate_t*)e)->socket, message_length, buffer); + e->last_granted = tag; if (bytes_written < (ssize_t)message_length) { lf_print_error("RTI failed to send tag advance grant to federate %d.", e->id); if (bytes_written < 0) { @@ -206,15 +207,15 @@ void notify_tag_advance_grant(enclave_t* e, tag_t tag) { // FIXME: We need better error handling, but don't stop other execution here. } } else { - e->last_granted = tag; + // e->last_granted = tag; LF_PRINT_LOG("RTI sent to federate %d the tag advance grant (TAG) " PRINTF_TAG ".", e->id, tag.time - start_time, tag.microstep); } } void notify_provisional_tag_advance_grant(enclave_t* e, tag_t tag) { - if (e->state == NOT_CONNECTED - || lf_tag_compare(tag, e->last_granted) <= 0 + if (/*e->state == NOT_CONNECTED + || */lf_tag_compare(tag, e->last_granted) <= 0 || lf_tag_compare(tag, e->last_provisionally_granted) <= 0 ) { return; @@ -238,7 +239,7 @@ void notify_provisional_tag_advance_grant(enclave_t* e, tag_t tag) { // function. During this call, the socket might close, causing the following write_to_socket // to fail. Consider a failure here a soft failure and update the federate's status. ssize_t bytes_written = write_to_socket(((federate_t*)e)->socket, message_length, buffer); - + e->last_provisionally_granted = tag; if (bytes_written < (ssize_t)message_length) { lf_print_error("RTI failed to send tag advance grant to federate %d.", e->id); if (bytes_written < 0) { @@ -246,7 +247,7 @@ void notify_provisional_tag_advance_grant(enclave_t* e, tag_t tag) { // FIXME: We need better error handling, but don't stop other execution here. 
} } else { - e->last_provisionally_granted = tag; + // e->last_provisionally_granted = tag; LF_PRINT_LOG("RTI sent to federate %d the Provisional Tag Advance Grant (PTAG) " PRINTF_TAG ".", e->id, tag.time - start_time, tag.microstep); @@ -260,7 +261,7 @@ void notify_provisional_tag_advance_grant(enclave_t* e, tag_t tag) { federate_t* upstream = _f_rti->enclaves[e->upstream[j]]; // Ignore this federate if it has resigned. - if (upstream->enclave.state == NOT_CONNECTED) continue; + // if (upstream->enclave.state == NOT_CONNECTED) continue; // To handle cycles, need to create a boolean array to keep // track of which upstream federates have been visited. bool* visited = (bool*)calloc(_f_rti->number_of_enclaves, sizeof(bool)); // Initializes to 0. @@ -584,9 +585,9 @@ void _lf_rti_broadcast_stop_time_to_federates_locked() { // Iterate over federates and send each the message. for (int i = 0; i < _f_rti->number_of_enclaves; i++) { federate_t *fed = _f_rti->enclaves[i]; - if (fed->enclave.state == NOT_CONNECTED) { - continue; - } + // if (fed->enclave.state == NOT_CONNECTED) { + // continue; + // } if (lf_tag_compare(fed->enclave.next_event, _f_rti->max_stop_tag) >= 0) { // Need the next_event to be no greater than the stop tag. fed->enclave.next_event = _f_rti->max_stop_tag; @@ -809,7 +810,8 @@ void handle_timestamp(federate_t *my_fed) { LF_PRINT_DEBUG("RTI received timestamp message with time: " PRINTF_TIME ".", timestamp); lf_mutex_lock(&rti_mutex); - my_fed->effective_start_tag.time = timestamp; + + my_fed->effective_start_tag = (tag_t){.time = timestamp, .microstep = 0u}; // Processing the TIMESTAMP depends on whether it is the startup phase (all // persistent federates joined) or not. @@ -848,41 +850,88 @@ void handle_timestamp(federate_t *my_fed) { // get the max of the TAGs. // FIXME: what about PTAGs? // FIXME: Maybe we can use TAGs from a transient that left??? 
+ // for (int j = 0; j < my_fed->enclave.num_upstream; j++) { + // federate_t* upstream = _f_rti->enclaves[my_fed->enclave.upstream[j]]; + // // Ignore this federate if it has resigned or if it a transient that + // // is absent + // if (upstream->enclave.state == NOT_CONNECTED) { + // continue; + // } + // if (lf_tag_compare(federate_start_tag, upstream->enclave.last_granted) < 0) { + // federate_start_tag.time = upstream->enclave.last_granted.time; + // federate_start_tag.microstep = upstream->enclave.last_granted.microstep; + // federate_start_tag.microstep++; + // } + // } + tag_t federate_start_tag = NEVER_TAG; - for (int j = 0; j < my_fed->enclave.num_upstream; j++) { - federate_t* upstream = _f_rti->enclaves[my_fed->enclave.upstream[j]]; - // Ignore this federate if it has resigned or if it a transient that - // is absent - if (upstream->enclave.state == NOT_CONNECTED) { - continue; - } - if (lf_tag_compare(federate_start_tag, upstream->enclave.last_granted) < 0) { - federate_start_tag.time = upstream->enclave.last_granted.time; - federate_start_tag.microstep = upstream->enclave.last_granted.microstep; - federate_start_tag.microstep++; - } - } - // Iterate over the downstream federates to query the current event tag. + // Iterate over the downstream federates for (int j = 0; j < my_fed->enclave.num_downstream; j++) { federate_t* downstream = _f_rti->enclaves[my_fed->enclave.downstream[j]]; + // Ignore this federate if it has resigned. 
- if (downstream->enclave.state == NOT_CONNECTED) { - continue; - } - if (lf_tag_compare(federate_start_tag, downstream->enclave.last_granted) < 0) { + // if (downstream->enclave.state == NOT_CONNECTED) { + // continue; + // } + + // Get the max over the TAG of the downstreams + if (lf_tag_compare(downstream->enclave.last_granted, federate_start_tag) > 0) { federate_start_tag.time = downstream->enclave.last_granted.time; federate_start_tag.microstep = downstream->enclave.last_granted.microstep; - federate_start_tag.microstep++; } + + // int compare_tag_ltc = lf_tag_compare(downstream->enclave.last_granted, downstream->enclave.completed); + // if (compare_tag_ltc >= 0) { + // federate_start_tag.time = downstream->enclave.last_granted.time; + // federate_start_tag.microstep = downstream->enclave.last_granted.microstep; + // } else { + // if (lf_tag_compare(downstream->enclave.completed, downstream->enclave.next_event) >= 0) { + // federate_start_tag.time = downstream->enclave.completed.time; + // federate_start_tag.microstep = downstream->enclave.completed.microstep; + // } else { + // if (lf_tag_compare(downstream->enclave.next_event, NEVER_TAG) == 0) {//|| + // // lf_tag_compare(downstream->enclave.next_event, stop_tag) == 0) + // federate_start_tag.time = downstream->enclave.completed.time; + // federate_start_tag.microstep = downstream->enclave.completed.microstep; + // } else { + // federate_start_tag.time = downstream->enclave.next_event.time; + // federate_start_tag.microstep = downstream->enclave.next_event.microstep; + // } + // } + // } + + // if (lf_tag_compare(federate_start_tag, downstream_proposed_tag) < 0) { + // federate_start_tag.time = downstream_proposed_tag.time; + // federate_start_tag.microstep = downstream_proposed_tag.microstep; + // } + // // When the TAG of the downstream have never been set or have been set + // // but the federate could advance, then + // // if (lf_tag_compare(downstream->enclave.last_granted, NEVER_TAG) < 0) { + // // if 
(lf_tag_compare(downstream->enclave.last_granted, downstream->enclave.completed) < 0) { + + // // } + // if (lf_tag_compare(federate_start_tag, downstream->enclave.last_granted) < 0) { + // federate_start_tag.time = downstream->enclave.last_granted.time; + // federate_start_tag.microstep = downstream->enclave.last_granted.microstep; + // federate_start_tag.microstep++; + // } } // If the transient federate has no connected upstream or downstream federates, // then do not wait for the start time - if (lf_tag_compare(federate_start_tag, NEVER_TAG) == 0) { + if (lf_tag_compare(federate_start_tag, NEVER_TAG) != 0 + && (lf_tag_compare(federate_start_tag, my_fed->effective_start_tag) > 0)) + { + my_fed->effective_start_tag.time = federate_start_tag.time; + my_fed->effective_start_tag.microstep = federate_start_tag.microstep; + my_fed->effective_start_tag.microstep++; + } else { my_fed->start_time_is_set = true; + // FIXME: Should add the delay? my_fed->effective_start_tag.time += DELAY_START; my_fed->effective_start_tag.microstep = 0u; - } + } + lf_mutex_unlock(&rti_mutex); // Once the effective start time set, sent it to the joining transient, @@ -2107,7 +2156,7 @@ void reset_transient_federate(federate_t* fed) { strncpy(fed->server_hostname ,"localhost", INET_ADDRSTRLEN); fed->server_ip_addr.s_addr = 0; fed->server_port = -1; - fed->enclave.requested_stop = false; + fed->requested_stop = false; fed->is_transient = true; // FIXME: Should it be reset to the NEVER_TAG? 
fed->effective_start_tag = NEVER_TAG; From 120599dbbb87b8bbf603a188769a6f92c2c79f7c Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Fri, 21 Jul 2023 18:55:11 -0700 Subject: [PATCH 45/80] Final way to issue a TAG for federates with tansient upstreams --- core/federated/RTI/enclave.c | 48 +++----- core/federated/RTI/rti_lib.c | 221 +++++++++++++++-------------------- core/federated/RTI/rti_lib.h | 26 +---- 3 files changed, 115 insertions(+), 180 deletions(-) diff --git a/core/federated/RTI/enclave.c b/core/federated/RTI/enclave.c index caecbfd53..a35ed4046 100644 --- a/core/federated/RTI/enclave.c +++ b/core/federated/RTI/enclave.c @@ -73,10 +73,11 @@ tag_advance_grant_t tag_advance_grant_if_safe(enclave_t* e) { enclave_t *upstream = _e_rti->enclaves[e->upstream[j]]; // Do Ignore this enclave if it no longer connected. - if (upstream->state != NOT_CONNECTED) { - num_connected_upstream++; - // continue; + if (upstream->state == NOT_CONNECTED) { + continue; } + + num_connected_upstream++; tag_t candidate = lf_delay_tag(upstream->completed, e->upstream_delay[j]); @@ -89,37 +90,20 @@ tag_advance_grant_t tag_advance_grant_if_safe(enclave_t* e) { "(adjusted by after delay).", e->id, min_upstream_completed.time - start_time, min_upstream_completed.microstep); - if ( - num_connected_upstream != 0 - && lf_tag_compare(min_upstream_completed, e->last_granted) > 0 + + if (num_connected_upstream == 0) { + // When none of the upstream federates is connected (case of transients), + if (lf_tag_compare(e->next_event, FOREVER_TAG) != 0) { + result.tag = e->next_event; + return result; + } + } else if ( + lf_tag_compare(min_upstream_completed, e->last_granted) > 0 && lf_tag_compare(min_upstream_completed, e->next_event) >= 0 // The enclave has to advance its tag ) { result.tag = min_upstream_completed; return result; - } else if (num_connected_upstream == 0) { - // When none of the upstream federates is connected (case of transients), - // check their TAG - // Find the earliest TAG 
of upstream enclaves (M). - tag_t min_upstream_granted = FOREVER_TAG; - - for (int j = 0; j < e->num_upstream; j++) { - enclave_t *upstream = _e_rti->enclaves[e->upstream[j]]; - tag_t candidate = e->last_granted; - if (lf_tag_compare(candidate, min_upstream_granted) < 0) { - min_upstream_granted = candidate; - } - } - - if (lf_tag_compare(min_upstream_granted, FOREVER_TAG) != 0) { - min_upstream_granted = e->next_event; - } - - if (lf_tag_compare(min_upstream_granted, FOREVER_TAG) != 0) { - min_upstream_granted = e->next_event; - result.tag = min_upstream_granted; - return result; - } - } + } // Can't make progress based only on upstream LTCs. // If all (transitive) upstream enclaves of the enclave @@ -143,7 +127,7 @@ tag_advance_grant_t tag_advance_grant_if_safe(enclave_t* e) { enclave_t *upstream = _e_rti->enclaves[e->upstream[j]]; // Ignore this enclave if it is no longer connected. - // if (upstream->state == NOT_CONNECTED) continue; + if (upstream->state == NOT_CONNECTED) continue; // Find the (transitive) next event tag upstream. tag_t upstream_next_event = transitive_next_event( @@ -282,7 +266,7 @@ void notify_advance_grant_if_safe(enclave_t* e) { } tag_t transitive_next_event(enclave_t* e, tag_t candidate, bool visited[]) { - if (visited[e->id] /*|| e->state == NOT_CONNECTED*/) { + if (visited[e->id] || e->state == NOT_CONNECTED) { // Enclave has stopped executing or we have visited it before. // No point in checking upstream enclaves. 
return candidate; diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 0c30ab947..0834700e0 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -161,19 +161,24 @@ int create_server(int32_t specified_port, uint16_t port, socket_type_t socket_ty return socket_descriptor; } -void notify_tag_advance_grant(enclave_t* e, tag_t tag) { - if (/*e->state == NOT_CONNECTED - || */lf_tag_compare(tag, e->last_granted) <= 0 - || lf_tag_compare(tag, e->last_provisionally_granted) < 0 - ) { - return; - } - // Need to make sure that the destination federate's thread has already - // sent the starting MSG_TYPE_TIMESTAMP message. - while (e->state == PENDING) { - // Need to wait here. - lf_cond_wait(&sent_start_time); +void notify_tag_advance_grant_delayed(enclave_t* e, tag_t tag) { + federate_t* fed = (federate_t*)e; + + // Check wether there is already a pending grant + if (lf_tag_compare(fed->pending_grant, NEVER_TAG) == 0) { + fed->pending_grant = tag; + lf_thread_create(&(fed->pending_grant_thread_id), pending_grant_thread, fed); + } else if (lf_tag_compare(fed->pending_grant, tag) >= 0) { + // FIXME: do nothing? + } else { + // FIXME: It should be really weired to receive and earlier tag gant than + // the pending one. + // Should this be a fata error? 
} +} + +void notify_tag_advance_grant_immediate(enclave_t* e, tag_t tag) { + // Case where the TAG notification is immediate size_t message_length = 1 + sizeof(int64_t) + sizeof(uint32_t); unsigned char buffer[message_length]; buffer[0] = MSG_TYPE_TAG_ADVANCE_GRANT; @@ -201,9 +206,41 @@ void notify_tag_advance_grant(enclave_t* e, tag_t tag) { } } +void notify_tag_advance_grant(enclave_t* e, tag_t tag) { + if (e->state == NOT_CONNECTED + || lf_tag_compare(tag, e->last_granted) <= 0 + || lf_tag_compare(tag, e->last_provisionally_granted) < 0 + ) { + return; + } + // Need to make sure that the destination federate's thread has already + // sent the starting MSG_TYPE_TIMESTAMP message. + while (e->state == PENDING) { + // Need to wait here. + lf_cond_wait(&sent_start_time); + } + + // Check if sending the tag advance grant needs to be delayed or not + // Delay is needed when a federate has, at least one, absent upstream transient + int num_absent_upstram_transients = 0; + for (int j = 0; j < e->num_upstream; j++) { + federate_t *upstream = (federate_t*)(_f_rti->enclaves[e->upstream[j]]); + // Do Ignore this enclave if it no longer connected. + if ((upstream->enclave.state == NOT_CONNECTED) && (upstream->is_transient)) { + num_absent_upstram_transients++; + } + } + + if (num_absent_upstram_transients > 0) { + notify_tag_advance_grant_delayed(e, tag); + } else { + notify_tag_advance_grant_immediate(e, tag); + } +} + void notify_provisional_tag_advance_grant(enclave_t* e, tag_t tag) { - if (/*e->state == NOT_CONNECTED - || */lf_tag_compare(tag, e->last_granted) <= 0 + if (e->state == NOT_CONNECTED + || lf_tag_compare(tag, e->last_granted) <= 0 || lf_tag_compare(tag, e->last_provisionally_granted) <= 0 ) { return; @@ -214,6 +251,7 @@ void notify_provisional_tag_advance_grant(enclave_t* e, tag_t tag) { // Need to wait here. 
lf_cond_wait(&sent_start_time); } + size_t message_length = 1 + sizeof(int64_t) + sizeof(uint32_t); unsigned char buffer[message_length]; buffer[0] = MSG_TYPE_PROVISIONAL_TAG_ADVANCE_GRANT; @@ -834,91 +872,40 @@ void handle_timestamp(federate_t *my_fed) { // A transient has joined after the startup phase // At this point, we already hold the mutex - // Iterate over the upstream federates and downstream federates to find - // get the max of the TAGs. - // FIXME: what about PTAGs? - // FIXME: Maybe we can use TAGs from a transient that left??? - // for (int j = 0; j < my_fed->enclave.num_upstream; j++) { - // federate_t* upstream = _f_rti->enclaves[my_fed->enclave.upstream[j]]; - // // Ignore this federate if it has resigned or if it a transient that - // // is absent - // if (upstream->enclave.state == NOT_CONNECTED) { - // continue; - // } - // if (lf_tag_compare(federate_start_tag, upstream->enclave.last_granted) < 0) { - // federate_start_tag.time = upstream->enclave.last_granted.time; - // federate_start_tag.microstep = upstream->enclave.last_granted.microstep; - // federate_start_tag.microstep++; - // } - // } - - tag_t federate_start_tag = NEVER_TAG; // Iterate over the downstream federates for (int j = 0; j < my_fed->enclave.num_downstream; j++) { federate_t* downstream = _f_rti->enclaves[my_fed->enclave.downstream[j]]; // Ignore this federate if it has resigned. 
- // if (downstream->enclave.state == NOT_CONNECTED) { - // continue; - // } + if (downstream->enclave.state == NOT_CONNECTED) { + continue; + } // Get the max over the TAG of the downstreams - if (lf_tag_compare(downstream->enclave.last_granted, federate_start_tag) > 0) { - federate_start_tag.time = downstream->enclave.last_granted.time; - federate_start_tag.microstep = downstream->enclave.last_granted.microstep; + if (lf_tag_compare(downstream->enclave.last_granted, my_fed->effective_start_tag) > 0) { + my_fed->effective_start_tag = downstream->enclave.last_granted; + my_fed->effective_start_tag.microstep++; } - - // int compare_tag_ltc = lf_tag_compare(downstream->enclave.last_granted, downstream->enclave.completed); - // if (compare_tag_ltc >= 0) { - // federate_start_tag.time = downstream->enclave.last_granted.time; - // federate_start_tag.microstep = downstream->enclave.last_granted.microstep; - // } else { - // if (lf_tag_compare(downstream->enclave.completed, downstream->enclave.next_event) >= 0) { - // federate_start_tag.time = downstream->enclave.completed.time; - // federate_start_tag.microstep = downstream->enclave.completed.microstep; - // } else { - // if (lf_tag_compare(downstream->enclave.next_event, NEVER_TAG) == 0) {//|| - // // lf_tag_compare(downstream->enclave.next_event, stop_tag) == 0) - // federate_start_tag.time = downstream->enclave.completed.time; - // federate_start_tag.microstep = downstream->enclave.completed.microstep; - // } else { - // federate_start_tag.time = downstream->enclave.next_event.time; - // federate_start_tag.microstep = downstream->enclave.next_event.microstep; - // } - // } - // } - - // if (lf_tag_compare(federate_start_tag, downstream_proposed_tag) < 0) { - // federate_start_tag.time = downstream_proposed_tag.time; - // federate_start_tag.microstep = downstream_proposed_tag.microstep; - // } - // // When the TAG of the downstream have never been set or have been set - // // but the federate could advance, then - // 
// if (lf_tag_compare(downstream->enclave.last_granted, NEVER_TAG) < 0) { - // // if (lf_tag_compare(downstream->enclave.last_granted, downstream->enclave.completed) < 0) { - - // // } - // if (lf_tag_compare(federate_start_tag, downstream->enclave.last_granted) < 0) { - // federate_start_tag.time = downstream->enclave.last_granted.time; - // federate_start_tag.microstep = downstream->enclave.last_granted.microstep; - // federate_start_tag.microstep++; - // } } - // If the transient federate has no connected upstream or downstream federates, - // then do not wait for the start time - if (lf_tag_compare(federate_start_tag, NEVER_TAG) != 0 - && (lf_tag_compare(federate_start_tag, my_fed->effective_start_tag) > 0)) - { - my_fed->effective_start_tag.time = federate_start_tag.time; - my_fed->effective_start_tag.microstep = federate_start_tag.microstep; - my_fed->effective_start_tag.microstep++; - } else { - my_fed->start_time_is_set = true; - // FIXME: Should add the delay? - my_fed->effective_start_tag.time += DELAY_START; - my_fed->effective_start_tag.microstep = 0u; - } + // For every downstream that has a pending grant that is higher then the + // effective_start_time of the federate, cancel it + for (int j = 0; j < my_fed->enclave.num_downstream; j++) { + federate_t* downstream = _f_rti->enclaves[my_fed->enclave.downstream[j]]; + + // Ignore this federate if it has resigned. 
+ if (downstream->enclave.state == NOT_CONNECTED) { + continue; + } + + // Check the pending TAG, if any + if ( + lf_tag_compare(downstream->pending_grant, NEVER_TAG) != 0 + && lf_tag_compare(downstream->pending_grant, my_fed->effective_start_tag) > 0 + ) { + downstream->pending_grant = NEVER_TAG; + } + } lf_mutex_unlock(&rti_mutex); @@ -1714,6 +1701,7 @@ void initialize_federate(federate_t* fed, uint16_t id) { fed->num_of_conn_federates = 0; fed->num_of_conn_federates_sent_net = 0; fed->start_time_is_set = false; + fed->pending_grant = NEVER_TAG; } int32_t start_rti_server(uint16_t port) { @@ -2146,48 +2134,25 @@ void reset_transient_federate(federate_t* fed) { fed->server_port = -1; fed->requested_stop = false; fed->is_transient = true; - // FIXME: Should it be reset to the NEVER_TAG? fed->effective_start_tag = NEVER_TAG; fed->num_of_conn_federates = 0; fed->num_of_conn_federates_sent_net = 0; fed->start_time_is_set = false; + fed->pending_grant = NEVER_TAG; } -// void handle_current_tag_query_response(federate_t *my_fed) { -// // Get the logical time instant and the transient fed_id from the socket -// size_t buffer_size = sizeof(instant_t) + sizeof(uint16_t); -// unsigned char buffer[buffer_size]; -// // Read bytes from the socket. We need 8 bytes. 
-// ssize_t bytes_read = read_from_socket(my_fed->socket, buffer_size, buffer); -// if (bytes_read < (ssize_t)sizeof(int64_t)) { -// lf_print_error("ERROR reading next event query response from federate %d.\n", my_fed->enclave.id); -// } - -// // Get the timestamp and the transient federate id -// instant_t timestamp = swap_bytes_if_big_endian_int64(*((int64_t *)(buffer))); -// uint16_t transient_fed_id = extract_uint16((&buffer[8])); -// if (_f_rti->tracing_enabled) { -// tag_t tag = {.time = timestamp, .microstep = 0}; -// tracepoint_RTI_from_federate(receive_CuTAG_QR_RES, my_fed->enclave.id, &tag); -// } -// LF_PRINT_LOG("RTI received current TAG query response message: " PRINTF_TIME, timestamp); - -// // FIXME: Should the lock be inside the if statement only? -// lf_mutex_lock(&rti_mutex); -// // Processing the TIMESTAMP depends on whether it is the startup phase (all -// // persistent federates joined) or not. -// federate_t* transient = _f_rti->enclaves[transient_fed_id]; - -// // Set the start_time of the transient federate to be the maximum among -// // current tag of upstreams and the physical time at which it joined . 
-// if (timestamp > transient->effective_start_tag.time) { -// transient->effective_start_tag.time = timestamp; -// } -// // Check that upstream and downstream federates of the transient did propose a start_time -// transient->num_of_conn_federates_sent_net++; -// if (transient->num_of_conn_federates_sent_net == transient->num_of_conn_federates) { -// // All expected connected federates to transient have sent responses with NET to RTI -// transient->start_time_is_set = true; -// } -// lf_mutex_unlock(&rti_mutex); -// } +void* pending_grant_thread(void* federate) { + federate_t* fed = (federate_t*)federate; + + lf_sleep(fed->pending_grant.time - lf_time_physical()); + + lf_mutex_lock(&rti_mutex); + + // If the pending grant becomes NEVER_TAG, then this means that it should + // not be sent + if(lf_tag_compare(fed->pending_grant, NEVER_TAG) != 0) { + notify_tag_advance_grant_immediate(&(fed->enclave), fed->pending_grant); + fed->pending_grant = NEVER_TAG; + } + lf_mutex_unlock(&rti_mutex); +} diff --git a/core/federated/RTI/rti_lib.h b/core/federated/RTI/rti_lib.h index cc79bb2fd..55ff8e9ce 100644 --- a/core/federated/RTI/rti_lib.h +++ b/core/federated/RTI/rti_lib.h @@ -76,6 +76,8 @@ typedef struct federate_t { // that sent responded to the next event tag query form the RTI. bool start_time_is_set; // Boolean variable used to signal that all connected federates // have sent a response to next event tag query from the RTI. + tag_t pending_grant; // Tga pending to be granted + lf_thread_t pending_grant_thread_id; // The ID of the thread handling the pending tag grant } federate_t; /** @@ -584,28 +586,12 @@ void* connect_to_transient_federates_thread(); void reset_transient_federate(federate_t* fed); /** - * Queries conn_fed for its current Tag (using MSG_TYPE_CURRENT_TAG_QUERY). - * If the function fails to send the query, for example in case the federate is - * not connected (can be a transient one itself), then return false. 
In such case, - * the RTI will not wait to receive an answer from it. - * - * The fed_id of the transient federate is sent to conn_fed, which should be - * returned as is within MSG_TYPE_CURRENT_TAG_QUERY_RESPONSE. The aim is to - * identify which of the transient federates has initiated the request. This enables - * the support of two diffrent transient federates joining close eanough in time. - * - * @param conn_fed: the federate to which to send the NET request - * @param fed_id: The ID of the transient joining federate - * @return true, if successfully sent, false otherwise. + * @brief Thread that sleeps for a period of time, and then wakes up to check if + * a tag advance grant needs to be sent. + * @param fed the fedarate whose tag advance grant needs to be delayed. */ -// bool send_current_tag_query(federate_t* conn_fed, uint16_t fed_id); +void* pending_grant_thread(void* fed); -/** - * Handles current tag query response received form my_fed. - * - * @param my_fed: the federate from whom the response is received. 
- */ -// void handle_current_tag_query_response(federate_t *my_fed); ////////////////////////////////////////////////////////// #endif // RTI_LIB_H From 4c38eec5f4ff79af1e5e89332cbf72415ddcdc08 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Wed, 26 Jul 2023 17:04:17 -0700 Subject: [PATCH 46/80] Sending PATG can be delayed as well, if a trabsient's downstream has exclusively absent transient updtreams --- core/federated/RTI/rti_lib.c | 257 ++++++++++++++++++++++------------- core/federated/RTI/rti_lib.h | 78 ++++++++++- 2 files changed, 236 insertions(+), 99 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 0834700e0..d4f045067 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -161,50 +161,7 @@ int create_server(int32_t specified_port, uint16_t port, socket_type_t socket_ty return socket_descriptor; } -void notify_tag_advance_grant_delayed(enclave_t* e, tag_t tag) { - federate_t* fed = (federate_t*)e; - - // Check wether there is already a pending grant - if (lf_tag_compare(fed->pending_grant, NEVER_TAG) == 0) { - fed->pending_grant = tag; - lf_thread_create(&(fed->pending_grant_thread_id), pending_grant_thread, fed); - } else if (lf_tag_compare(fed->pending_grant, tag) >= 0) { - // FIXME: do nothing? - } else { - // FIXME: It should be really weired to receive and earlier tag gant than - // the pending one. - // Should this be a fata error? 
- } -} -void notify_tag_advance_grant_immediate(enclave_t* e, tag_t tag) { - // Case where the TAG notification is immediate - size_t message_length = 1 + sizeof(int64_t) + sizeof(uint32_t); - unsigned char buffer[message_length]; - buffer[0] = MSG_TYPE_TAG_ADVANCE_GRANT; - encode_int64(tag.time, &(buffer[1])); - encode_int32((int32_t)tag.microstep, &(buffer[1 + sizeof(int64_t)])); - - if (_f_rti->tracing_enabled) { - tracepoint_rti_to_federate(_f_rti->trace, send_TAG, e->id, &tag); - } - // This function is called in notify_advance_grant_if_safe(), which is a long - // function. During this call, the socket might close, causing the following write_to_socket - // to fail. Consider a failure here a soft failure and update the federate's status. - ssize_t bytes_written = write_to_socket(((federate_t*)e)->socket, message_length, buffer); - e->last_granted = tag; - if (bytes_written < (ssize_t)message_length) { - lf_print_error("RTI failed to send tag advance grant to federate %d.", e->id); - if (bytes_written < 0) { - e->state = NOT_CONNECTED; - // FIXME: We need better error handling, but don't stop other execution here. 
- } - } else { - // e->last_granted = tag; - LF_PRINT_LOG("RTI sent to federate %d the tag advance grant (TAG) " PRINTF_TAG ".", - e->id, tag.time - start_time, tag.microstep); - } -} void notify_tag_advance_grant(enclave_t* e, tag_t tag) { if (e->state == NOT_CONNECTED @@ -252,59 +209,21 @@ void notify_provisional_tag_advance_grant(enclave_t* e, tag_t tag) { lf_cond_wait(&sent_start_time); } - size_t message_length = 1 + sizeof(int64_t) + sizeof(uint32_t); - unsigned char buffer[message_length]; - buffer[0] = MSG_TYPE_PROVISIONAL_TAG_ADVANCE_GRANT; - encode_int64(tag.time, &(buffer[1])); - encode_int32((int32_t)tag.microstep, &(buffer[1 + sizeof(int64_t)])); - - if (_f_rti->tracing_enabled){ - tracepoint_rti_to_federate(_f_rti->trace, send_PTAG, e->id, &tag); - } - // This function is called in notify_advance_grant_if_safe(), which is a long - // function. During this call, the socket might close, causing the following write_to_socket - // to fail. Consider a failure here a soft failure and update the federate's status. - ssize_t bytes_written = write_to_socket(((federate_t*)e)->socket, message_length, buffer); - e->last_provisionally_granted = tag; - if (bytes_written < (ssize_t)message_length) { - lf_print_error("RTI failed to send tag advance grant to federate %d.", e->id); - if (bytes_written < 0) { - e->state = NOT_CONNECTED; - // FIXME: We need better error handling, but don't stop other execution here. + // Check if sending the tag advance grant needs to be delayed or not + // Delay is needed when a federate has, at least one, absent upstream transient + int num_absent_upstram_transients = 0; + for (int j = 0; j < e->num_upstream; j++) { + federate_t *upstream = (federate_t*)(_f_rti->enclaves[e->upstream[j]]); + // Do Ignore this enclave if it no longer connected. 
+ if ((upstream->enclave.state == NOT_CONNECTED) && (upstream->is_transient)) { + num_absent_upstram_transients++; } - } else { - // e->last_provisionally_granted = tag; - LF_PRINT_LOG("RTI sent to federate %d the Provisional Tag Advance Grant (PTAG) " PRINTF_TAG ".", - e->id, tag.time - start_time, tag.microstep); - - // Send PTAG to all upstream federates, if they have not had - // a later or equal PTAG or TAG sent previously and if their transitive - // NET is greater than or equal to the tag. - // NOTE: This could later be replaced with a TNET mechanism once - // we have an available encoding of causality interfaces. - // That might be more efficient. - for (int j = 0; j < e->num_upstream; j++) { - federate_t* upstream = _f_rti->enclaves[e->upstream[j]]; - - // Ignore this federate if it has resigned. - // if (upstream->enclave.state == NOT_CONNECTED) continue; - // To handle cycles, need to create a boolean array to keep - // track of which upstream federates have been visited. - bool* visited = (bool*)calloc(_f_rti->number_of_enclaves, sizeof(bool)); // Initializes to 0. - - // Find the (transitive) next event tag upstream. - tag_t upstream_next_event = transitive_next_event( - &(upstream->enclave), upstream->enclave.next_event, visited); - free(visited); - // If these tags are equal, then - // a TAG or PTAG should have already been granted, - // in which case, another will not be sent. But it - // may not have been already granted. 
- if (lf_tag_compare(upstream_next_event, tag) >= 0) { - notify_provisional_tag_advance_grant(&(upstream->enclave), tag); - } + } - } + if (num_absent_upstram_transients > 0) { + notify_provisional_tag_advance_grant_delayed(e, tag); + } else { + notify_provisional_tag_advance_grant_immediate(e, tag); } } @@ -898,13 +817,21 @@ void handle_timestamp(federate_t *my_fed) { continue; } - // Check the pending TAG, if any + // Check the pending tag grant, if any, and keep it only if it is + // sonner than the effective start tag if ( lf_tag_compare(downstream->pending_grant, NEVER_TAG) != 0 && lf_tag_compare(downstream->pending_grant, my_fed->effective_start_tag) > 0 ) { downstream->pending_grant = NEVER_TAG; } + // Same for the possible pending provisional tag grant + if ( + lf_tag_compare(downstream->pending_provisional_grant, NEVER_TAG) != 0 + && lf_tag_compare(downstream->pending_provisional_grant, my_fed->effective_start_tag) > 0 + ) { + downstream->pending_provisional_grant = NEVER_TAG; + } } lf_mutex_unlock(&rti_mutex); @@ -2144,7 +2071,10 @@ void reset_transient_federate(federate_t* fed) { void* pending_grant_thread(void* federate) { federate_t* fed = (federate_t*)federate; - lf_sleep(fed->pending_grant.time - lf_time_physical()); + interval_t sleep_interval = fed->pending_grant.time - lf_time_physical(); + if (sleep_interval > 0) { + lf_sleep(sleep_interval); + } lf_mutex_lock(&rti_mutex); @@ -2156,3 +2086,138 @@ void* pending_grant_thread(void* federate) { } lf_mutex_unlock(&rti_mutex); } + +void notify_tag_advance_grant_delayed(enclave_t* e, tag_t tag) { + federate_t* fed = (federate_t*)e; + + // Check wether there is already a pending grant + if (lf_tag_compare(fed->pending_grant, NEVER_TAG) == 0) { + fed->pending_grant = tag; + lf_thread_create(&(fed->pending_grant_thread_id), pending_grant_thread, fed); + } else if (lf_tag_compare(fed->pending_grant, tag) >= 0) { + // FIXME: do nothing? 
+ } else { + // FIXME: It should be really weired to receive and earlier tag gant than + // the pending one. + // Should this be a fatal error? + } +} + +void notify_tag_advance_grant_immediate(enclave_t* e, tag_t tag) { + // Case where the TAG notification is immediate + size_t message_length = 1 + sizeof(int64_t) + sizeof(uint32_t); + unsigned char buffer[message_length]; + buffer[0] = MSG_TYPE_TAG_ADVANCE_GRANT; + encode_int64(tag.time, &(buffer[1])); + encode_int32((int32_t)tag.microstep, &(buffer[1 + sizeof(int64_t)])); + + if (_f_rti->tracing_enabled) { + tracepoint_rti_to_federate(_f_rti->trace, send_TAG, e->id, &tag); + } + // This function is called in notify_advance_grant_if_safe(), which is a long + // function. During this call, the socket might close, causing the following write_to_socket + // to fail. Consider a failure here a soft failure and update the federate's status. + ssize_t bytes_written = write_to_socket(((federate_t*)e)->socket, message_length, buffer); + e->last_granted = tag; + if (bytes_written < (ssize_t)message_length) { + lf_print_error("RTI failed to send tag advance grant to federate %d.", e->id); + if (bytes_written < 0) { + e->state = NOT_CONNECTED; + // FIXME: We need better error handling, but don't stop other execution here. 
+ } + } else { + // e->last_granted = tag; + LF_PRINT_LOG("RTI sent to federate %d the tag advance grant (TAG) " PRINTF_TAG ".", + e->id, tag.time - start_time, tag.microstep); + } +} + +void* pending_provisional_grant_thread(void* federate) { + federate_t* fed = (federate_t*)federate; + + interval_t sleep_interval = fed->pending_provisional_grant.time - lf_time_physical(); + if (sleep_interval > 0) { + lf_sleep(sleep_interval); + } + + lf_mutex_lock(&rti_mutex); + + // If the pending grant becomes NEVER_TAG, then this means that it should + // not be sent + if(lf_tag_compare(fed->pending_provisional_grant, NEVER_TAG) != 0) { + notify_provisional_tag_advance_grant_immediate(&(fed->enclave), fed->pending_provisional_grant); + fed->pending_provisional_grant = NEVER_TAG; + } + lf_mutex_unlock(&rti_mutex); +} + +void notify_provisional_tag_advance_grant_delayed(enclave_t* e, tag_t tag) { + federate_t* fed = (federate_t*)e; + + // Check wether there is already a pending grant + if (lf_tag_compare(fed->pending_provisional_grant, NEVER_TAG) == 0) { + fed->pending_provisional_grant = tag; + lf_thread_create(&(fed->pending_provisional_grant_thread_id), pending_provisional_grant_thread, fed); + } else if (lf_tag_compare(fed->pending_provisional_grant, tag) >= 0) { + // FIXME: do nothing? + } else { + // FIXME: It should be really weired to receive and earlier tag gant than + // the pending one. + // Should this be a fatal error? 
+ } +} + +void notify_provisional_tag_advance_grant_immediate(enclave_t* e, tag_t tag) { + size_t message_length = 1 + sizeof(int64_t) + sizeof(uint32_t); + unsigned char buffer[message_length]; + buffer[0] = MSG_TYPE_PROVISIONAL_TAG_ADVANCE_GRANT; + encode_int64(tag.time, &(buffer[1])); + encode_int32((int32_t)tag.microstep, &(buffer[1 + sizeof(int64_t)])); + + if (_f_rti->tracing_enabled){ + tracepoint_rti_to_federate(_f_rti->trace, send_PTAG, e->id, &tag); + } + // This function is called in notify_advance_grant_if_safe(), which is a long + // function. During this call, the socket might close, causing the following write_to_socket + // to fail. Consider a failure here a soft failure and update the federate's status. + ssize_t bytes_written = write_to_socket(((federate_t*)e)->socket, message_length, buffer); + if (bytes_written < (ssize_t)message_length) { + lf_print_error("RTI failed to send tag advance grant to federate %d.", e->id); + if (bytes_written < 0) { + e->state = NOT_CONNECTED; + // FIXME: We need better error handling, but don't stop other execution here. + } + } else { + e->last_provisionally_granted = tag; + LF_PRINT_LOG("RTI sent to federate %d the Provisional Tag Advance Grant (PTAG) " PRINTF_TAG ".", + e->id, tag.time - start_time, tag.microstep); + + // Send PTAG to all upstream federates, if they have not had + // a later or equal PTAG or TAG sent previously and if their transitive + // NET is greater than or equal to the tag. + // NOTE: This could later be replaced with a TNET mechanism once + // we have an available encoding of causality interfaces. + // That might be more efficient. + for (int j = 0; j < e->num_upstream; j++) { + federate_t* upstream = _f_rti->enclaves[e->upstream[j]]; + + // Ignore this federate if it has resigned. + // if (upstream->enclave.state == NOT_CONNECTED) continue; + // To handle cycles, need to create a boolean array to keep + // track of which upstream federates have been visited. 
+ bool* visited = (bool*)calloc(_f_rti->number_of_enclaves, sizeof(bool)); // Initializes to 0. + + // Find the (transitive) next event tag upstream. + tag_t upstream_next_event = transitive_next_event( + &(upstream->enclave), upstream->enclave.next_event, visited); + free(visited); + // If these tags are equal, then + // a TAG or PTAG should have already been granted, + // in which case, another will not be sent. But it + // may not have been already granted. + if (lf_tag_compare(upstream_next_event, tag) >= 0) { + notify_provisional_tag_advance_grant(&(upstream->enclave), tag); + } + } + } +} diff --git a/core/federated/RTI/rti_lib.h b/core/federated/RTI/rti_lib.h index 55ff8e9ce..32959b358 100644 --- a/core/federated/RTI/rti_lib.h +++ b/core/federated/RTI/rti_lib.h @@ -76,8 +76,10 @@ typedef struct federate_t { // that sent responded to the next event tag query form the RTI. bool start_time_is_set; // Boolean variable used to signal that all connected federates // have sent a response to next event tag query from the RTI. - tag_t pending_grant; // Tga pending to be granted + tag_t pending_grant; // The pending tag advance grant + tag_t pending_provisional_grant; // The pending provisional tag advance grant lf_thread_t pending_grant_thread_id; // The ID of the thread handling the pending tag grant + lf_thread_t pending_provisional_grant_thread_id; // The ID of the thread handling the pending provitional tag grant } federate_t; /** @@ -588,9 +590,79 @@ void reset_transient_federate(federate_t* fed); /** * @brief Thread that sleeps for a period of time, and then wakes up to check if * a tag advance grant needs to be sent. - * @param fed the fedarate whose tag advance grant needs to be delayed. + * @param federate the fedarate whose tag advance grant needs to be delayed. 
*/ -void* pending_grant_thread(void* fed); +void* pending_grant_thread(void* federate); + +/** + * Notify a tag advance grant (TAG) message to the specified federate after + * the physical time reaches the tag. A thread is created to this end. + * Once the delay period passed, if the pending tag have not been reset to + * NEVER_TAG, the tag advance grant will be immediate. + * + * This function assumes that the caller holds the mutex lock. + * + * @param e The enclave. + * @param tag The tag to grant. + */ +void notify_tag_advance_grant_delayed(enclave_t* e, tag_t tag); + +/** + * Notify a tag advance grant (TAG) message to the specified federate after + * the physical time reaches the tag. A thread is created to this end. + * Do not notify it if a previously sent PTAG was greater or if a + * previously sent TAG was greater or equal. + * + * This function will keep a record of this TAG in the federate's last_granted + * field. + * + * This function assumes that the caller holds the mutex lock. + * + * FIXME: This needs two implementations, one for enclaves and one for federates. + * + * @param e The enclave. + * @param tag The tag to grant. + */ +void notify_tag_advance_grant_immediate(enclave_t* e, tag_t tag); + +/** + * @brief Thread that sleeps for a period of time, and then wakes up to check if + * a provisional tag advance grant needs to be sent. + * @param federate the federate whose provisional tag advance grant needs to be delayed. + */ +void* pending_provisional_grant_thread(void* federate); + +/** + * Notify a provisional tag advance grant (TAG) message to the specified federate + * after the physical time reaches the tag. A thread is created to this end. + * Once the delay period passed, if the pending tag have not been reset to + * NEVER_TAG, the tag advance grant will be immediate. + * + * This function assumes that the caller holds the mutex lock. + * + * @param e The enclave. + * @param tag The provisional tag to grant. 
+ */ +void notify_provisional_tag_advance_grant_delayed(enclave_t* e, tag_t tag); + +/** + * Notify a tag advance grant (TAG) message to the specified federate after + * the physical time reaches the tag. A thread is created to this end. + * Do not notify it if a previously sent PTAG was greater or if a + * previously sent TAG was greater or equal. + * + * This function will keep a record of this TAG in the federate's last_granted + * field. + * + * This function assumes that the caller holds the mutex lock. + * + * FIXME: This needs two implementations, one for enclaves and one for federates. + * + * @param e The enclave. + * @param tag The tag to grant. + */ +void notify_provisional_tag_advance_grant_immediate(enclave_t* e, tag_t tag); + ////////////////////////////////////////////////////////// From 92a5d1d2eda69c624b2cb1423c60daf5b6d0f14e Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Fri, 28 Jul 2023 11:27:30 -0700 Subject: [PATCH 47/80] Fix timeout error of the RTI --- core/federated/RTI/rti_lib.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index d4f045067..409535f6f 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -1706,12 +1706,6 @@ void wait_for_federates(int socket_descriptor) { _f_rti->all_federates_exited = true; lf_print("All transient threads exited."); - - if (_f_rti->number_of_transient_federates == 0) { - lf_thread_join(responder_thread, &thread_exit_status); - } else if (_f_rti->number_of_transient_federates > 0) { - lf_thread_join(transient_thread, &thread_exit_status); - } // Shutdown and close the socket so that the accept() call in // respond_to_erroneous_connections returns. That thread should then From 33d8ae4a3d8c758949199693e8016556fbf0bbec Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Fri, 28 Jul 2023 12:47:56 -0700 Subject: [PATCH 48/80] Make sure a pending tag and a pending ptag are mutually exclusive.
Adjust comments --- core/federated/RTI/rti_lib.c | 30 +++++++++++---------- core/federated/RTI/rti_lib.h | 52 +++++++++++++++--------------------- 2 files changed, 37 insertions(+), 45 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 409535f6f..7b43f01e2 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -2085,16 +2085,18 @@ void notify_tag_advance_grant_delayed(enclave_t* e, tag_t tag) { federate_t* fed = (federate_t*)e; // Check wether there is already a pending grant + // And check the pending provisional grant as well + lf_mutex_lock(&rti_mutex); if (lf_tag_compare(fed->pending_grant, NEVER_TAG) == 0) { + // If a tag is issued, then stop any possible provisional tag grant fed->pending_grant = tag; + fed->pending_provisional_grant = NEVER_TAG; lf_thread_create(&(fed->pending_grant_thread_id), pending_grant_thread, fed); - } else if (lf_tag_compare(fed->pending_grant, tag) >= 0) { - // FIXME: do nothing? } else { - // FIXME: It should be really weired to receive and earlier tag gant than - // the pending one. - // Should this be a fatal error? + // If there is already a pending tag grant, then let it be sent first + // FIXME: Is this correct? 
} + lf_mutex_unlock(&rti_mutex); } void notify_tag_advance_grant_immediate(enclave_t* e, tag_t tag) { @@ -2148,17 +2150,17 @@ void* pending_provisional_grant_thread(void* federate) { void notify_provisional_tag_advance_grant_delayed(enclave_t* e, tag_t tag) { federate_t* fed = (federate_t*)e; - // Check wether there is already a pending grant - if (lf_tag_compare(fed->pending_provisional_grant, NEVER_TAG) == 0) { + // Proceed with the delayed provisional tag grant notification only if + // there is no pending grant and no provisional pending grant + lf_mutex_lock(&rti_mutex); + if ( + (lf_tag_compare(fed->pending_grant, NEVER_TAG) == 0) + && (lf_tag_compare(fed->pending_provisional_grant, NEVER_TAG) >= 0) + ) { fed->pending_provisional_grant = tag; lf_thread_create(&(fed->pending_provisional_grant_thread_id), pending_provisional_grant_thread, fed); - } else if (lf_tag_compare(fed->pending_provisional_grant, tag) >= 0) { - // FIXME: do nothing? - } else { - // FIXME: It should be really weired to receive and earlier tag gant than - // the pending one. - // Should this be a fatal error? - } + } + lf_mutex_unlock(&rti_mutex); } void notify_provisional_tag_advance_grant_immediate(enclave_t* e, tag_t tag) { diff --git a/core/federated/RTI/rti_lib.h b/core/federated/RTI/rti_lib.h index 32959b358..9131511f2 100644 --- a/core/federated/RTI/rti_lib.h +++ b/core/federated/RTI/rti_lib.h @@ -589,7 +589,9 @@ void reset_transient_federate(federate_t* fed); /** * @brief Thread that sleeps for a period of time, and then wakes up to check if - * a tag advance grant needs to be sent. + * a tag advance grant needs to be sent. That is, if the pending tag have not + * been reset to NEVER_TAG, the tag advance grant will be immediate. + * * @param federate the fedarate whose tag advance grant needs to be delayed. 
*/ void* pending_grant_thread(void* federate); @@ -597,28 +599,20 @@ void* pending_grant_thread(void* federate); /** * Notify a tag advance grant (TAG) message to the specified federate after * the physical time reaches the tag. A thread is created to this end. - * Once the delay period passed, if the pending tag have not been reset to - * NEVER_TAG, the tag advance grant will be immediate. - * - * This function assumes that the caller holds the mutex lock. * + * If a provisionl tag advance grant is pending, cancel it. If there is another + * pending tag advance grant, do not proceed with the thread creation. + * * @param e The enclave. * @param tag The tag to grant. */ void notify_tag_advance_grant_delayed(enclave_t* e, tag_t tag); /** - * Notify a tag advance grant (TAG) message to the specified federate after - * the physical time reaches the tag. A thread is created to this end. - * Do not notify it if a previously sent PTAG was greater or if a - * previously sent TAG was greater or equal. - * - * This function will keep a record of this TAG in the federate's last_granted - * field. - * - * This function assumes that the caller holds the mutex lock. + * Notify a tag advance grant (TAG) message to the specified federate immediately. * - * FIXME: This needs two implementations, one for enclaves and one for federates. + * This function will keep a record of this TAG in the enclave's last_granted + * field. * * @param e The enclave. * @param tag The tag to grant. @@ -626,19 +620,21 @@ void notify_tag_advance_grant_delayed(enclave_t* e, tag_t tag); void notify_tag_advance_grant_immediate(enclave_t* e, tag_t tag); /** - * @brief Thread that sleeps for a period of time, and then wakes up to check if - * a provisional tag advance grant needs to be sent. + * Thread that sleeps for a period of time, and then wakes up to check if + * a provisional tag advance grant needs to be sent. 
That is, if the pending + * provisional tag have not been reset to NEVER_TAG, the provisional tag advance + * grant will be immediate. + * * @param federate the federate whose provisional tag advance grant needs to be delayed. */ void* pending_provisional_grant_thread(void* federate); /** - * Notify a provisional tag advance grant (TAG) message to the specified federate + * Notify a provisional tag advance grant (PTAG) message to the specified federate * after the physical time reaches the tag. A thread is created to this end. - * Once the delay period passed, if the pending tag have not been reset to - * NEVER_TAG, the tag advance grant will be immediate. * - * This function assumes that the caller holds the mutex lock. + * If a tag advance grant or a provisional one is pending, then do not proceed + * with the thread creation. * * @param e The enclave. * @param tag The provisional tag to grant. @@ -646,17 +642,11 @@ void* pending_provisional_grant_thread(void* federate); void notify_provisional_tag_advance_grant_delayed(enclave_t* e, tag_t tag); /** - * Notify a tag advance grant (TAG) message to the specified federate after - * the physical time reaches the tag. A thread is created to this end. - * Do not notify it if a previously sent PTAG was greater or if a - * previously sent TAG was greater or equal. - * - * This function will keep a record of this TAG in the federate's last_granted - * field. - * - * This function assumes that the caller holds the mutex lock. + * Notify a provisional tag advance grant (PTAG) message to the specified federate + * immediately. * - * FIXME: This needs two implementations, one for enclaves and one for federates. + * This function will keep a record of this TAG in the enclave's last_provisionally_granted + * field. * * @param e The enclave. * @param tag The tag to grant. 
From 6c95b51298ae2d9dc2441fc35750eb7bda17c3e3 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Fri, 28 Jul 2023 14:22:19 -0700 Subject: [PATCH 49/80] Update lingua-franca-ref.txt to point to transient-federates branch --- lingua-franca-ref.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lingua-franca-ref.txt b/lingua-franca-ref.txt index 1f7391f92..ea44042e8 100644 --- a/lingua-franca-ref.txt +++ b/lingua-franca-ref.txt @@ -1 +1 @@ -master +transient-federates From ef6ab52fb4be5d7046228bf1bdd4aecf9372b9b8 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Tue, 1 Aug 2023 11:47:27 -0700 Subject: [PATCH 50/80] Fix format warning to get more output --- core/federated/clock-sync.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/federated/clock-sync.c b/core/federated/clock-sync.c index fdbf0642b..0fd2d751c 100644 --- a/core/federated/clock-sync.c +++ b/core/federated/clock-sync.c @@ -443,8 +443,8 @@ void handle_T4_clock_sync_message(unsigned char* buffer, int socket, instant_t r LF_PRINT_LOG("Clock sync:" " New offset: " PRINTF_TIME "." " Round trip delay to RTI (now): " PRINTF_TIME "." - " (AVG): " PRINTF_TIME "." - " (SD): " PRINTF_TIME "." + " (AVG): %ld." + " (SD): %ld." " Local round trip delay: " PRINTF_TIME "." " Test offset: " PRINTF_TIME ".", _lf_time_physical_clock_offset, From 919591121bc0d4ce6ba06db0a0d24ef2f41a8466 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Thu, 3 Aug 2023 15:47:38 -0700 Subject: [PATCH 51/80] Less format warning --- core/federated/clock-sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/federated/clock-sync.c b/core/federated/clock-sync.c index 0fd2d751c..21a3efbfb 100644 --- a/core/federated/clock-sync.c +++ b/core/federated/clock-sync.c @@ -444,7 +444,7 @@ void handle_T4_clock_sync_message(unsigned char* buffer, int socket, instant_t r " New offset: " PRINTF_TIME "." " Round trip delay to RTI (now): " PRINTF_TIME "." " (AVG): %ld." - " (SD): %ld." 
+ " (SD): %lld." " Local round trip delay: " PRINTF_TIME "." " Test offset: " PRINTF_TIME ".", _lf_time_physical_clock_offset, From 27150bfe694a81c85b662031ea91cd2bb956ad79 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Fri, 4 Aug 2023 13:43:45 -0700 Subject: [PATCH 52/80] Add lf_stop() --- core/federated/federate.c | 14 ++++++++++++++ include/core/federated/federate.h | 10 ++++++++++ 2 files changed, 24 insertions(+) diff --git a/core/federated/federate.c b/core/federated/federate.c index ce6515ce4..59d934709 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -2945,4 +2945,18 @@ void set_federation_id(const char* fid) { void set_federation_trace_object(trace_t * trace) { _fed.trace = trace; } + +void lf_stop() { + environment_t *env; + int num_env = _lf_get_environments(&env); + + for (int i = 0 ; i < num_env ; i++) { + tag_t new_stop_tag; + new_stop_tag.time = env[i].current_tag.time; + new_stop_tag.microstep = env[i].current_tag.microstep + 1; + _lf_set_stop_tag(&env[i], new_stop_tag); + } + // termination(); +} + #endif diff --git a/include/core/federated/federate.h b/include/core/federated/federate.h index 0d1a34009..596e01908 100644 --- a/include/core/federated/federate.h +++ b/include/core/federated/federate.h @@ -479,4 +479,14 @@ void synchronize_with_other_federates(); */ void wait_until_port_status_known(environment_t* env, int portID, interval_t STAA); +/** + * @brief Stop the execution of a federate. + * Every enclave within the federate will stop at one microstep later than its + * current tag. Unlike lf_request_stop(), this process does not require any + * involvement from the RTI, nor does it necessitate any consensus. + * + * This function is particularly useful for testing transient federates. + */ +void lf_stop(); + #endif // FEDERATE_H From 9a8f97cdc1a3b49b133afbb5ea3cab953d31f269 Mon Sep 17 00:00:00 2001 From: Peter Donovan Date: Fri, 4 Aug 2023 13:56:59 -0700 Subject: [PATCH 53/80] Add lf_get_federates_bin_directory. 
--- core/CMakeLists.txt | 1 + core/federated/federate.c | 4 ++++ include/core/federated/federate.h | 6 ++++++ 3 files changed, 11 insertions(+) diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index f28e424c7..7de808897 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -121,4 +121,5 @@ define(USER_THREADS) define(SCHEDULER) define(LF_SOURCE_DIRECTORY) define(LF_PACKAGE_DIRECTORY) +define(LF_FEDERATES_BIN_DIRECTORY) define(LF_FILE_SEPARATOR) diff --git a/core/federated/federate.c b/core/federated/federate.c index 59d934709..4585adc9e 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -1217,6 +1217,10 @@ instant_t get_start_time_from_rti(instant_t my_physical_time) { return timestamp; } +char* lf_get_federates_bin_directory() { + return LF_FEDERATES_BIN_DIRECTORY; +} + ////////////////////////////////Port Status Handling/////////////////////////////////////// extern lf_action_base_t* _lf_action_table[]; diff --git a/include/core/federated/federate.h b/include/core/federated/federate.h index 596e01908..29955684c 100644 --- a/include/core/federated/federate.h +++ b/include/core/federated/federate.h @@ -489,4 +489,10 @@ void wait_until_port_status_known(environment_t* env, int portID, interval_t STA */ void lf_stop(); +/** + * @brief Return the directory containing the executables of the individual + * federates. 
+ */ +char* lf_get_federates_bin_directory(); + #endif // FEDERATE_H From 584c85fd907b243bb6392e9bef0a0816f2ee3b59 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Fri, 4 Aug 2023 17:59:17 -0700 Subject: [PATCH 54/80] Add lf_get_federation_id() --- core/federated/federate.c | 4 ++++ include/core/federated/federate.h | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/core/federated/federate.c b/core/federated/federate.c index 4585adc9e..7a1f95332 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -2963,4 +2963,8 @@ void lf_stop() { // termination(); } +char* lf_get_federation_id() { + return federation_metadata.federation_id; +} + #endif diff --git a/include/core/federated/federate.h b/include/core/federated/federate.h index 29955684c..6eacd44c5 100644 --- a/include/core/federated/federate.h +++ b/include/core/federated/federate.h @@ -495,4 +495,9 @@ void lf_stop(); */ char* lf_get_federates_bin_directory(); +/** + * @brief Returns the federation id. + */ +char* lf_get_federation_id(); + #endif // FEDERATE_H From 644ed2c17057419b05d2115c116b789030000ab9 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Sun, 6 Aug 2023 23:01:21 -0700 Subject: [PATCH 55/80] More doc --- include/core/federated/federate.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/core/federated/federate.h b/include/core/federated/federate.h index 6eacd44c5..447271d9d 100644 --- a/include/core/federated/federate.h +++ b/include/core/federated/federate.h @@ -497,6 +497,8 @@ char* lf_get_federates_bin_directory(); /** * @brief Returns the federation id. + * + * This function is useful for creating federates on runtime. 
*/ char* lf_get_federation_id(); From 11314fccb5e1574d303c908fa620f3b24dacfed0 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Mon, 7 Aug 2023 13:45:17 -0700 Subject: [PATCH 56/80] Fix typo --- core/federated/RTI/rti_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 7b43f01e2..7a6021c9a 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -1679,7 +1679,7 @@ void wait_for_federates(int socket_descriptor) { for (int i = 0 ; i < _f_rti->number_of_enclaves ; i++) { if (!_f_rti->enclaves[i]->is_transient) { federate_t* fed = _f_rti->enclaves[i]; - lf_print("RTI: Waiting for thread handling peristent federate %d.", fed->enclave.id); + lf_print("RTI: Waiting for thread handling persistent federate %d.", fed->enclave.id); lf_thread_join(fed->thread_id, &thread_exit_status); free_in_transit_message_q(fed->in_transit_message_tags); lf_print("RTI: Federate %d thread exited.", fed->enclave.id); From 12efc498e2a83ec11a131be7c1daf3803dd6c27b Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Mon, 7 Aug 2023 13:47:01 -0700 Subject: [PATCH 57/80] Add lf_get_start_time() and lf_get_effective_start_time() to federate --- core/federated/federate.c | 9 ++++++++- include/core/federated/federate.h | 10 ++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/core/federated/federate.c b/core/federated/federate.c index 7a1f95332..702dc4aec 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -1309,7 +1309,6 @@ void update_last_known_status_on_input_ports(tag_t tag) { } } - /** * Update the last known status tag of a network input port * to the value of "tag". 
This is the largest tag at which the status @@ -2967,4 +2966,12 @@ char* lf_get_federation_id() { return federation_metadata.federation_id; } +time_t lf_get_effective_start_time() { + return effective_start_tag.time; +} + +time_t lf_get_start_time() { + return start_time; +} + #endif diff --git a/include/core/federated/federate.h b/include/core/federated/federate.h index 447271d9d..c67076f61 100644 --- a/include/core/federated/federate.h +++ b/include/core/federated/federate.h @@ -502,4 +502,14 @@ char* lf_get_federates_bin_directory(); */ char* lf_get_federation_id(); +/** + * @brief Returns the effective start time of the federate. The start_time of persistent + * federates is equal to their effective_start_time. Transient federates, however, + * have their effective_start_time higher or equal to their start_time. + */ +time_t lf_get_effective_start_time(); + +/** @brief Returns the start time of the federate. */ +time_t lf_get_start_time(); + #endif // FEDERATE_H From 1f339a66a4d7bb571313591827307f91341143fc Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Mon, 7 Aug 2023 14:09:15 -0700 Subject: [PATCH 58/80] When waiting for transients to join, do not error and exit if no socket, but try again --- core/federated/RTI/rti_lib.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 7a6021c9a..1eb6e6551 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -1997,8 +1997,6 @@ void* connect_to_transient_federates_thread() { if (socket_id >= 0) { // Got a socket break; - } else if (socket_id < 0 && (errno != EAGAIN || errno != EWOULDBLOCK)) { - lf_print_error_and_exit("RTI failed to accept the socket. %s.", strerror(errno)); } else { // Try again lf_print_warning("RTI failed to accept the socket. %s. 
Trying again.", strerror(errno)); From 8dcc23a214ccbcb19d5cc89e1fff7e75f203a3f3 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Mon, 7 Aug 2023 15:36:50 -0700 Subject: [PATCH 59/80] Fix wrong type --- core/federated/federate.c | 6 +++--- include/core/federated/federate.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core/federated/federate.c b/core/federated/federate.c index 702dc4aec..768e49fe8 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -1204,7 +1204,7 @@ instant_t get_start_time_from_rti(instant_t my_physical_time) { buffer[0]); } - // Read the federation start_time first, then the effective start_time after + // Read the federation start_time first, then the effective_start_tag after instant_t timestamp = extract_int64(&(buffer[1])); effective_start_tag = extract_tag(&(buffer[9])); @@ -2966,11 +2966,11 @@ char* lf_get_federation_id() { return federation_metadata.federation_id; } -time_t lf_get_effective_start_time() { +instant_t lf_get_effective_start_time() { return effective_start_tag.time; } -time_t lf_get_start_time() { +instant_t lf_get_start_time() { return start_time; } diff --git a/include/core/federated/federate.h b/include/core/federated/federate.h index c67076f61..ce21fe880 100644 --- a/include/core/federated/federate.h +++ b/include/core/federated/federate.h @@ -507,9 +507,9 @@ char* lf_get_federation_id(); * federates is equal to their effective_start_time. Transient federates, however, * have their effective_start_time higher or equal to their start_time. */ -time_t lf_get_effective_start_time(); +instant_t lf_get_effective_start_time(); /** @brief Returns the start time of the federate. 
*/ -time_t lf_get_start_time(); +instant_t lf_get_start_time(); #endif // FEDERATE_H From 53c6ad92b1b1e77800b8ed3a4e87fb65491d0c2b Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Mon, 7 Aug 2023 16:19:09 -0700 Subject: [PATCH 60/80] Insert message to debug CI --- core/federated/federate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/federated/federate.c b/core/federated/federate.c index 768e49fe8..95cff711b 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -2639,6 +2639,8 @@ void synchronize_with_other_federates(void) { // Note that this does not grant execution to this federate. start_time = get_start_time_from_rti(lf_time_physical()); + lf_print(">>>> Start_time is: "PRINTF_TIME" and effective is: "PRINTF_TIME, start_time, effective_start_time); + // Start a thread to listen for incoming TCP messages from the RTI. // @note Up until this point, the federate has been listening for messages // from the RTI in a sequential manner in the main thread. From now on, a From be897db33218a8617149760d184caa7301346d5a Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Mon, 7 Aug 2023 16:56:41 -0700 Subject: [PATCH 61/80] Fix message to debug CI --- core/federated/federate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/federated/federate.c b/core/federated/federate.c index 95cff711b..edd22a6a7 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -2639,7 +2639,7 @@ void synchronize_with_other_federates(void) { // Note that this does not grant execution to this federate. start_time = get_start_time_from_rti(lf_time_physical()); - lf_print(">>>> Start_time is: "PRINTF_TIME" and effective is: "PRINTF_TIME, start_time, effective_start_time); + lf_print(">>>> Start_time is: "PRINTF_TIME" and effective is: "PRINTF_TIME, lf_get_start_time(), lf_get_effective_start_time()); // Start a thread to listen for incoming TCP messages from the RTI. 
// @note Up until this point, the federate has been listening for messages From 204040e3d428f7b17f3eb447953d55729b5a065e Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Mon, 7 Aug 2023 21:55:04 -0700 Subject: [PATCH 62/80] Attempt to understand what happens in Transients.lf in CI --- core/federated/RTI/rti_lib.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 1eb6e6551..ec94e2901 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -756,8 +756,6 @@ void handle_timestamp(federate_t *my_fed) { lf_mutex_lock(&rti_mutex); - my_fed->effective_start_tag = (tag_t){.time = timestamp, .microstep = 0u}; - // Processing the TIMESTAMP depends on whether it is the startup phase (all // persistent federates joined) or not. if (_f_rti->num_feds_proposed_start < (_f_rti->number_of_enclaves - _f_rti->number_of_transient_federates)) { @@ -786,8 +784,10 @@ void handle_timestamp(federate_t *my_fed) { // Send the start_time my_fed->effective_start_tag = (tag_t){.time = start_time, .microstep = 0u}; + lf_print("RTI: >>>> Sending to federate %d the start_time: "PRINTF_TIME" and effective: "PRINTF_TIME, my_fed->enclave.id, start_time, my_fed->effective_start_tag.time); send_start_tag(my_fed, start_time, my_fed->effective_start_tag); } else { + my_fed->effective_start_tag = (tag_t){.time = timestamp, .microstep = 0u}; // A transient has joined after the startup phase // At this point, we already hold the mutex @@ -840,6 +840,7 @@ void handle_timestamp(federate_t *my_fed) { // together with the start time of the federation. 
// Send the start time + lf_print("RTI: >>>> Sending to federate %d the start_time: "PRINTF_TIME" and effective: "PRINTF_TIME, my_fed->enclave.id, start_time, my_fed->effective_start_tag.time); send_start_tag(my_fed, start_time, my_fed->effective_start_tag); } } From 3d4332bd57612cc5347b6dc9b81cb06e09174108 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Mon, 7 Aug 2023 22:31:59 -0700 Subject: [PATCH 63/80] Attempt to solve a corner case related to the start time of a transient federate --- core/federated/RTI/rti_lib.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index ec94e2901..027a87589 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -787,6 +787,11 @@ void handle_timestamp(federate_t *my_fed) { lf_print("RTI: >>>> Sending to federate %d the start_time: "PRINTF_TIME" and effective: "PRINTF_TIME, my_fed->enclave.id, start_time, my_fed->effective_start_tag.time); send_start_tag(my_fed, start_time, my_fed->effective_start_tag); } else { + // This is rather a possible extreme corner case, where a transient sends its timestamp, and only + // enters the if section after all persistents have joined. 
+ if (timestamp < start_time) { + timestamp = start_time; + } my_fed->effective_start_tag = (tag_t){.time = timestamp, .microstep = 0u}; // A transient has joined after the startup phase // At this point, we already hold the mutex From 7249e0cd96371afca79493eb8026d2ee959b70f2 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Mon, 7 Aug 2023 23:13:48 -0700 Subject: [PATCH 64/80] Clean logging after timestamp issue fixed --- core/federated/RTI/rti_lib.c | 4 ++-- core/federated/federate.c | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 027a87589..ac7fe48a6 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -784,7 +784,6 @@ void handle_timestamp(federate_t *my_fed) { // Send the start_time my_fed->effective_start_tag = (tag_t){.time = start_time, .microstep = 0u}; - lf_print("RTI: >>>> Sending to federate %d the start_time: "PRINTF_TIME" and effective: "PRINTF_TIME, my_fed->enclave.id, start_time, my_fed->effective_start_tag.time); send_start_tag(my_fed, start_time, my_fed->effective_start_tag); } else { // This is rather a possible extreme corner case, where a transient sends its timestamp, and only @@ -792,7 +791,9 @@ void handle_timestamp(federate_t *my_fed) { if (timestamp < start_time) { timestamp = start_time; } + my_fed->effective_start_tag = (tag_t){.time = timestamp, .microstep = 0u}; + // A transient has joined after the startup phase // At this point, we already hold the mutex @@ -845,7 +846,6 @@ void handle_timestamp(federate_t *my_fed) { // together with the start time of the federation. 
// Send the start time - lf_print("RTI: >>>> Sending to federate %d the start_time: "PRINTF_TIME" and effective: "PRINTF_TIME, my_fed->enclave.id, start_time, my_fed->effective_start_tag.time); send_start_tag(my_fed, start_time, my_fed->effective_start_tag); } } diff --git a/core/federated/federate.c b/core/federated/federate.c index edd22a6a7..768e49fe8 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -2639,8 +2639,6 @@ void synchronize_with_other_federates(void) { // Note that this does not grant execution to this federate. start_time = get_start_time_from_rti(lf_time_physical()); - lf_print(">>>> Start_time is: "PRINTF_TIME" and effective is: "PRINTF_TIME, lf_get_start_time(), lf_get_effective_start_time()); - // Start a thread to listen for incoming TCP messages from the RTI. // @note Up until this point, the federate has been listening for messages // from the RTI in a sequential manner in the main thread. From now on, a From 25beb9571285359b69b5fbd1d09bc36fb49a3d61 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Wed, 9 Aug 2023 01:22:09 -0700 Subject: [PATCH 65/80] Make transients timers in sync with the federation start_time, not the transient's effective_start_time --- core/reactor_common.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/core/reactor_common.c b/core/reactor_common.c index b7a3c9ac7..56a329530 100644 --- a/core/reactor_common.c +++ b/core/reactor_common.c @@ -488,7 +488,24 @@ void _lf_initialize_timer(environment_t* env, trigger_t* timer) { return; } #endif - if (timer->offset == 0) { + + // Use temporary offset value + interval_t offset = timer->offset; + +#ifdef FEDERATED + // If the federate is a transient who joined after the startup phase, adjust + // the offset to account for the events that have to be missed + if (lf_get_start_time() != lf_get_effective_start_time()) { + interval_t remaing_of_period = (lf_get_effective_start_time() - lf_get_start_time() - offset) % 
timer->period; + if (remaing_of_period == 0) { + offset = 0; + } else { + offset = timer->period - remaing_of_period; + } + } +#endif // FEDERATED + + if (offset == 0) { for (int i = 0; i < timer->number_of_reactions; i++) { _lf_trigger_reaction(env, timer->reactions[i], -1); tracepoint_schedule(env->trace, timer, 0LL); // Trace even though schedule is not called. @@ -501,7 +518,7 @@ void _lf_initialize_timer(environment_t* env, trigger_t* timer) { } } else { // Schedule at t + offset. - delay = timer->offset; + delay = offset; } // Get an event_t struct to put on the event queue. From c0f9d723b35906578e65e340b1464ad443fafa4e Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Tue, 15 Aug 2023 11:44:16 -0700 Subject: [PATCH 66/80] Cleanup residual code and comments --- core/federated/RTI/rti_lib.c | 16 ++---- core/federated/RTI/rti_lib.h | 7 --- core/federated/federate.c | 78 ----------------------------- include/core/federated/net_common.h | 41 --------------- include/core/trace.h | 12 ++--- 5 files changed, 7 insertions(+), 147 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index ac7fe48a6..075fa8032 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -530,9 +530,9 @@ void _lf_rti_broadcast_stop_time_to_federates_locked() { // Iterate over federates and send each the message. for (int i = 0; i < _f_rti->number_of_enclaves; i++) { federate_t *fed = _f_rti->enclaves[i]; - // if (fed->enclave.state == NOT_CONNECTED) { - // continue; - // } + if (fed->enclave.state == NOT_CONNECTED) { + continue; + } if (lf_tag_compare(fed->enclave.next_event, _f_rti->max_stop_tag) >= 0) { // Need the next_event to be no greater than the stop tag. 
fed->enclave.next_event = _f_rti->max_stop_tag; @@ -1106,9 +1106,6 @@ void* federate_thread_TCP(void* fed) { case MSG_TYPE_TIMESTAMP: handle_timestamp(my_fed); break; - // case MSG_TYPE_CURRENT_TAG_QUERY_RESPONSE: - // handle_current_tag_query_response(my_fed); - // break; case MSG_TYPE_ADDRESS_QUERY: handle_address_query(my_fed->enclave.id); break; @@ -1151,7 +1148,6 @@ void* federate_thread_TCP(void* fed) { close(my_fed->socket); // from unistd.h // Manual clean, in case of a transient federate - // FIXME: Should free_in_transit_message_q be called in case of persistent federates as well? if (my_fed->is_transient) { free_in_transit_message_q(my_fed->in_transit_message_tags); lf_print("RTI: Transient Federate %d thread exited.", my_fed->enclave.id); @@ -1631,9 +1627,6 @@ void initialize_federate(federate_t* fed, uint16_t id) { fed->server_port = -1; fed->is_transient = true; fed->effective_start_tag = NEVER_TAG; - fed->num_of_conn_federates = 0; - fed->num_of_conn_federates_sent_net = 0; - fed->start_time_is_set = false; fed->pending_grant = NEVER_TAG; } @@ -2060,9 +2053,6 @@ void reset_transient_federate(federate_t* fed) { fed->requested_stop = false; fed->is_transient = true; fed->effective_start_tag = NEVER_TAG; - fed->num_of_conn_federates = 0; - fed->num_of_conn_federates_sent_net = 0; - fed->start_time_is_set = false; fed->pending_grant = NEVER_TAG; } diff --git a/core/federated/RTI/rti_lib.h b/core/federated/RTI/rti_lib.h index 9131511f2..e6cbef14f 100644 --- a/core/federated/RTI/rti_lib.h +++ b/core/federated/RTI/rti_lib.h @@ -69,13 +69,6 @@ typedef struct federate_t { // server of the federate. bool is_transient; // Indicates whether the federate is transient or persistent. tag_t effective_start_tag; // Records the start time of the federate, which is mainly useful for transient federates - int num_of_conn_federates; // Records the total number of connected federates among the upstream and - // downstream federates. 
This is used only in the case of transient federate, for - // computing the start time. - int num_of_conn_federates_sent_net; // Records the total number of connected federates - // that sent responded to the next event tag query form the RTI. - bool start_time_is_set; // Boolean variable used to signal that all connected federates - // have sent a response to next event tag query from the RTI. tag_t pending_grant; // The pending tag advance grant tag_t pending_provisional_grant; // The pending provisional tag advance grant lf_thread_t pending_grant_thread_id; // The ID of the thread handling the pending tag grant diff --git a/core/federated/federate.c b/core/federated/federate.c index 60c4b3c95..4390914c2 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -2322,81 +2322,6 @@ void handle_stop_request_message() { lf_mutex_unlock(&outbound_socket_mutex); } -////////////////// Start of transient time coordination //////////////////////// - -/** - * Handle a Next Event Tag query received from the RTI. Such message is sent when - * a transient federate attempts to join a federation after the startup phase. - * The funtion will read the NET in the event queue and call - * send_current_tag_query_answer(). - * - * FIXME: This function assumes the caller does hold the mutex lock? 
- */ -// void handle_next_event_tag_query(){ -// tracepoint_federate_from_RTI(receive_CuTAG_QR, _lf_my_fed_id, &NEVER_TAG); - -// // Extract the transient federate Id -// size_t bytes_to_read = sizeof(uint16_t); -// unsigned char buffer[bytes_to_read]; -// read_from_socket_errexit(_fed.socket_TCP_RTI, bytes_to_read, buffer, -// "Failed to read the transient federate ID."); - -// // Read it -// uint16_t transient_id = extract_uint16(buffer); -// // Check if the message is intended for this federate -// assert(_lf_my_fed_id != transient_id); -// LF_PRINT_DEBUG("Receiving NET query message regarding transient federate %d.", transient_id); - -// // Get the next event tag in the reactions queue -// // tag_t next_tag = _fed.last_sent_LTC; -// // tag_t next_tag = get_next_event_tag(); -// tag_t next_tag = lf_tag(); - -// instant_t logical_time = next_tag.time; - -// // Answer with the time instant of the next event tag -// send_current_tag_query_response(logical_time, transient_id); -// } - -// /** -// * Send to RTI the answer to current tag query. -// * -// * @param time The time. -// * @param transient_id The transient federate id to send back -// * Print a soft error message otherwise -// */ -// void send_current_tag_query_response(instant_t time, uint16_t transient_id) { -// LF_PRINT_DEBUG("Sending logical time " PRINTF_TIME " to the RTI regarding NET QR RES of trabsient %d.", time, transient_id); -// size_t bytes_to_write = 1 + sizeof(instant_t) + sizeof(uint16_t); -// unsigned char buffer[bytes_to_write]; -// buffer[0] = MSG_TYPE_CURRENT_TAG_QUERY_RESPONSE; -// encode_int64(time, &(buffer[1])); -// encode_uint16(transient_id, &(buffer[9])); -// lf_mutex_lock(&outbound_socket_mutex); -// if (_fed.socket_TCP_RTI < 0) { -// lf_print_warning("Socket is no longer connected. 
Dropping message."); -// lf_mutex_unlock(&outbound_socket_mutex); -// return; -// } - -// tag_t tag = {.time = time, .microstep = 0}; -// // Trace the event when tracing is enabled -// tracepoint_federate_to_RTI(send_CuTAG_QR_RES, _lf_my_fed_id, &tag); - -// ssize_t bytes_written = write_to_socket(_fed.socket_TCP_RTI, bytes_to_write, buffer); -// if (bytes_written < (ssize_t)bytes_to_write) { -// lf_print_error_and_exit("Failed to send time " PRINTF_TIME " to the RTI." -// " Error code %d: %s", -// time - start_time, -// errno, -// strerror(errno) -// ); -// } -// lf_mutex_unlock(&outbound_socket_mutex); -// } - -/////////////////// End of transient time coordination ///////////////////////// - /** * Close sockets used to communicate with other federates, if they are open, * and send a MSG_TYPE_RESIGN message to the RTI. This implements the function @@ -2614,9 +2539,6 @@ void* listen_to_rti_TCP(void* args) { case MSG_TYPE_PORT_ABSENT: handle_port_absent_message(_fed.socket_TCP_RTI, -1); break; - // case MSG_TYPE_CURRENT_TAG_QUERY: - // handle_next_event_tag_query(); - // break; case MSG_TYPE_CLOCK_SYNC_T1: case MSG_TYPE_CLOCK_SYNC_T4: lf_print_error("Federate %d received unexpected clock sync message from RTI on TCP socket.", diff --git a/include/core/federated/net_common.h b/include/core/federated/net_common.h index 8d18dc8b9..b2630535c 100644 --- a/include/core/federated/net_common.h +++ b/include/core/federated/net_common.h @@ -696,47 +696,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define MSG_TYPE_NEIGHBOR_STRUCTURE 24 #define MSG_TYPE_NEIGHBOR_STRUCTURE_HEADER_SIZE 9 -/////////// Messages used by joining transient federates /////////////// -//// Overview of the algorithm: -//// When a transient federate joins the deferation after the startup phase (all -//// persistent federates have joined and received, or all least are receiveing -//// their start_time), its start_time is decided based on the current Tags -//// of its upstream federates and its own physical join time. Current Tags are -//// queried by the RTI, by sending MSG_TYPE_CURRENT_TAG_QUERY. Federates will -//// answer with MSG_TYPE_CURRENT_TAG_QUERY_RESPONSE. -//// The start_time of the transient will be the the maximun of all received -//// MSG_TYPE_CURRENT_TAG_QUERY_RESPONSE and its physical join time. -//// This choice avoids deadline violations and enables hot join (without any -//// federate to halt its execution). - -/** - * Byte identifying a query of a federate about its current Tag. This is useful - * when deciding about the start_time of a joining transient federate. - */ -#define MSG_TYPE_CURRENT_TAG_QUERY 30 - -/** - * Byte identifying a response to a MSG_TYPE_CURRENT_TAG_QUERY. This is useful - * when deciding about the start_time of a joining transient federate. - */ -#define MSG_TYPE_CURRENT_TAG_QUERY_RESPONSE 31 - -/** - * Byte identifying a request sent by the RTI to upstream and downstream federates - * of a joining transient. It asks them to halt reactions at the given tag. This - * is useful for the synchronous logical start a joining transient federate. - */ -#define MSG_TYPE_HALT 32 - -/** - * Byte identifying a request sent by the RTI to upstream and downstream federates - * of a joining transient to resume reactions at the tag sent within MSG_TYPE_HALT. - * This is useful for the synchronous logical start a joining transient federate. 
- */ -#define MSG_TYPE_RESUME 33 - -/////////// End of messages used by joining transient federates /////////////// - ///////////////////////////////////////////// //// Rejection codes diff --git a/include/core/trace.h b/include/core/trace.h index 8a594d584..234595580 100644 --- a/include/core/trace.h +++ b/include/core/trace.h @@ -121,10 +121,8 @@ typedef enum receive_ADR_QR, receive_UNIDENTIFIED, // Transient - send_CuTAG_QR, - send_CuTAG_QR_RES, - receive_CuTAG_QR, - receive_CuTAG_QR_RES, + send_STOP, + receive_STOP, NUM_EVENT_TYPES } trace_event_t; @@ -189,10 +187,8 @@ static const char *trace_event_names[] = { "Receiving ADR_QR", "Receiving UNIDENTIFIED", // Transient - "Sending CuTAG_QR", - "Sending CuTAG_QR_RES", - "Receiving CuTAG_QR", - "Receiving CuTAG_QR_RES" + "Sending STOP", + "Receiving STOP" }; // FIXME: Target property should specify the capacity of the trace buffer. From b8c586975659d94c39f6b72a5c69417dbc793161 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Tue, 15 Aug 2023 13:03:46 -0700 Subject: [PATCH 67/80] Add rti phase --- core/federated/RTI/rti_lib.c | 7 +++++-- core/federated/RTI/rti_lib.h | 12 ++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 075fa8032..6873b5d40 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -758,7 +758,7 @@ void handle_timestamp(federate_t *my_fed) { // Processing the TIMESTAMP depends on whether it is the startup phase (all // persistent federates joined) or not. 
- if (_f_rti->num_feds_proposed_start < (_f_rti->number_of_enclaves - _f_rti->number_of_transient_federates)) { + if (_f_rti->phase == startup_phase) { // This is equivalent to: _f_rti->num_feds_proposed_start < (_f_rti->number_of_enclaves - _f_rti->number_of_transient_federates) if (timestamp > _f_rti->max_start_time) { _f_rti->max_start_time = timestamp; } @@ -769,6 +769,7 @@ void handle_timestamp(federate_t *my_fed) { if (_f_rti->num_feds_proposed_start == (_f_rti->number_of_enclaves - _f_rti->number_of_transient_federates)) { // All federates have proposed a start time. lf_cond_broadcast(&received_start_times); + _f_rti->phase = execution_phase; } else { // Some federates have not yet proposed a start time. // wait for a notification. @@ -777,6 +778,7 @@ void handle_timestamp(federate_t *my_fed) { lf_cond_wait(&received_start_times); } } + lf_mutex_unlock(&rti_mutex); // Add an offset to this start time to get everyone starting together. @@ -785,7 +787,7 @@ void handle_timestamp(federate_t *my_fed) { // Send the start_time my_fed->effective_start_tag = (tag_t){.time = start_time, .microstep = 0u}; send_start_tag(my_fed, start_time, my_fed->effective_start_tag); - } else { + } else if (_f_rti == execution_phase) { // This is rather a possible extreme corner case, where a transient sends its timestamp, and only // enters the if section after all persistents have joined. 
if (timestamp < start_time) { @@ -1963,6 +1965,7 @@ void initialize_RTI(){ _f_rti->num_feds_proposed_start = 0, _f_rti->all_federates_exited = false, _f_rti->federation_id = "Unidentified Federation", + _f_rti->phase = startup_phase, _f_rti->user_specified_port = 0, _f_rti->final_port_TCP = 0, _f_rti->socket_descriptor_TCP = -1, diff --git a/core/federated/RTI/rti_lib.h b/core/federated/RTI/rti_lib.h index e6cbef14f..2b3b7da28 100644 --- a/core/federated/RTI/rti_lib.h +++ b/core/federated/RTI/rti_lib.h @@ -84,6 +84,15 @@ typedef enum clock_sync_stat { clock_sync_on } clock_sync_stat; +/** + * The RTI life cycle phase. + */ +typedef enum rti_phase { + startup_phase, + execution_phase, + shutdown_phase +} rti_phase; + /** * Structure that an RTI instance uses to keep track of its own and its * corresponding federates' state. @@ -142,6 +151,9 @@ typedef struct federation_rti_t { */ const char* federation_id; + // RTI current phase + rti_phase phase; + /************* TCP server information *************/ /** The desired port specified by the user on the command line. */ uint16_t user_specified_port; From 2195be923ba1a5b0fafb8f5727e22cf1d13c587a Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Tue, 15 Aug 2023 13:22:08 -0700 Subject: [PATCH 68/80] Add hot_swap_federate instance --- core/federated/RTI/rti.c | 12 ++++++++++++ core/federated/RTI/rti_lib.c | 5 +++++ 2 files changed, 17 insertions(+) diff --git a/core/federated/RTI/rti.c b/core/federated/RTI/rti.c index 6c3b771c5..14a6a9450 100644 --- a/core/federated/RTI/rti.c +++ b/core/federated/RTI/rti.c @@ -57,6 +57,11 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. extern enclave_rti_t * _e_rti; extern federation_rti_t* _f_rti; +/** + * Federate instance to support hot swap + */ +extern federate_t * hot_swap_federate; + /** * The tracing mechanism uses the number of workers variable `_lf_number_of_workers`. * For RTI tracing, the number of workers is set as the number of federates. 
@@ -120,6 +125,7 @@ int main(int argc, const char* argv[]) { assert(_f_rti->number_of_enclaves < UINT16_MAX); assert(_f_rti->number_of_transient_federates < UINT16_MAX); + // Allocate memory for the federates _f_rti->enclaves = (federate_t**)calloc(_f_rti->number_of_enclaves, sizeof(federate_t*)); for (uint16_t i = 0; i < _f_rti->number_of_enclaves; i++) { @@ -130,6 +136,12 @@ int main(int argc, const char* argv[]) { // Initialize the RTI enclaves _e_rti = (enclave_rti_t*)_f_rti; + // Allocate memory for hot_swap_federate, if there are transient federates + if (_f_rti->number_of_transient_federates > 0) { + hot_swap_federate = (federate_t *)malloc(sizeof(federate_t)); + initialize_federate(hot_swap_federate, -1); + } + int socket_descriptor = start_rti_server(_f_rti->user_specified_port); wait_for_federates(socket_descriptor); return 0; diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 6873b5d40..9fc8746d7 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -34,6 +34,11 @@ extern instant_t start_time; */ federation_rti_t *_f_rti; +/** + * Referance to the ederate instance to support hot swap + */ +federate_t * hot_swap_federate; + lf_mutex_t rti_mutex; lf_cond_t received_start_times; lf_cond_t sent_start_time; From fef4b801c29b90726e4270c637e22aa693f4a177 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Thu, 17 Aug 2023 16:53:39 -0700 Subject: [PATCH 69/80] Effective start time computation of a transient accounts for its 'inherited' LTC and downstream PTAGs as well --- core/federated/RTI/rti_lib.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 9fc8746d7..e21eefad5 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -792,17 +792,29 @@ void handle_timestamp(federate_t *my_fed) { // Send the start_time my_fed->effective_start_tag = (tag_t){.time = start_time, .microstep =
0u}; send_start_tag(my_fed, start_time, my_fed->effective_start_tag); - } else if (_f_rti == execution_phase) { + } else if (_f_rti->phase == execution_phase) { + // A transient has joined after the startup phase + // At this point, we already hold the mutex + // This is rather a possible extreme corner case, where a transient sends its timestamp, and only // enters the if section after all persistents have joined. if (timestamp < start_time) { timestamp = start_time; } + //// Algorithm for computing the effective_start_time of a joining transient + // The effective_start_time will be the max among all the following tags: + // - At tag: (joining time, 0 microstep) + // - The latest completed logical tag + 1 microstep + // - The latest granted tag + 1 microstep, of every downstream federate + // - The latest provisionnaly granted tag + 1 microstep, of every downstream federate + my_fed->effective_start_tag = (tag_t){.time = timestamp, .microstep = 0u}; - // A transient has joined after the startup phase - // At this point, we already hold the mutex + if (lf_tag_compare(my_fed->enclave.completed, my_fed->effective_start_tag) > 0) { + my_fed->effective_start_tag = my_fed->enclave.completed; + my_fed->effective_start_tag.microstep++; + } // Iterate over the downstream federates for (int j = 0; j < my_fed->enclave.num_downstream; j++) { @@ -818,6 +830,12 @@ void handle_timestamp(federate_t *my_fed) { my_fed->effective_start_tag = downstream->enclave.last_granted; my_fed->effective_start_tag.microstep++; } + + // Get the max over the PTAG of the downstreams + if (lf_tag_compare(downstream->enclave.last_provisionally_granted, my_fed->effective_start_tag) > 0) { + my_fed->effective_start_tag = downstream->enclave.last_provisionally_granted; + my_fed->effective_start_tag.microstep++; + } } // For every downstream that has a pending grant that is higher then the From 100ea5f44068052709a03a9b512fe9c8672c399e Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Thu, 17 Aug 2023 
16:56:03 -0700 Subject: [PATCH 70/80] hot_swap_federate does not need to be in the scope of rti.c --- core/federated/RTI/rti.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/core/federated/RTI/rti.c b/core/federated/RTI/rti.c index 14a6a9450..798c0b62c 100644 --- a/core/federated/RTI/rti.c +++ b/core/federated/RTI/rti.c @@ -57,11 +57,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. extern enclave_rti_t * _e_rti; extern federation_rti_t* _f_rti; -/** - * Federate instance to support hot swap - */ -extern federate_t * hot_swap_federate; - /** * The tracing mechanism uses the number of workers variable `_lf_number_of_workers`. * For RTI tracing, the number of workers is set as the number of federates. @@ -136,12 +131,6 @@ int main(int argc, const char* argv[]) { // Initialize the RTI enclaves _e_rti = (enclave_rti_t*)_f_rti; - // Allocate memory for hot_swap_federate, if there are transient federates - if (_f_rti->number_of_transient_federates > 0) { - hot_swap_federate = (federate_t *)malloc(sizeof(federate_t)); - initialize_federate(hot_swap_federate, -1); - } - int socket_descriptor = start_rti_server(_f_rti->user_specified_port); wait_for_federates(socket_descriptor); return 0; From 5b5395529e82d2d87e60bf108fbb8ad89e7bc16e Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Thu, 17 Aug 2023 17:05:26 -0700 Subject: [PATCH 71/80] Add (immediate) stop requests handling in the federates --- core/federated/RTI/rti_lib.c | 15 +++++++++++++++ core/federated/federate.c | 23 +++++++++++++++++++++-- include/core/federated/federate.h | 7 +++++++ include/core/federated/net_common.h | 10 ++++++++++ 4 files changed, 53 insertions(+), 2 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index e21eefad5..ea353e56f 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -2004,6 +2004,21 @@ void initialize_RTI(){ ////////////////////////////////////////////////////////// +void 
send_stop(federate_t * fed) { + // Reply with a stop granted to all federates + unsigned char outgoing_buffer[MSG_TYPE_STOP_LENGTH]; + outgoing_buffer[0] = MSG_TYPE_STOP; + lf_print("RTI sent MSG_TYPE_STOP to federate %d.", fed->enclave.id); + + if (_f_rti->tracing_enabled) { + tracepoint_rti_to_federate(_f_rti->trace, send_STOP, fed->enclave.id, NULL); + } + write_to_socket_errexit(fed->socket, MSG_TYPE_STOP_LENGTH, outgoing_buffer, + "RTI failed to send MSG_TYPE_STOP message to federate %d.", fed->enclave.id); + + LF_PRINT_LOG("RTI sent MSG_TYPE_STOP to federate %d.", fed->enclave.id); +} + void* connect_to_transient_federates_thread() { // This loop will continue to accept connections of transient federates, as // soon as there is room diff --git a/core/federated/federate.c b/core/federated/federate.c index 4390914c2..deb44af09 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -2244,6 +2244,21 @@ void handle_stop_granted_message() { } } +/** + * Handle a MSG_TYPE_STOP message from the RTI. + * + * This function simply calls lf_stop(). + */ +void handle_stop() { + // Trace the event when tracing is enabled + tracepoint_federate_from_rti(_fed.trace, receive_STOP, _lf_my_fed_id, NULL); + + lf_print("Received from RTI a MSG_TYPE_STOP at physical time " PRINTF_TIME ".", + lf_time_physical()); + + lf_stop(); +} + /** * Handle a MSG_TYPE_STOP_REQUEST message from the RTI. 
*/ @@ -2358,7 +2373,7 @@ void terminate_execution(environment_t* env) { tracepoint_federate_to_rti(_fed.trace, send_RESIGN, _lf_my_fed_id, &tag); ssize_t written = write_to_socket(_fed.socket_TCP_RTI, bytes_to_write, &(buffer[0])); if (written == bytes_to_write) { - LF_PRINT_LOG("Resigned."); + lf_print("Resigned %d", _lf_my_fed_id); } } lf_mutex_unlock(&outbound_socket_mutex); @@ -2536,6 +2551,9 @@ void* listen_to_rti_TCP(void* args) { case MSG_TYPE_STOP_GRANTED: handle_stop_granted_message(); break; + case MSG_TYPE_STOP: + handle_stop(); + break; case MSG_TYPE_PORT_ABSENT: handle_port_absent_message(_fed.socket_TCP_RTI, -1); break; @@ -2881,7 +2899,8 @@ void lf_stop() { new_stop_tag.microstep = env[i].current_tag.microstep + 1; _lf_set_stop_tag(&env[i], new_stop_tag); } - // termination(); + + LF_PRINT_LOG("Federate is stopping."); } char* lf_get_federation_id() { diff --git a/include/core/federated/federate.h b/include/core/federated/federate.h index ce21fe880..fd0d9600c 100644 --- a/include/core/federated/federate.h +++ b/include/core/federated/federate.h @@ -512,4 +512,11 @@ instant_t lf_get_effective_start_time(); /** @brief Returns the start time of the federate. */ instant_t lf_get_start_time(); +/** + * Handle a MSG_TYPE_STOP message from the RTI. + * + * This function simply calls lf_stop() + */ +void handle_stop(); + #endif // FEDERATE_H diff --git a/include/core/federated/net_common.h b/include/core/federated/net_common.h index b2630535c..9b78622af 100644 --- a/include/core/federated/net_common.h +++ b/include/core/federated/net_common.h @@ -696,6 +696,16 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define MSG_TYPE_NEIGHBOR_STRUCTURE 24 #define MSG_TYPE_NEIGHBOR_STRUCTURE_HEADER_SIZE 9 +/** + * Byte sent by the RTI ordering the federate to stop. Upon receiving the message, + * the federate will call lf_stop(), which will make it resign at its current_tag + * plus 1 microstep.
+ * The next 8 bytes will be the time at which the federates will stop. * + * The next 4 bytes will be the microstep at which the federates will stop.. + */ +#define MSG_TYPE_STOP 30 +#define MSG_TYPE_STOP_LENGTH 1 + ///////////////////////////////////////////// //// Rejection codes From 1c579bb3983295c03f53d7ae979179392971a425 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Thu, 17 Aug 2023 17:14:25 -0700 Subject: [PATCH 72/80] Add support of the hot swap mechasism --- core/federated/RTI/rti_lib.c | 238 ++++++++++++++++++++++++++--------- core/federated/RTI/rti_lib.h | 9 +- 2 files changed, 187 insertions(+), 60 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index ea353e56f..16c5650a5 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -29,16 +29,18 @@ // Global variables defined in tag.c: extern instant_t start_time; -/** - * Reference to federate_rti_t instance. - */ +// Reference to federate_rti_t instance. federation_rti_t *_f_rti; -/** - * Referance to the ederate instance to support hot swap - */ +// Referance to the ederate instance to support hot swap federate_t * hot_swap_federate; +// Indicates if a hot swap process is in progress +bool hot_swap_in_progress = false; + +// Indicates thatthe old federate has stopped +bool hot_swap_old_resigned = false; + lf_mutex_t rti_mutex; lf_cond_t received_start_times; lf_cond_t sent_start_time; @@ -1096,6 +1098,12 @@ void handle_federate_resign(federate_t *my_fed) { lf_print("Federate %d has resigned.", my_fed->enclave.id); + + // Signal the hot swap mechanism, if needed + if (hot_swap_in_progress && hot_swap_federate->enclave.id == my_fed->enclave.id) { + hot_swap_old_resigned = true; + } + // Check downstream federates to see whether they should now be granted a TAG. // To handle cycles, need to create a boolean array to keep // track of which upstream federates have been visited. 
@@ -1183,6 +1191,7 @@ void* federate_thread_TCP(void* fed) { // Reset the status of the leaving federate reset_transient_federate(my_fed); + lf_mutex_unlock(&rti_mutex); } @@ -1278,18 +1287,63 @@ int32_t receive_and_check_fed_id_message(int socket_id, struct sockaddr_in* clie send_reject(socket_id, FEDERATE_ID_OUT_OF_RANGE); return -1; } else { + // Identify if it is a new connection or a hot swap + // Reject if: + // - duplicate of a connected persistent federate + // - or hot_swap is already in progress (Only 1 hot swap at a time!), for that + // particular federate + // - or it is a hot swap, but it is not the execution phase yet + if ((_f_rti->enclaves[fed_id])->enclave.state != NOT_CONNECTED) { - lf_print_error("RTI received duplicate federate ID: %d.", fed_id); - if (_f_rti->tracing_enabled) { - tracepoint_rti_to_federate(_f_rti->trace, send_REJECT, fed_id, NULL); + if (!is_transient) { + lf_print_error("RTI received unallowed duplicate persistent federate ID: %d.", fed_id); + if (_f_rti->tracing_enabled) { + tracepoint_rti_to_federate(_f_rti->trace, send_REJECT, fed_id, NULL); + } + send_reject(socket_id, FEDERATE_ID_IN_USE); + lf_print(">>> Rejected, because a duplicate of a persistent."); + return -1; + } else if ( + hot_swap_in_progress + || _f_rti->phase != execution_phase + ) { + lf_print_warning("Reject for %d, because Hot swap is already in progress for federate %d.",fed_id, hot_swap_federate->enclave.id); + if (_f_rti->tracing_enabled) { + tracepoint_rti_to_federate(_f_rti->trace, send_REJECT, fed_id, NULL); + } + send_reject(socket_id, FEDERATE_ID_IN_USE); + return -1; + } - send_reject(socket_id, FEDERATE_ID_IN_USE); - return -1; } } } } - federate_t* fed = _f_rti->enclaves[fed_id]; + + // If the federate is already connected (making the request a duplicate), and that + // the federate is transient, and it is the execution phase, then mark that a hot + // swap is in progreass and initialize the hot_swap_federate. 
+ // Otherwise, proceed with a normal transinet connection + federate_t* fed; + if ( + (_f_rti->enclaves[fed_id])->enclave.state != NOT_CONNECTED + && is_transient + && (_f_rti->enclaves[fed_id])->is_transient + && _f_rti->phase == execution_phase + && !hot_swap_in_progress + ) { + // Allocate memory for the new federate and initilize it + hot_swap_federate = (federate_t *)malloc(sizeof(federate_t)); + initialize_federate(hot_swap_federate, fed_id); + + // Set that hot swap is in progress + hot_swap_in_progress = true; + fed = hot_swap_federate; + lf_print("RTI: Hot Swap starting for federate %d.", fed_id); + } else { + fed = _f_rti->enclaves[fed_id]; + } + // The MSG_TYPE_FED_IDS message has the right federation ID. // Assign the address information for federate. // The IP address is stored here as an in_addr struct (in .server_ip_addr) that can be useful @@ -1314,7 +1368,8 @@ int32_t receive_and_check_fed_id_message(int socket_id, struct sockaddr_in* clie // because it is waiting for the start time to be // sent by the RTI before beginning its execution. fed->enclave.state = PENDING; - _f_rti->enclaves[fed_id]->is_transient = is_transient; + fed->is_transient = is_transient; + fed->enclave.id = fed_id; LF_PRINT_DEBUG("RTI responding with MSG_TYPE_ACK to federate %d.", fed_id); // Send an MSG_TYPE_ACK message. 
@@ -1345,7 +1400,12 @@ int receive_connection_information(int socket_id, uint16_t fed_id) { send_reject(socket_id, UNEXPECTED_MESSAGE); return 0; } else { - federate_t* fed = _f_rti->enclaves[fed_id]; + federate_t* fed; + if (hot_swap_in_progress) { + fed = hot_swap_federate; + } else { + fed = _f_rti->enclaves[fed_id]; + } // Read the number of upstream and downstream connections fed->enclave.num_upstream = extract_int32(&(connection_info_header[1])); fed->enclave.num_downstream = extract_int32(&(connection_info_header[1 + sizeof(int32_t)])); @@ -1411,7 +1471,12 @@ int receive_udp_message_and_set_up_clock_sync(int socket_id, uint16_t fed_id) { send_reject(socket_id, UNEXPECTED_MESSAGE); return 0; } else { - federate_t *fed = _f_rti->enclaves[fed_id]; + federate_t* fed; + if (hot_swap_in_progress) { + fed = hot_swap_federate; + } else { + fed = _f_rti->enclaves[fed_id]; + } if (_f_rti->clock_sync_global_status >= clock_sync_init) {// If no initial clock sync, no need perform initial clock sync. uint16_t federate_UDP_port_number = extract_uint16(&(response[1])); @@ -1653,6 +1718,7 @@ void initialize_federate(federate_t* fed, uint16_t id) { fed->is_transient = true; fed->effective_start_tag = NEVER_TAG; fed->pending_grant = NEVER_TAG; + fed->pending_provisional_grant = NEVER_TAG; } int32_t start_rti_server(uint16_t port) { @@ -1711,6 +1777,7 @@ void wait_for_federates(int socket_descriptor) { } lf_print("All persistent threads exited."); + _f_rti->phase = shutdown_phase; // Wait for transient federate threads to exit. 
// NOTE: It is important to separate the waiting of persistent federates from @@ -2022,66 +2089,116 @@ void send_stop(federate_t * fed) { void* connect_to_transient_federates_thread() { // This loop will continue to accept connections of transient federates, as // soon as there is room + while (!_f_rti->all_federates_exited) { - if (_f_rti->number_of_connected_transient_federates < _f_rti->number_of_transient_federates) { - // Continue waiting for an incoming connection requests from transients. - struct sockaddr client_fd; - uint32_t client_length = sizeof(client_fd); - // The following blocks until a federate connects. - int socket_id = -1; - while(1) { - if (_f_rti->all_federates_exited) { - return NULL; - } - socket_id = accept(_f_rti->socket_descriptor_TCP, &client_fd, &client_length); - if (socket_id >= 0) { - // Got a socket - break; - } else { - // Try again - lf_print_warning("RTI failed to accept the socket. %s. Trying again.", strerror(errno)); - continue; - } + // Continue waiting for an incoming connection requests from transients + // to join, or for hot swap. + struct sockaddr client_fd; + uint32_t client_length = sizeof(client_fd); + // The following blocks until a federate connects. + int socket_id = -1; + while(1) { + if (_f_rti->all_federates_exited) { + return NULL; } - - // Send RTI hello when RTI -a option is on. - #ifdef __RTI_AUTH__ - if (_f_rti->authentication_enabled) { - if (!authenticate_federate(socket_id)) { - lf_print_warning("RTI failed to authenticate the incoming federate."); - // Ignore the federate that failed authentication. - continue; - } + socket_id = accept(_f_rti->socket_descriptor_TCP, &client_fd, &client_length); + if (socket_id >= 0) { + // Got a socket + break; + } else { + // Try again + lf_print_warning("RTI failed to accept the socket. %s. Trying again.", strerror(errno)); + continue; } - #endif - - // The first message from the federate should contain its ID and the federation ID. 
- int32_t fed_id = receive_and_check_fed_id_message(socket_id, (struct sockaddr_in*)&client_fd); - // FIXME: THIS IS A TEMPORARY HACK THAT MAKES FEDERATES WITH EVEN IDs PERSISTENT - // AND THOSE WITH ODD IDs TRANSIENT!!! - if (fed_id >= 0 - && receive_connection_information(socket_id, (uint16_t)fed_id) - && receive_udp_message_and_set_up_clock_sync(socket_id, (uint16_t)fed_id)) { - + } + + // Send RTI hello when RTI -a option is on. + #ifdef __RTI_AUTH__ + if (_f_rti->authentication_enabled) { + if (!authenticate_federate(socket_id)) { + lf_print_warning("RTI failed to authenticate the incoming federate."); + // Ignore the federate that failed authentication. + continue; + } + } + #endif + + // The first message from the federate should contain its ID and the federation ID. + // The function also detects if a hot swap request is initiated. + int32_t fed_id = receive_and_check_fed_id_message(socket_id, (struct sockaddr_in*)&client_fd); + + if (fed_id >= 0 + && receive_connection_information(socket_id, (uint16_t)fed_id) + && receive_udp_message_and_set_up_clock_sync(socket_id, (uint16_t)fed_id)) + { + lf_mutex_lock(&rti_mutex); + if (hot_swap_in_progress) { + lf_print("RTI: Hot swap confirmed for federate %d.", fed_id); + + // Then send STOP + federate_t *fed_old = _f_rti->enclaves[fed_id]; + hot_swap_federate->enclave.completed = fed_old->enclave.completed; + + LF_PRINT_LOG("RTI: Send MSG_TYPE_STOP to old federate %d.", fed_id); + send_stop(fed_old); + lf_mutex_unlock(&rti_mutex); + + // Wait for the old federate to send MSG_TYPE_RESIGN + LF_PRINT_LOG("RTI: Waiting for old federate %d to send resign.", fed_id); + // FIXME: Should this have a timeout? + while(!hot_swap_old_resigned); + + // The latest LTC is the tag at which the old federate resigned. This is useful + // for computing the effective_start_time of the new joining federate. + hot_swap_federate->enclave.completed = fed_old->enclave.completed; + + // Create a thread to communicate with the federate. 
+ // This has to be done after clock synchronization is finished + // or that thread may end up attempting to handle incoming clock + // synchronization messages. + lf_thread_create(&(hot_swap_federate->thread_id), federate_thread_TCP, hot_swap_federate); + + // Redirect the federate in _f_rti + _f_rti->enclaves[fed_id] = hot_swap_federate; + + // Free the old federate memory and reset the Hot wap indicators + // FIXME: Is this enough to free the memory allocated to the federate? + free(fed_old); + lf_mutex_lock(&rti_mutex); + hot_swap_in_progress = false; + lf_mutex_unlock(&rti_mutex); + + lf_print("RTI: Hot swap succeeded for federate %d.", fed_id); + } else { + lf_mutex_unlock(&rti_mutex); + // Create a thread to communicate with the federate. // This has to be done after clock synchronization is finished // or that thread may end up attempting to handle incoming clock // synchronization messages. federate_t *fed = _f_rti->enclaves[fed_id]; lf_thread_create(&(fed->thread_id), federate_thread_TCP, fed); - fed->is_transient = true; - _f_rti->number_of_connected_transient_federates++; lf_print("RTI: Transient federate %d joined.", fed_id); } + _f_rti->number_of_connected_transient_federates++; + } else { + // If a hot swap was initialed, but the connection information or/and clock + // synchronization fail, then reset hot_swap_in_profress, and free the memory + // allocated for hot_swap_federate + if (hot_swap_in_progress) { + lf_print("RTI: Hot swap canceled for federate %d.", fed_id); + lf_mutex_lock(&rti_mutex); + hot_swap_in_progress = false; + lf_mutex_unlock(&rti_mutex); + + // FIXME: Is this enough to free the memory of a federate_t data structure? 
+ free(hot_swap_federate); + } } } } void reset_transient_federate(federate_t* fed) { - // Reset of the enclave-related attributes - // FIXME: Should check further what to reset in the enclave data structure - fed->enclave.last_granted = NEVER_TAG; - fed->enclave.last_provisionally_granted = NEVER_TAG; fed->enclave.next_event = NEVER_TAG; fed->enclave.state = NOT_CONNECTED; // Reset of the federate-related attributes @@ -2095,6 +2212,9 @@ void reset_transient_federate(federate_t* fed) { fed->is_transient = true; fed->effective_start_tag = NEVER_TAG; fed->pending_grant = NEVER_TAG; + + // FIXME: Need to free the enclave's memory for updstreams, delays, and downstreams + // FIXME: There is room though to check if the interface has changed??? Do we allow this? } void* pending_grant_thread(void* federate) { diff --git a/core/federated/RTI/rti_lib.h b/core/federated/RTI/rti_lib.h index 2b3b7da28..676255cff 100644 --- a/core/federated/RTI/rti_lib.h +++ b/core/federated/RTI/rti_lib.h @@ -587,11 +587,18 @@ void initialize_RTI(); void* connect_to_transient_federates_thread(); /** - * Reset the federate with the specified ID. The federate has to be transient. + * Reset the federate. The federate has to be transient. * @param fed A pointer to the federate */ void reset_transient_federate(federate_t* fed); +/** + * @brief a request for immediate stop to the federate + * + * @param fed: the deferate to stop + */ +void send_stop(federate_t * fed); + /** * @brief Thread that sleeps for a period of time, and then wakes up to check if * a tag advance grant needs to be sent. 
That is, if the pending tag have not From 47a2612fc8b2a7d66ee6ae7c86fd77eee518496e Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Fri, 18 Aug 2023 16:35:55 -0700 Subject: [PATCH 73/80] Add support for STOP message in fedsd --- util/tracing/visualization/fedsd.py | 20 +++++++++++++------- util/tracing/visualization/fedsd_helper.py | 4 +++- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/util/tracing/visualization/fedsd.py b/util/tracing/visualization/fedsd.py index ce77998b5..faaa34a6f 100644 --- a/util/tracing/visualization/fedsd.py +++ b/util/tracing/visualization/fedsd.py @@ -26,6 +26,7 @@ .TAG { stroke: #08a578; fill: #08a578} \ .TIMESTAMP { stroke: grey; fill: grey } \ .FED_ID {stroke: #80DD99; fill: #80DD99 } \ + .STOP {stroke: #d0b7eb; fill: #d0b7eb} \ .ADV {stroke-linecap="round" ; stroke: "red" ; fill: "red"} \ text { \ font-size: smaller; \ @@ -53,7 +54,7 @@ # Events matching at the sender and receiver ends depend on whether they are tagged # (the elapsed logical time and microstep have to be the same) or not. # Set of tagged events (messages) -non_tagged_messages = {'FED_ID', 'ACK', 'REJECT', 'ADR_RQ', 'ADR_AD', 'MSG', 'P2P_MSG'} +non_tagged_messages = {'FED_ID', 'ACK', 'REJECT', 'ADR_RQ', 'ADR_AD', 'MSG', 'P2P_MSG', 'STOP'} def load_and_process_csv_file(csv_file) : ''' @@ -124,11 +125,16 @@ def load_and_process_csv_file(csv_file) : if (not fed_df.empty): # Get the federate id number fed_id = fed_df.iloc[-1]['self_id'] - # Add to the list of sequence diagram actors and add the name - actors.append(fed_id) - actors_names[fed_id] = Path(fed_trace).stem - # Derive the x coordinate of the actor - x_coor[fed_id] = (padding * 2) + (spacing * (len(actors) - 1)) + ### Check that the federate id have not been entrered yet. 
+ ### This is particularly useful for transient actors, when + ### they leave and join several times + if (actors.count(fed_id) == 0): + # Add to the list of sequence diagram actors and add the name + actors.append(fed_id) + actors_names[fed_id] = Path(fed_trace).stem + # Derive the x coordinate of the actor + x_coor[fed_id] = (padding * 2) + (spacing * (len(actors)-1)) + fed_df['x1'] = x_coor[fed_id] trace_df = pd.concat([trace_df, fed_df]) fed_df = fed_df[0:0] @@ -292,7 +298,7 @@ def load_and_process_csv_file(csv_file) : # FIXME: Using microseconds is hardwired here. physical_time = f'{int(row["physical_time"]/1000):,}' - if (row['event'] in {'FED_ID', 'ACK', 'REJECT', 'ADR_RQ', 'ADR_AD', 'MSG', 'P2P_MSG'}): + if (row['event'] in {'FED_ID', 'ACK', 'REJECT', 'ADR_RQ', 'ADR_AD', 'MSG', 'P2P_MSG', 'STOP'}): label = row['event'] else: label = row['event'] + '(' + f'{int(row["logical_time"]):,}' + ', ' + str(row['microstep']) + ')' diff --git a/util/tracing/visualization/fedsd_helper.py b/util/tracing/visualization/fedsd_helper.py index 37b339669..1a42f965a 100644 --- a/util/tracing/visualization/fedsd_helper.py +++ b/util/tracing/visualization/fedsd_helper.py @@ -44,7 +44,9 @@ "Receiving ADR_AD": "ADR_AD", "Receiving ADR_QR": "ADR_QR", "Receiving UNIDENTIFIED": "UNIDENTIFIED", - "Scheduler advancing time ends": "AdvLT" + "Scheduler advancing time ends": "AdvLT", + "Sending STOP": "STOP", + "Receiving STOP": "STOP" } prune_event_name.setdefault(" ", "UNIDENTIFIED") From 89de3fb2592f65ab97ee05aa6b24fe20d99c78f5 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Wed, 23 Aug 2023 06:12:24 -0700 Subject: [PATCH 74/80] Re-add the support of stop messages --- util/tracing/visualization/fedsd.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/util/tracing/visualization/fedsd.py b/util/tracing/visualization/fedsd.py index 24f0b0558..632a9bcff 100644 --- a/util/tracing/visualization/fedsd.py +++ b/util/tracing/visualization/fedsd.py @@ -82,7 +82,9 @@
"Receiving ADR_AD": "ADR_AD", "Receiving ADR_QR": "ADR_QR", "Receiving UNIDENTIFIED": "UNIDENTIFIED", - "Scheduler advancing time ends": "AdvLT" + "Scheduler advancing time ends": "AdvLT", + "Sending STOP": "STOP", + "Receiving STOP": "STOP" } prune_event_name.setdefault(" ", "UNIDENTIFIED") @@ -204,7 +206,6 @@ def svg_string_draw_label(x1, y1, x2, y2, label) : else: rotation = 0 str_line = '\t'+label+'\n' - #print('rot = '+str(rotation)+' x1='+str(x1)+' y1='+str(y1)+' x2='+str(x2)+' y2='+str(y2)) return str_line From 407561634b8cbf654e13d660112946480d983f81 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Wed, 23 Aug 2023 12:43:31 -0700 Subject: [PATCH 75/80] Add Stop message to fedsd --- core/federated/RTI/rti_lib.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/core/federated/RTI/rti_lib.c b/core/federated/RTI/rti_lib.c index 16c5650a5..7473e8c7e 100644 --- a/core/federated/RTI/rti_lib.c +++ b/core/federated/RTI/rti_lib.c @@ -1099,10 +1099,10 @@ void handle_federate_resign(federate_t *my_fed) { lf_print("Federate %d has resigned.", my_fed->enclave.id); - // Signal the hot swap mechanism, if needed - if (hot_swap_in_progress && hot_swap_federate->enclave.id == my_fed->enclave.id) { - hot_swap_old_resigned = true; - } + // // Signal the hot swap mechanism, if needed + // if (hot_swap_in_progress && hot_swap_federate->enclave.id == my_fed->enclave.id) { + // hot_swap_old_resigned = true; + // } // Check downstream federates to see whether they should now be granted a TAG. 
// To handle cycles, need to create a boolean array to keep @@ -1195,6 +1195,11 @@ void* federate_thread_TCP(void* fed) { lf_mutex_unlock(&rti_mutex); } + // Signal the hot swap mechanism, if needed + if (hot_swap_in_progress && hot_swap_federate->enclave.id == my_fed->enclave.id) { + hot_swap_old_resigned = true; + } + return NULL; } From 3ddc540f40fdbdeae3ffd9292c8a566d4f2bae4e Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Wed, 23 Aug 2023 13:18:04 -0700 Subject: [PATCH 76/80] Revert "Make transients timers in sync with the federation start_time, not the transient's effective_start_time" This reverts commit 25beb9571285359b69b5fbd1d09bc36fb49a3d61. --- core/reactor_common.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/core/reactor_common.c b/core/reactor_common.c index 56a329530..b7a3c9ac7 100644 --- a/core/reactor_common.c +++ b/core/reactor_common.c @@ -488,24 +488,7 @@ void _lf_initialize_timer(environment_t* env, trigger_t* timer) { return; } #endif - - // Use temporary offset value - interval_t offset = timer->offset; - -#ifdef FEDERATED - // If the federate is a transient who joined after the startup phase, adjust - // the offset to account for the events that have to be missed - if (lf_get_start_time() != lf_get_effective_start_time()) { - interval_t remaing_of_period = (lf_get_effective_start_time() - lf_get_start_time() - offset) % timer->period; - if (remaing_of_period == 0) { - offset = 0; - } else { - offset = timer->period - remaing_of_period; - } - } -#endif // FEDERATED - - if (offset == 0) { + if (timer->offset == 0) { for (int i = 0; i < timer->number_of_reactions; i++) { _lf_trigger_reaction(env, timer->reactions[i], -1); tracepoint_schedule(env->trace, timer, 0LL); // Trace even though schedule is not called. @@ -518,7 +501,7 @@ void _lf_initialize_timer(environment_t* env, trigger_t* timer) { } } else { // Schedule at t + offset. 
- delay = offset; + delay = timer->offset; } // Get an event_t struct to put on the event queue. From e2acf9bf450409650356f1427a3be912454cf3b0 Mon Sep 17 00:00:00 2001 From: ChadliaJerad Date: Wed, 23 Aug 2023 21:21:14 -0700 Subject: [PATCH 77/80] Revert "Remove the path to trace_to_csv, as no longer valid" This reverts commit 8515b392eef6a845542271301358ba37a9f7f232. --- util/tracing/launch-fedsd.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/tracing/launch-fedsd.sh b/util/tracing/launch-fedsd.sh index 600cc4d37..341e67332 100755 --- a/util/tracing/launch-fedsd.sh +++ b/util/tracing/launch-fedsd.sh @@ -75,7 +75,7 @@ rti_csv_file='' for each_lft_file in $lft_files_list do # Tranform to csv - trace_to_csv $each_lft_file + ${base}/bin/trace_to_csv $each_lft_file # Get the file name csv=${each_lft_file%.*} if [ $csv == 'rti' ] From 21d5e709a86db9b54dc9e4263432e1163662c037 Mon Sep 17 00:00:00 2001 From: Chadlia Jerad Date: Tue, 10 Oct 2023 22:29:26 +0100 Subject: [PATCH 78/80] Move functions declarations, so that they become visible at the lf program level --- core/federated/federate.c | 23 +++++++++++++++++++++ include/core/federated/federate.h | 33 ------------------------------- include/core/reactor_common.h | 5 +++++ 3 files changed, 28 insertions(+), 33 deletions(-) diff --git a/core/federated/federate.c b/core/federated/federate.c index 1209e16a9..ef7221693 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -1208,6 +1208,10 @@ instant_t get_start_time_from_rti(instant_t my_physical_time) { return timestamp; } +/** + * @brief Return the directory containing the executables of the individual + * federates. + */ char* lf_get_federates_bin_directory() { return LF_FEDERATES_BIN_DIRECTORY; } @@ -2892,6 +2896,14 @@ void set_federation_trace_object(trace_t * trace) { _fed.trace = trace; } +/** + * @brief Stop the execution of a federate. 
+ * Every enclave within the federate will stop at one microstep later than its + * current tag. Unlike lf_request_stop(), this process does not require any + * involvement from the RTI, nor does it necessitate any consensus. + * + * This function is particularly useful for testing transient federates. + */ void lf_stop() { environment_t *env; int num_env = _lf_get_environments(&env); @@ -2906,14 +2918,25 @@ void lf_stop() { LF_PRINT_LOG("Federate is stopping."); } +/** + * @brief Returns the federation id. + * + * This function is useful for creating federates on runtime. + */ char* lf_get_federation_id() { return federation_metadata.federation_id; } +/** + * @brief Returns the effective start time of the federate. The start_time of persistent + * federates is equal to their effective_start_time. Transient federates, however, + * have their effective_start_time higher or equal to their start_time. + */ instant_t lf_get_effective_start_time() { return effective_start_tag.time; } +/** @brief Returns the start time of the federate. */ instant_t lf_get_start_time() { return start_time; } diff --git a/include/core/federated/federate.h b/include/core/federated/federate.h index 654632b5e..bf31caad3 100644 --- a/include/core/federated/federate.h +++ b/include/core/federated/federate.h @@ -478,39 +478,6 @@ void synchronize_with_other_federates(); */ void wait_until_port_status_known(environment_t* env, int portID, interval_t STAA); -/** - * @brief Stop the execution of a federate. - * Every enclave within the federate will stop at one microstep later than its - * current tag. Unlike lf_request_stop(), this process does not require any - * involvement from the RTI, nor does it necessitate any consensus. - * - * This function is particularly useful for testing transient federates. - */ -void lf_stop(); - -/** - * @brief Return the directory containing the executables of the individual - * federates. 
- */ -char* lf_get_federates_bin_directory(); - -/** - * @brief Returns the federation id. - * - * This function is useful for creating federates on runtime. - */ -char* lf_get_federation_id(); - -/** - * @brief Returns the effective start time of the federate. The start_time of persistent - * federates is equal to their effective_start_time. Transient federates, however, - * have their effective_start_time higher or equal to their start_time. - */ -instant_t lf_get_effective_start_time(); - -/** @brief Returns the start time of the federate. */ -instant_t lf_get_start_time(); - /** * Handle a MSG_TYPE_STOP message from the RTI. * diff --git a/include/core/reactor_common.h b/include/core/reactor_common.h index be74165b7..047d75476 100644 --- a/include/core/reactor_common.h +++ b/include/core/reactor_common.h @@ -35,6 +35,11 @@ typedef enum parse_rti_code_t { parse_rti_code_t parse_rti_addr(const char* rti_addr); void set_federation_id(const char* fid); void set_federation_trace_object(trace_t * trace); +void lf_stop(); +char* lf_get_federates_bin_directory(); +char* lf_get_federation_id(); +instant_t lf_get_effective_start_time(); +instant_t lf_get_start_time(); #endif extern struct allocation_record_t* _lf_reactors_to_free; From d790105a1d5b595e441e1cc3c72b1d845550b55e Mon Sep 17 00:00:00 2001 From: Chadlia Jerad Date: Mon, 13 Nov 2023 11:09:46 +0100 Subject: [PATCH 79/80] Remove wrongly inserted redundant function declaration after merge --- core/federated/federate.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/core/federated/federate.c b/core/federated/federate.c index 432405102..2009f0e57 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -1216,14 +1216,6 @@ char* lf_get_federates_bin_directory() { return LF_FEDERATES_BIN_DIRECTORY; } -/** - * @brief Return the directory containing the executables of the individual - * federates. 
- */ -char* lf_get_federates_bin_directory() { - return LF_FEDERATES_BIN_DIRECTORY; -} - //////////////////////////////// Port Status Handling /////////////////////////////////////// extern lf_action_base_t* _lf_action_table[]; From 55158ea984c2b5a69e800242da1557336f97e868 Mon Sep 17 00:00:00 2001 From: Chadlia Jerad Date: Mon, 13 Nov 2023 18:03:17 +0100 Subject: [PATCH 80/80] Fix the federate util functions declarations --- core/federated/federate.c | 31 ++++------------------------- include/core/federated/federate.h | 33 +++++++++++++++++++++++++++++++ include/core/reactor_common.h | 5 ----- 3 files changed, 37 insertions(+), 32 deletions(-) diff --git a/core/federated/federate.c b/core/federated/federate.c index 2009f0e57..fdb2dd865 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -1208,14 +1208,6 @@ instant_t get_start_time_from_rti(instant_t my_physical_time) { return timestamp; } -/** - * @brief Return the directory containing the executables of the individual - * federates. - */ -char* lf_get_federates_bin_directory() { - return LF_FEDERATES_BIN_DIRECTORY; -} - //////////////////////////////// Port Status Handling /////////////////////////////////////// extern lf_action_base_t* _lf_action_table[]; @@ -2919,14 +2911,6 @@ void set_federation_trace_object(trace_t * trace) { _fed.trace = trace; } -/** - * @brief Stop the execution of a federate. - * Every enclave within the federate will stop at one microstep later than its - * current tag. Unlike lf_request_stop(), this process does not require any - * involvement from the RTI, nor does it necessitate any consensus. - * - * This function is particularly useful for testing transient federates. - */ void lf_stop() { environment_t *env; int num_env = _lf_get_environments(&env); @@ -2941,25 +2925,18 @@ void lf_stop() { LF_PRINT_LOG("Federate is stopping."); } -/** - * @brief Returns the federation id. - * - * This function is useful for creating federates on runtime. 
- */ +char* lf_get_federates_bin_directory() { + return LF_FEDERATES_BIN_DIRECTORY; +} + char* lf_get_federation_id() { return federation_metadata.federation_id; } -/** - * @brief Returns the effective start time of the federate. The start_time of persistent - * federates is equal to their effective_start_time. Transient federates, however, - * have their effective_start_time higher or equal to their start_time. - */ instant_t lf_get_effective_start_time() { return effective_start_tag.time; } -/** @brief Returns the start time of the federate. */ instant_t lf_get_start_time() { return start_time; } diff --git a/include/core/federated/federate.h b/include/core/federated/federate.h index bf31caad3..5c40f1509 100644 --- a/include/core/federated/federate.h +++ b/include/core/federated/federate.h @@ -485,4 +485,37 @@ void wait_until_port_status_known(environment_t* env, int portID, interval_t STA */ void handle_stop(); +/** + * @brief Stop the execution of a federate. + * Every enclave within the federate will stop at one microstep later than its + * current tag. Unlike lf_request_stop(), this process does not require any + * involvement from the RTI, nor does it necessitate any consensus. + * + * This function is particularly useful for testing transient federates. + */ +void lf_stop(); + +/** + * @brief Return the directory containing the executables of the individual + * federates. + */ +char* lf_get_federates_bin_directory(); + +/** + * @brief Returns the federation id. + * + * This function is useful for creating federates on runtime. + */ +char* lf_get_federation_id(); + +/** + * @brief Returns the effective start time of the federate. The start_time of persistent + * federates is equal to their effective_start_time. Transient federates, however, + * have their effective_start_time higher or equal to their start_time. + */ +instant_t lf_get_effective_start_time(); + +/** @brief Returns the start time of the federate. 
*/ +instant_t lf_get_start_time(); + #endif // FEDERATE_H diff --git a/include/core/reactor_common.h b/include/core/reactor_common.h index 047d75476..be74165b7 100644 --- a/include/core/reactor_common.h +++ b/include/core/reactor_common.h @@ -35,11 +35,6 @@ typedef enum parse_rti_code_t { parse_rti_code_t parse_rti_addr(const char* rti_addr); void set_federation_id(const char* fid); void set_federation_trace_object(trace_t * trace); -void lf_stop(); -char* lf_get_federates_bin_directory(); -char* lf_get_federation_id(); -instant_t lf_get_effective_start_time(); -instant_t lf_get_start_time(); #endif extern struct allocation_record_t* _lf_reactors_to_free;