diff --git a/include/scscfsproutlet.h b/include/scscfsproutlet.h index a8b5c808f..ff8e31e77 100644 --- a/include/scscfsproutlet.h +++ b/include/scscfsproutlet.h @@ -147,7 +147,7 @@ class SCSCFSproutlet : public Sproutlet /// Record the time an INVITE took to reach ringing state. /// /// @param ringing_us Time spent until a 180 Ringing, in microseconds. - void track_session_setup_time(uint64_t tsx_start_time_usec, bool video_call); + uint64_t track_session_setup_time(uint64_t tsx_start_time_usec, bool video_call); /// Translate RequestURI using ENUM service if appropriate. void translate_request_uri(pjsip_msg* req, pj_pool_t* pool, SAS::TrailId trail); diff --git a/modules/cpp-common b/modules/cpp-common index 92f87b637..78dd50deb 160000 --- a/modules/cpp-common +++ b/modules/cpp-common @@ -1 +1 @@ -Subproject commit 92f87b6377fe22a052b1c0e73c6b9b4f4e15bfa6 +Subproject commit 78dd50deb55cbb4e59f3e3ab75edbb2d03cdeb04 diff --git a/src/Makefile b/src/Makefile index dc2293aff..a52c65341 100644 --- a/src/Makefile +++ b/src/Makefile @@ -9,6 +9,7 @@ SPROUT_COMMON_SOURCES := logger.cpp \ stack.cpp \ dnsparser.cpp \ dnscachedresolver.cpp \ + static_dns_cache.cpp \ baseresolver.cpp \ sipresolver.cpp \ bono.cpp \ diff --git a/src/authenticationsproutlet.cpp b/src/authenticationsproutlet.cpp index dcb6c348c..205a9f4d0 100644 --- a/src/authenticationsproutlet.cpp +++ b/src/authenticationsproutlet.cpp @@ -871,7 +871,7 @@ void AuthenticationSproutletTsx::create_challenge(pjsip_digest_credential* crede // We've failed to store the nonce in memcached, so we have no hope of // successfully authenticating any repsonse to a 401 Unauthorized. Send // a 500 Server Internal Error instead. - TRC_DEBUG("Failed to store nonce in memcached"); + TRC_ERROR("Failed to store nonce in memcached, for impi %s", impi.c_str()); rsp->line.status.code = PJSIP_SC_INTERNAL_SERVER_ERROR; rsp->line.status.reason = *pjsip_get_status_text(PJSIP_SC_INTERNAL_SERVER_ERROR); @@ -902,7 +902,7 @@ void AuthenticationSproutletTsx::create_challenge(pjsip_digest_credential* crede // a 4xx error to the client. if (av_source_unavailable) { - TRC_DEBUG("Downstream node is overloaded or unresponsive, unable to get Authentication vector"); + TRC_ERROR("Downstream node is overloaded or unresponsive, unable to get Authentication vector"); rsp->line.status.code = PJSIP_SC_SERVER_TIMEOUT; rsp->line.status.reason = *pjsip_get_status_text(PJSIP_SC_SERVER_TIMEOUT); SAS::Event event(trail(), SASEvent::AUTHENTICATION_FAILED_OVERLOAD, 0); diff --git a/src/compositesproutlet.cpp b/src/compositesproutlet.cpp index 758220e14..1fbdf5eff 100644 --- a/src/compositesproutlet.cpp +++ b/src/compositesproutlet.cpp @@ -1,5 +1,5 @@ /** - * @file compositesproutlet.h + * @file compositesproutlet.cpp * * Copyright (C) Metaswitch Networks 2017 * If license terms are provided to you in a COPYING file in the root directory diff --git a/src/dnsresolver.cpp b/src/dnsresolver.cpp index 0ebccf86d..b55164886 100644 --- a/src/dnsresolver.cpp +++ b/src/dnsresolver.cpp @@ -1,5 +1,5 @@ /** - * @file enumservice.cpp class implementation for an ENUM service provider + * @file dnsresolver.cpp class implementation for a DNS resolver * * Copyright (C) Metaswitch Networks 2015 * If license terms are provided to you in a COPYING file in the root directory diff --git a/src/enumservice.cpp b/src/enumservice.cpp index a61c3960d..62e0cf911 100644 --- a/src/enumservice.cpp +++ b/src/enumservice.cpp @@ -1,5 +1,5 @@ /** - * @file dnsresolver.cpp class implementation for a DNS resolver + * @file enumservice.cpp class implementation for an ENUM service provider * * Copyright (C) Metaswitch Networks 2017 * If license terms are provided to you in a COPYING file in the root directory diff --git a/src/hss_sip_mapping.cpp b/src/hss_sip_mapping.cpp index 98a9fcce3..a6e26dd62 100644 --- a/src/hss_sip_mapping.cpp +++ b/src/hss_sip_mapping.cpp @@ -60,6 +60,15 @@ pjsip_status_code determine_hss_sip_response(HTTPCode http_code, break; case HTTP_SERVER_UNAVAILABLE: + // The HSS is unavailable - the client should retry on timeout but no + // other Clearwater nodes should (as Sprout will already have retried on + // timeout). Reject with a 504 (503 is used for overload). + TRC_ERROR("Rejecting %s request as unable to contact HSS: %d", + sip_msg_type, http_code); + + st_code = (pjsip_status_code)554; + break; + case HTTP_GATEWAY_TIMEOUT: // The HSS is unavailable - the client should retry on timeout but no // other Clearwater nodes should (as Sprout will already have retried on diff --git a/src/icscfrouter.cpp b/src/icscfrouter.cpp index 53a297fc2..c71ccff1c 100644 --- a/src/icscfrouter.cpp +++ b/src/icscfrouter.cpp @@ -374,6 +374,7 @@ int ICSCFUARouter::hss_query() { // HSS failed to respond or responded with invalid data, so reject the // request with a 480. + TRC_ERROR("I-CSCF UAR got %d HTTP response from Homestead", rc); status_code = PJSIP_SC_TEMPORARILY_UNAVAILABLE; } else @@ -447,6 +448,7 @@ int ICSCFLIRouter::hss_query() // HSS failed to respond or responded with invalid data, so reject the // request with a 480. // LCOV_EXCL_START + TRC_ERROR("I-CSCF UAR got %d HTTP response from Homestead", rc); status_code = PJSIP_SC_TEMPORARILY_UNAVAILABLE; // LCOV_EXCL_STOP } diff --git a/src/icscfsproutlet.cpp b/src/icscfsproutlet.cpp index 91156657a..1a957b748 100644 --- a/src/icscfsproutlet.cpp +++ b/src/icscfsproutlet.cpp @@ -403,6 +403,7 @@ void ICSCFSproutletRegTsx::on_rx_response(pjsip_msg* rsp, int fork_id) // capabilitires, or the HSS is temporarily unavailable). There was at // least one valid S-CSCF (as this is retry processing). The I-CSCF // must return 504 (TS 24.229, 5.3.1.3) in this case. + TRC_ERROR("I-CSCF can't select an S-CSCF for REGISTER request"); rsp->line.status.code = PJSIP_SC_SERVER_TIMEOUT; rsp->line.status.reason = *pjsip_get_status_text(rsp->line.status.code); diff --git a/src/main.cpp b/src/main.cpp index d42b39a6e..d5809d8c7 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -87,6 +87,7 @@ extern "C" { #include "sprout_alarmdefinition.h" #include "sproutlet_options.h" #include "astaire_impistore.h" +#include "updater.h" enum OptionTypes { @@ -2003,7 +2004,15 @@ int main(int argc, char* argv[]) hc); // Create a DNS resolver and a SIP specific resolver. - dns_resolver = new DnsCachedResolver(opt.dns_servers, opt.dns_timeout); + dns_resolver = new DnsCachedResolver(opt.dns_servers, + opt.dns_timeout, + "/etc/clearwater/dns.json"); + + // Reload dns.json on SIGHUP + Updater* dns_updater = + new Updater(dns_resolver, + std::mem_fun(&DnsCachedResolver::reload_static_records)); + if (opt.pcscf_enabled) { sip_resolver = new SIPResolver(dns_resolver, opt.sip_blacklist_duration, 0); @@ -2562,6 +2571,7 @@ int main(int argc, char* argv[]) delete enum_service; delete scscf_acr_factory; + delete dns_updater; delete sip_resolver; delete http_resolver; delete astaire_resolver; diff --git a/src/registrarsproutlet.cpp b/src/registrarsproutlet.cpp index 94a1e4db2..827720903 100644 --- a/src/registrarsproutlet.cpp +++ b/src/registrarsproutlet.cpp @@ -529,6 +529,7 @@ void RegistrarSproutletTsx::process_register_request(pjsip_msg *req) // Failed to connect to the local store. Reject the register with a 500 // response. // LCOV_EXCL_START - the can't fail to connect to the store we use for UT + TRC_ERROR("Failed to connect to local store, private_id= %s", private_id.c_str()); st_code = PJSIP_SC_INTERNAL_SERVER_ERROR; SAS::Event event(trail(), SASEvent::REGISTER_FAILED_REGSTORE, 0); @@ -604,7 +605,7 @@ void RegistrarSproutletTsx::process_register_request(pjsip_msg *req) if (gen_hdr == NULL) { // LCOV_EXCL_START - can't see how this could ever happen - TRC_ERROR("Failed to add RFC 5626 headers"); + TRC_ERROR("Failed to add RFC 5626 headers to reply"); SAS::Event event(trail(), SASEvent::REGISTER_FAILED_5636, 0); event.add_var_param(public_id); diff --git a/src/rphservice.cpp b/src/rphservice.cpp index 30ec375ac..219aca984 100644 --- a/src/rphservice.cpp +++ b/src/rphservice.cpp @@ -221,9 +221,9 @@ SIPEventPriorityLevel RPHService::lookup_priority(std::string rph_value, // We received a message with an unknown RPH value. This could be because: // - It is not defined in the IANA namespace. // - It is not assigned a priority value in the rph.json file. - TRC_WARNING("An unknown RPH value \"%s\" was received on an incoming message." - " This message will be handled, but will not be prioritized.", - rph_value.c_str()); + TRC_DEBUG("An unknown RPH value \"%s\" was received on an incoming message." + " This message will be handled, but will not be prioritized.", + rph_value.c_str()); SAS::Event event(trail, SASEvent::RPH_VALUE_UNKNOWN, 0); event.add_var_param(rph_value); SAS::report_event(event); diff --git a/src/scscfsproutlet.cpp b/src/scscfsproutlet.cpp index c3769e0b8..e08cd8bd2 100644 --- a/src/scscfsproutlet.cpp +++ b/src/scscfsproutlet.cpp @@ -378,7 +378,7 @@ void SCSCFSproutlet::track_app_serv_comm_success(const std::string& uri, } } -void SCSCFSproutlet::track_session_setup_time(uint64_t tsx_start_time_usec, +uint64_t SCSCFSproutlet::track_session_setup_time(uint64_t tsx_start_time_usec, bool video_call) { // Calculate how long it has taken to setup the session. @@ -394,6 +394,8 @@ void SCSCFSproutlet::track_session_setup_time(uint64_t tsx_start_time_usec, { _audio_session_setup_time_tbl->accumulate(ringing_usec); } + + return ringing_usec; } SCSCFSproutletTsx::SCSCFSproutletTsx(SCSCFSproutlet* scscf, @@ -884,7 +886,15 @@ void SCSCFSproutletTsx::on_tx_response(pjsip_msg* rsp) ((st_code == PJSIP_SC_RINGING) || PJSIP_IS_STATUS_IN_CLASS(st_code, 200))) { - _scscf->track_session_setup_time(_tsx_start_time_usec, _video_call); + uint64_t setup_time = _scscf->track_session_setup_time(_tsx_start_time_usec, _video_call); + if (setup_time > 2000000) + { + pjsip_cid_hdr* cid = PJSIP_MSG_CID_HDR(rsp); + TRC_WARNING("Call setup time exceeded 2 seconds for Call-ID %.*s (was %lu us)", + cid->id.slen, + cid->id.ptr, + setup_time); + } _record_session_setup_time = false; } } @@ -1137,6 +1147,7 @@ pjsip_status_code SCSCFSproutletTsx::determine_served_user(pjsip_msg* req) if ((http_code == HTTP_SERVER_UNAVAILABLE) || (http_code == HTTP_GATEWAY_TIMEOUT)) { // Send a SIP 504 response if we got a 500/503 HTTP response. + TRC_ERROR("AS retargeting failed to lookup IFCs for served user"); status_code = PJSIP_SC_SERVER_TIMEOUT; } else @@ -1248,7 +1259,7 @@ pjsip_status_code SCSCFSproutletTsx::determine_served_user(pjsip_msg* req) } else { - TRC_DEBUG("Failed to retrieve ServiceProfile for %s", served_user.c_str()); + TRC_ERROR("Failed to retrieve ServiceProfile for %s", served_user.c_str()); if ((http_code == HTTP_SERVER_UNAVAILABLE) || (http_code == HTTP_GATEWAY_TIMEOUT)) { diff --git a/src/subscriptionsproutlet.cpp b/src/subscriptionsproutlet.cpp index 91cc21c77..35896cf8c 100644 --- a/src/subscriptionsproutlet.cpp +++ b/src/subscriptionsproutlet.cpp @@ -342,6 +342,8 @@ void SubscriptionSproutletTsx::process_subscription_request(pjsip_msg* req) std::string aor; if (!irs_info._associated_uris.get_default_impu(aor, false)) { + TRC_ERROR("SUBSCRIBE for public ID %s uses AOR %s", public_id.c_str(), aor.c_str()); + TRC_ERROR("Could not determine default URI, Subscribe is being rejected with 403"); pjsip_msg* rsp = create_response(req, PJSIP_SC_FORBIDDEN); send_response(rsp); free_msg(req); @@ -349,7 +351,6 @@ void SubscriptionSproutletTsx::process_subscription_request(pjsip_msg* req) return; } - TRC_DEBUG("aor = %s", aor.c_str()); TRC_DEBUG("SUBSCRIBE for public ID %s uses AOR %s", public_id.c_str(), aor.c_str()); // Create a subscription object from the request that we can pass down to @@ -406,6 +407,7 @@ void SubscriptionSproutletTsx::process_subscription_request(pjsip_msg* req) { // Failed to connect to the local store. Reject the subscribe with a 500 // response. + TRC_ERROR("Failed to connect to local store for SUBSCRIBE, aor=%s", aor.c_str()); st_code = PJSIP_SC_INTERNAL_SERVER_ERROR; // Build and send the reply. diff --git a/src/thread_dispatcher.cpp b/src/thread_dispatcher.cpp index 8be128f38..b925b2e67 100644 --- a/src/thread_dispatcher.cpp +++ b/src/thread_dispatcher.cpp @@ -257,7 +257,7 @@ bool process_queue_element() if ((rdata->msg_info.msg->type == PJSIP_REQUEST_MSG) && (rdata->msg_info.msg->line.req.method.id != PJSIP_ACK_METHOD)) { - TRC_DEBUG("Returning 500 response following exception"); + TRC_ERROR("Returning 500 response following exception"); reject_with_retry_header(rdata, PJSIP_SC_INTERNAL_SERVER_ERROR); } @@ -523,7 +523,7 @@ static void reject_rx_msg_overload(pjsip_rx_data* rdata, SAS::TrailId trail) { // Respond statelessly with a 503 Service Unavailable, including a // Retry-After header with a zero length timeout. - TRC_DEBUG("Rejected request due to overload"); + TRC_WARNING("Rejected request due to overload"); SAS::Marker start_marker(trail, MARKER_ID_START, 1u); SAS::report_marker(start_marker);