diff --git a/config.cmake b/config.cmake index 0e10801648d..f8f565d715e 100644 --- a/config.cmake +++ b/config.cmake @@ -445,6 +445,12 @@ config_string( UNDEF_DISABLED ) +config_option( + KernelEPbudgetThreshold EP_THRESHOLD "Enable endpoint budget threshold support." + DEFAULT ON + DEPENDS "KernelIsMCS; NOT KernelVerificationBuild" +) + config_option( KernelClz32 CLZ_32 "Define a __clzsi2 function to count leading zeros for uint32_t arguments. \ Only needed on platforms which lack a builtin instruction." DEFAULT OFF diff --git a/include/kernel/sporadic.h b/include/kernel/sporadic.h index 7c024281b84..afd0c51dc0b 100644 --- a/include/kernel/sporadic.h +++ b/include/kernel/sporadic.h @@ -198,3 +198,8 @@ void refill_budget_check(ticks_t used); */ void refill_unblock_check(sched_context_t *sc); +/* + * Mering refills until the head refill has at least `min_budget` budget. + */ +bool_t refill_merge_until_budget(sched_context_t *sc, ticks_t min_budget); + diff --git a/include/object/endpoint.h b/include/object/endpoint.h index 13c430b920f..ec87a0c7460 100644 --- a/include/object/endpoint.h +++ b/include/object/endpoint.h @@ -43,3 +43,26 @@ void cancelBadgedSends(endpoint_t *epptr, word_t badge); void replyFromKernel_error(tcb_t *thread); void replyFromKernel_success_empty(tcb_t *thread); +#ifdef CONFIG_EP_THRESHOLD +exception_t ep_decodeSetBudgetThreshold(word_t invLabel, word_t length, word_t *buffer); + +/* Needed because fields can't cross word boundaries in bf... */ +static inline ticks_t ep_get_budget_threshold(endpoint_t *ep) +{ + uint16_t low = endpoint_ptr_get_budget_threshold_low(ep); + uint16_t high = endpoint_ptr_get_budget_threshold_high(ep); + + return ((ticks_t)high << 16) | low; +} + +static inline void ep_set_budget_threshold(endpoint_t *ep, uint32_t threshold) +{ + uint16_t low = threshold & 0xff; + uint16_t high = (threshold >> 16) & 0xff; + + endpoint_ptr_set_budget_threshold_high(ep, high); + endpoint_ptr_set_budget_threshold_low(ep, low); +} + +#endif + diff --git a/include/object/reply.h b/include/object/reply.h index e87564302e3..c19e1c8d616 100644 --- a/include/object/reply.h +++ b/include/object/reply.h @@ -35,3 +35,4 @@ void reply_remove(reply_t *reply, tcb_t *tcb); /* Remove a specific tcb, and the reply it is blocking on, from the call stack */ void reply_remove_tcb(tcb_t *tcb); +exception_t decodeReplyInvocation(word_t label, word_t length, word_t *buffer); diff --git a/include/object/structures_32.bf b/include/object/structures_32.bf index c1ff7af181a..090ea6650c6 100644 --- a/include/object/structures_32.bf +++ b/include/object/structures_32.bf @@ -146,7 +146,13 @@ block sched_control_cap { -- Endpoint: size = 16 bytes block endpoint { +#ifdef CONFIG_KERNEL_MCS + field budget_threshold_low 16 + field budget_threshold_high 16 + padding 32 +#else padding 64 +#endif field_high epQueue_head 28 padding 4 diff --git a/include/object/structures_64.bf b/include/object/structures_64.bf index b642d0af8c0..97805db2416 100644 --- a/include/object/structures_64.bf +++ b/include/object/structures_64.bf @@ -202,13 +202,17 @@ block sched_control_cap { -- Endpoint: size = 16 bytes block endpoint { - field epQueue_head 64 - #if BF_CANONICAL_RANGE == 48 - padding 16 + field budget_threshold_low 16 + field_high epQueue_head 48 + field budget_threshold_high 16 field_high epQueue_tail 46 #elif BF_CANONICAL_RANGE == 39 - padding 25 + field budget_threshold_low 16 + padding 9 + field_high epQueue_head 39 + field budget_threshold_high 16 + padding 9 field_high epQueue_tail 37 #else #error "Unspecified canonical address range" diff --git a/libsel4/include/interfaces/object-api.xml b/libsel4/include/interfaces/object-api.xml index 767a0876365..7d31e97b28b 100644 --- a/libsel4/include/interfaces/object-api.xml +++ b/libsel4/include/interfaces/object-api.xml @@ -1544,4 +1544,45 @@ + + + + + Sets the budget threshold value of an endpoint. + + + This configures the minimum amount of budget sporadic threads need to have before calls + to the endpoint will proceed. If, at the time of the call, there is not enough remaining + budget to pass the threshold, the caller will either timeout fault or be postponed till + it has enough budget. Once the caller has enough budget, the call will proceed normally. + + This can be used to avoid running out of budget during calls, ensuring that passive + servers do not timeout fault or get postponed, as long as they stay within the threshold + budget. + + Round-robin callers are not checked because they cannot run out of budget. + + This is a Reply invocation as for endpoints it would be indistinguishable from IPC calls. + + + + + + + The is a CPtr to a capability of the wrong type. + + + + + does not have Read right. + Or, is not valid or the wrong type. + + + + + The is too large. + + + + diff --git a/libsel4/include/sel4/types.h b/libsel4/include/sel4/types.h index 74a7b1fc536..c32f925b768 100644 --- a/libsel4/include/sel4/types.h +++ b/libsel4/include/sel4/types.h @@ -40,6 +40,7 @@ typedef seL4_CPtr seL4_Untyped; typedef seL4_CPtr seL4_DomainSet; typedef seL4_CPtr seL4_SchedContext; typedef seL4_CPtr seL4_SchedControl; +typedef seL4_CPtr seL4_ReplyCap; typedef seL4_Uint64 seL4_Time; diff --git a/libsel4/tools/syscall_stub_gen.py b/libsel4/tools/syscall_stub_gen.py index 6f465383db9..39e469a4c9c 100644 --- a/libsel4/tools/syscall_stub_gen.py +++ b/libsel4/tools/syscall_stub_gen.py @@ -256,6 +256,7 @@ def init_data_types(wordsize): CapType("seL4_DomainSet", wordsize), CapType("seL4_SchedContext", wordsize), CapType("seL4_SchedControl", wordsize), + CapType("seL4_ReplyCap", wordsize), ] return types diff --git a/src/api/syscall.c b/src/api/syscall.c index bf98dd07ce6..5006a58c52f 100644 --- a/src/api/syscall.c +++ b/src/api/syscall.c @@ -473,29 +473,8 @@ static void handleRecv(bool_t isBlocking) } } -#ifdef CONFIG_KERNEL_MCS -static inline void mcsPreemptionPoint(void) -{ - /* at this point we could be handling a timer interrupt which actually ends the current - * threads timeslice. However, preemption is possible on revoke, which could have deleted - * the current thread and/or the current scheduling context, rendering them invalid. */ - if (isSchedulable(NODE_STATE(ksCurThread))) { - /* if the thread is schedulable, the tcb and scheduling context are still valid */ - checkBudget(); - } else if (sc_active(NODE_STATE(ksCurSC))) { - /* otherwise, if the thread is not schedulable, the SC could be valid - charge it if so */ - chargeBudget(NODE_STATE(ksConsumed), false); - } else { - /* If the current SC is no longer configured the time can no - * longer be charged to it. Simply dropping the consumed time - * here is equivalent to having charged the consumed time and - * then having cleared the SC. */ - NODE_STATE(ksConsumed) = 0; - } -} -#else +#ifndef CONFIG_KERNEL_MCS #define handleRecv(isBlocking, canReply) handleRecv(isBlocking) -#define mcsPreemptionPoint() #define handleInvocation(isCall, isBlocking, canDonate, firstPhase, cptr) handleInvocation(isCall, isBlocking) #endif @@ -517,43 +496,19 @@ static void handleYield(void) exception_t handleSyscall(syscall_t syscall) { - exception_t ret; - irq_t irq; MCS_DO_IF_BUDGET({ switch (syscall) { case SysSend: - ret = handleInvocation(false, true, false, false, getRegister(NODE_STATE(ksCurThread), capRegister)); - if (unlikely(ret != EXCEPTION_NONE)) { - mcsPreemptionPoint(); - irq = getActiveIRQ(); - if (IRQT_TO_IRQ(irq) != IRQT_TO_IRQ(irqInvalid)) { - handleInterrupt(irq); - } - } - + handleInvocation(false, true, false, false, getRegister(NODE_STATE(ksCurThread), capRegister)); break; case SysNBSend: - ret = handleInvocation(false, false, false, false, getRegister(NODE_STATE(ksCurThread), capRegister)); - if (unlikely(ret != EXCEPTION_NONE)) { - mcsPreemptionPoint(); - irq = getActiveIRQ(); - if (IRQT_TO_IRQ(irq) != IRQT_TO_IRQ(irqInvalid)) { - handleInterrupt(irq); - } - } + handleInvocation(false, false, false, false, getRegister(NODE_STATE(ksCurThread), capRegister)); break; case SysCall: - ret = handleInvocation(true, true, true, false, getRegister(NODE_STATE(ksCurThread), capRegister)); - if (unlikely(ret != EXCEPTION_NONE)) { - mcsPreemptionPoint(); - irq = getActiveIRQ(); - if (IRQT_TO_IRQ(irq) != IRQT_TO_IRQ(irqInvalid)) { - handleInterrupt(irq); - } - } + handleInvocation(true, true, true, false, getRegister(NODE_STATE(ksCurThread), capRegister)); break; case SysRecv: @@ -579,7 +534,7 @@ exception_t handleSyscall(syscall_t syscall) break; case SysReplyRecv: { cptr_t reply = getRegister(NODE_STATE(ksCurThread), replyRegister); - ret = handleInvocation(false, false, true, true, reply); + exception_t ret = handleInvocation(false, false, true, true, reply); /* reply cannot error and is not preemptible */ assert(ret == EXCEPTION_NONE); handleRecv(true, true); @@ -588,31 +543,22 @@ exception_t handleSyscall(syscall_t syscall) case SysNBSendRecv: { cptr_t dest = getNBSendRecvDest(); - ret = handleInvocation(false, false, true, true, dest); + exception_t ret = handleInvocation(false, false, true, true, dest); if (unlikely(ret != EXCEPTION_NONE)) { - mcsPreemptionPoint(); - irq = getActiveIRQ(); - if (IRQT_TO_IRQ(irq) != IRQT_TO_IRQ(irqInvalid)) { - handleInterrupt(irq); - } break; } handleRecv(true, true); break; } - case SysNBSendWait: - ret = handleInvocation(false, false, true, true, getRegister(NODE_STATE(ksCurThread), replyRegister)); + case SysNBSendWait: { + exception_t ret = handleInvocation(false, false, true, true, getRegister(NODE_STATE(ksCurThread), replyRegister)); if (unlikely(ret != EXCEPTION_NONE)) { - mcsPreemptionPoint(); - irq = getActiveIRQ(); - if (IRQT_TO_IRQ(irq) != IRQT_TO_IRQ(irqInvalid)) { - handleInterrupt(irq); - } break; } handleRecv(true, false); break; + } #endif case SysNBRecv: handleRecv(false, true); diff --git a/src/fastpath/fastpath.c b/src/fastpath/fastpath.c index 681d79cbc1d..24a763284b8 100644 --- a/src/fastpath/fastpath.c +++ b/src/fastpath/fastpath.c @@ -155,7 +155,20 @@ void NORETURN fastpath_call(word_t cptr, word_t msgInfo) if (unlikely(reply == NULL)) { slowpath(SysCall); } -#endif + sched_context_t *sc = NODE_STATE(ksCurThread)->tcbSchedContext; + // sched_context_t *sc = NODE_STATE(ksCurSC); + +#ifdef CONFIG_EP_THRESHOLD + ticks_t threshold = ep_get_budget_threshold(ep_ptr); + if (!isRoundRobin(sc) && threshold) { + updateTimestamp(); + if (refill_capacity(sc, NODE_STATE(ksConsumed)) < threshold) { + slowpath(SysCall); + } + } +#endif // CONFIG_EP_THRESHOLD + +#endif // CONFIG_KERNEL_MCS #ifdef ENABLE_SMP_SUPPORT /* Ensure both threads have the same affinity */ @@ -192,7 +205,6 @@ void NORETURN fastpath_call(word_t cptr, word_t msgInfo) thread_state_ptr_set_replyObject_np(&NODE_STATE(ksCurThread)->tcbState, REPLY_REF(reply)); reply->replyTCB = NODE_STATE(ksCurThread); - sched_context_t *sc = NODE_STATE(ksCurThread)->tcbSchedContext; sc->scTcb = dest; dest->tcbSchedContext = sc; NODE_STATE(ksCurThread)->tcbSchedContext = NULL; diff --git a/src/kernel/sporadic.c b/src/kernel/sporadic.c index be0de7c9d0f..e06180f3096 100644 --- a/src/kernel/sporadic.c +++ b/src/kernel/sporadic.c @@ -347,6 +347,17 @@ void refill_budget_check(ticks_t usage) REFILL_SANITY_END(sc); } +bool_t refill_merge_until_budget(sched_context_t *sc, ticks_t min_budget) +{ + while (refill_capacity(sc, 0) < min_budget) { + if (refill_single(sc)) { + return false; + } + merge_nonoverlapping_head_refill(sc); + } + return true; +} + static inline void merge_overlapping_head_refill(sched_context_t *sc) { refill_t old_head = refill_pop_head(sc); diff --git a/src/kernel/thread.c b/src/kernel/thread.c index 11594e4f59b..79601452f0b 100644 --- a/src/kernel/thread.c +++ b/src/kernel/thread.c @@ -649,8 +649,6 @@ void chargeBudget(ticks_t consumed, bool_t canTimeoutFault) if (likely(isSchedulable(NODE_STATE(ksCurThread)))) { assert(NODE_STATE(ksCurThread)->tcbSchedContext == NODE_STATE(ksCurSC)); endTimeslice(canTimeoutFault); - rescheduleRequired(); - NODE_STATE(ksReprogram) = true; } } @@ -670,6 +668,8 @@ void endTimeslice(bool_t can_timeout_fault) /* postpone until ready */ postpone(NODE_STATE(ksCurSC)); } + rescheduleRequired(); + NODE_STATE(ksReprogram) = true; } #else diff --git a/src/object/endpoint.c b/src/object/endpoint.c index 822d712033e..d3b24f186d2 100644 --- a/src/object/endpoint.c +++ b/src/object/endpoint.c @@ -494,4 +494,42 @@ void reorderEP(endpoint_t *epptr, tcb_t *thread) queue = tcbEPAppend(thread, queue); ep_ptr_set_queue(epptr, queue); } -#endif + +#ifdef CONFIG_EP_THRESHOLD + +exception_t ep_decodeSetBudgetThreshold(word_t invLabel, word_t length, word_t *buffer) +{ + if (length < TIME_ARG_SIZE || current_extra_caps.excaprefs[0] == NULL) { + userError("EndpointSetBudgetThreshold: Truncated message."); + current_syscall_error.type = seL4_TruncatedMessage; + return EXCEPTION_SYSCALL_ERROR; + } + cap_t ep_cap = current_extra_caps.excaprefs[0]->cap; + if (cap_get_capType(ep_cap) != cap_endpoint_cap) { + userError("EndpointSetBudgetThreshold: Target cap not an endpoint cap"); + current_syscall_error.type = seL4_InvalidCapability; + current_syscall_error.invalidCapNumber = 1; + return EXCEPTION_SYSCALL_ERROR; + } + if (!cap_endpoint_cap_get_capCanReceive(ep_cap)) { + userError("EndpointSetBudgetThreshold: Missing Read permission on endpoint."); + current_syscall_error.type = seL4_InvalidCapability; + current_syscall_error.invalidCapNumber = 1; + return EXCEPTION_SYSCALL_ERROR; + } + time_t threshold_us = mode_parseTimeArg(0, buffer); + ticks_t threshold = usToTicks(threshold_us); + + if (threshold > (ticks_t)UINT32_MAX) { + userError("EndpointSetBudgetThreshold: Threshold too large."); + current_syscall_error.type = seL4_RangeError; + current_syscall_error.rangeErrorMin = 0; + current_syscall_error.rangeErrorMax = ticksToUs(UINT32_MAX); + return EXCEPTION_SYSCALL_ERROR; + } + setThreadState(NODE_STATE(ksCurThread), ThreadState_Restart); + ep_set_budget_threshold(EP_PTR(cap_endpoint_cap_get_capEPPtr(ep_cap)), threshold); + return EXCEPTION_NONE; +} +#endif // CONFIG_EP_THRESHOLD +#endif // CONFIG_KERNEL_MCS diff --git a/src/object/objecttype.c b/src/object/objecttype.c index 6088f2288c2..876f392a985 100644 --- a/src/object/objecttype.c +++ b/src/object/objecttype.c @@ -18,6 +18,7 @@ #include #include #ifdef CONFIG_KERNEL_MCS +#include #include #include #endif @@ -695,6 +696,11 @@ exception_t decodeInvocation(word_t invLabel, word_t length, #ifdef CONFIG_KERNEL_MCS case cap_reply_cap: +#ifdef CONFIG_EP_THRESHOLD + if (unlikely(call)) { + return decodeReplyInvocation(invLabel, length, buffer); + } +#endif setThreadState(NODE_STATE(ksCurThread), ThreadState_Restart); return performInvocation_Reply( NODE_STATE(ksCurThread), @@ -786,10 +792,46 @@ exception_t decodeInvocation(word_t invLabel, word_t length, } #ifdef CONFIG_KERNEL_MCS +/* + * According to commit a2ec92a2f, there are no overlapping heads any more. + * Only refill_unblock_check() creates overlaps, but fixes them immediately. + * Even if there is, the task will be scheduled again and retry the call, + * so it is safe to not check for this corner case either way. + */ +static exception_t ep_check_budget(endpoint_t *ep, bool_t canDonate, bool_t block) +{ +#ifdef CONFIG_EP_THRESHOLD + sched_context_t *sc = NODE_STATE(ksCurSC); + word_t threshold = ep_get_budget_threshold(ep); + + if (!isRoundRobin(sc) && threshold && canDonate && refill_capacity(sc, 0) < threshold) { + /* Non-blocking send, fail silently */ + if (!block) { + return EXCEPTION_SYSCALL_ERROR; + } + commitTime(); + if (refill_merge_until_budget(sc, threshold)) { + endTimeslice(true); + return EXCEPTION_PREEMPTED; + } else { + /* Never enough budget */ + userError("Not enough budget to call endpoint with budget threshold"); + current_syscall_error.type = seL4_IllegalOperation; + return EXCEPTION_SYSCALL_ERROR; + } + } +#endif + return EXCEPTION_NONE; +} + exception_t performInvocation_Endpoint(endpoint_t *ep, word_t badge, bool_t canGrant, bool_t canGrantReply, bool_t block, bool_t call, bool_t canDonate) { + exception_t status = ep_check_budget(ep, canDonate, block); + if (status != EXCEPTION_NONE) { + return status; + } sendIPC(block, call, badge, canGrant, canGrantReply, canDonate, NODE_STATE(ksCurThread), ep); return EXCEPTION_NONE; diff --git a/src/object/reply.c b/src/object/reply.c index 9a47575f286..b3df64b50d3 100644 --- a/src/object/reply.c +++ b/src/object/reply.c @@ -134,3 +134,19 @@ void reply_remove_tcb(tcb_t *tcb) reply->replyNext = call_stack_new(0, false); reply_unlink(reply, tcb); } + +#ifdef CONFIG_EP_THRESHOLD +#include + +NO_INLINE exception_t decodeReplyInvocation(word_t label, word_t length, word_t *buffer) +{ + switch (label) { + case EndpointSetBudgetThreshold: + return ep_decodeSetBudgetThreshold(length, length, buffer); + default: + userError("Reply invocation: Illegal operation attempted."); + current_syscall_error.type = seL4_IllegalOperation; + return EXCEPTION_SYSCALL_ERROR; + } +} +#endif