diff --git a/config.cmake b/config.cmake
index 0e10801648d..f8f565d715e 100644
--- a/config.cmake
+++ b/config.cmake
@@ -445,6 +445,12 @@ config_string(
UNDEF_DISABLED
)
+config_option(
+ KernelEPbudgetThreshold EP_THRESHOLD "Enable endpoint budget threshold support."
+ DEFAULT ON
+ DEPENDS "KernelIsMCS; NOT KernelVerificationBuild"
+)
+
config_option(
KernelClz32 CLZ_32 "Define a __clzsi2 function to count leading zeros for uint32_t arguments. \
Only needed on platforms which lack a builtin instruction." DEFAULT OFF
diff --git a/include/kernel/sporadic.h b/include/kernel/sporadic.h
index 7c024281b84..afd0c51dc0b 100644
--- a/include/kernel/sporadic.h
+++ b/include/kernel/sporadic.h
@@ -198,3 +198,8 @@ void refill_budget_check(ticks_t used);
*/
void refill_unblock_check(sched_context_t *sc);
+/*
+ * Mering refills until the head refill has at least `min_budget` budget.
+ */
+bool_t refill_merge_until_budget(sched_context_t *sc, ticks_t min_budget);
+
diff --git a/include/object/endpoint.h b/include/object/endpoint.h
index 13c430b920f..ec87a0c7460 100644
--- a/include/object/endpoint.h
+++ b/include/object/endpoint.h
@@ -43,3 +43,26 @@ void cancelBadgedSends(endpoint_t *epptr, word_t badge);
void replyFromKernel_error(tcb_t *thread);
void replyFromKernel_success_empty(tcb_t *thread);
+#ifdef CONFIG_EP_THRESHOLD
+exception_t ep_decodeSetBudgetThreshold(word_t invLabel, word_t length, word_t *buffer);
+
+/* Needed because fields can't cross word boundaries in bf... */
+static inline ticks_t ep_get_budget_threshold(endpoint_t *ep)
+{
+ uint16_t low = endpoint_ptr_get_budget_threshold_low(ep);
+ uint16_t high = endpoint_ptr_get_budget_threshold_high(ep);
+
+ return ((ticks_t)high << 16) | low;
+}
+
+static inline void ep_set_budget_threshold(endpoint_t *ep, uint32_t threshold)
+{
+ uint16_t low = threshold & 0xff;
+ uint16_t high = (threshold >> 16) & 0xff;
+
+ endpoint_ptr_set_budget_threshold_high(ep, high);
+ endpoint_ptr_set_budget_threshold_low(ep, low);
+}
+
+#endif
+
diff --git a/include/object/reply.h b/include/object/reply.h
index e87564302e3..c19e1c8d616 100644
--- a/include/object/reply.h
+++ b/include/object/reply.h
@@ -35,3 +35,4 @@ void reply_remove(reply_t *reply, tcb_t *tcb);
/* Remove a specific tcb, and the reply it is blocking on, from the call stack */
void reply_remove_tcb(tcb_t *tcb);
+exception_t decodeReplyInvocation(word_t label, word_t length, word_t *buffer);
diff --git a/include/object/structures_32.bf b/include/object/structures_32.bf
index c1ff7af181a..090ea6650c6 100644
--- a/include/object/structures_32.bf
+++ b/include/object/structures_32.bf
@@ -146,7 +146,13 @@ block sched_control_cap {
-- Endpoint: size = 16 bytes
block endpoint {
+#ifdef CONFIG_KERNEL_MCS
+ field budget_threshold_low 16
+ field budget_threshold_high 16
+ padding 32
+#else
padding 64
+#endif
field_high epQueue_head 28
padding 4
diff --git a/include/object/structures_64.bf b/include/object/structures_64.bf
index b642d0af8c0..97805db2416 100644
--- a/include/object/structures_64.bf
+++ b/include/object/structures_64.bf
@@ -202,13 +202,17 @@ block sched_control_cap {
-- Endpoint: size = 16 bytes
block endpoint {
- field epQueue_head 64
-
#if BF_CANONICAL_RANGE == 48
- padding 16
+ field budget_threshold_low 16
+ field_high epQueue_head 48
+ field budget_threshold_high 16
field_high epQueue_tail 46
#elif BF_CANONICAL_RANGE == 39
- padding 25
+ field budget_threshold_low 16
+ padding 9
+ field_high epQueue_head 39
+ field budget_threshold_high 16
+ padding 9
field_high epQueue_tail 37
#else
#error "Unspecified canonical address range"
diff --git a/libsel4/include/interfaces/object-api.xml b/libsel4/include/interfaces/object-api.xml
index 767a0876365..7d31e97b28b 100644
--- a/libsel4/include/interfaces/object-api.xml
+++ b/libsel4/include/interfaces/object-api.xml
@@ -1544,4 +1544,45 @@
+
+
+
+
+ Sets the budget threshold value of an endpoint.
+
+
+ This configures the minimum amount of budget sporadic threads need to have before calls
+ to the endpoint will proceed. If, at the time of the call, there is not enough remaining
+ budget to pass the threshold, the caller will either timeout fault or be postponed till
+ it has enough budget. Once the caller has enough budget, the call will proceed normally.
+
+ This can be used to avoid running out of budget during calls, ensuring that passive
+ servers do not timeout fault or get postponed, as long as they stay within the threshold
+ budget.
+
+ Round-robin callers are not checked because they cannot run out of budget.
+
+ This is a Reply invocation as for endpoints it would be indistinguishable from IPC calls.
+
+
+
+
+
+
+ The is a CPtr to a capability of the wrong type.
+
+
+
+
+ does not have Read right.
+ Or, is not valid or the wrong type.
+
+
+
+
+ The is too large.
+
+
+
+
diff --git a/libsel4/include/sel4/types.h b/libsel4/include/sel4/types.h
index 74a7b1fc536..c32f925b768 100644
--- a/libsel4/include/sel4/types.h
+++ b/libsel4/include/sel4/types.h
@@ -40,6 +40,7 @@ typedef seL4_CPtr seL4_Untyped;
typedef seL4_CPtr seL4_DomainSet;
typedef seL4_CPtr seL4_SchedContext;
typedef seL4_CPtr seL4_SchedControl;
+typedef seL4_CPtr seL4_ReplyCap;
typedef seL4_Uint64 seL4_Time;
diff --git a/libsel4/tools/syscall_stub_gen.py b/libsel4/tools/syscall_stub_gen.py
index 6f465383db9..39e469a4c9c 100644
--- a/libsel4/tools/syscall_stub_gen.py
+++ b/libsel4/tools/syscall_stub_gen.py
@@ -256,6 +256,7 @@ def init_data_types(wordsize):
CapType("seL4_DomainSet", wordsize),
CapType("seL4_SchedContext", wordsize),
CapType("seL4_SchedControl", wordsize),
+ CapType("seL4_ReplyCap", wordsize),
]
return types
diff --git a/src/api/syscall.c b/src/api/syscall.c
index bf98dd07ce6..5006a58c52f 100644
--- a/src/api/syscall.c
+++ b/src/api/syscall.c
@@ -473,29 +473,8 @@ static void handleRecv(bool_t isBlocking)
}
}
-#ifdef CONFIG_KERNEL_MCS
-static inline void mcsPreemptionPoint(void)
-{
- /* at this point we could be handling a timer interrupt which actually ends the current
- * threads timeslice. However, preemption is possible on revoke, which could have deleted
- * the current thread and/or the current scheduling context, rendering them invalid. */
- if (isSchedulable(NODE_STATE(ksCurThread))) {
- /* if the thread is schedulable, the tcb and scheduling context are still valid */
- checkBudget();
- } else if (sc_active(NODE_STATE(ksCurSC))) {
- /* otherwise, if the thread is not schedulable, the SC could be valid - charge it if so */
- chargeBudget(NODE_STATE(ksConsumed), false);
- } else {
- /* If the current SC is no longer configured the time can no
- * longer be charged to it. Simply dropping the consumed time
- * here is equivalent to having charged the consumed time and
- * then having cleared the SC. */
- NODE_STATE(ksConsumed) = 0;
- }
-}
-#else
+#ifndef CONFIG_KERNEL_MCS
#define handleRecv(isBlocking, canReply) handleRecv(isBlocking)
-#define mcsPreemptionPoint()
#define handleInvocation(isCall, isBlocking, canDonate, firstPhase, cptr) handleInvocation(isCall, isBlocking)
#endif
@@ -517,43 +496,19 @@ static void handleYield(void)
exception_t handleSyscall(syscall_t syscall)
{
- exception_t ret;
- irq_t irq;
MCS_DO_IF_BUDGET({
switch (syscall)
{
case SysSend:
- ret = handleInvocation(false, true, false, false, getRegister(NODE_STATE(ksCurThread), capRegister));
- if (unlikely(ret != EXCEPTION_NONE)) {
- mcsPreemptionPoint();
- irq = getActiveIRQ();
- if (IRQT_TO_IRQ(irq) != IRQT_TO_IRQ(irqInvalid)) {
- handleInterrupt(irq);
- }
- }
-
+ handleInvocation(false, true, false, false, getRegister(NODE_STATE(ksCurThread), capRegister));
break;
case SysNBSend:
- ret = handleInvocation(false, false, false, false, getRegister(NODE_STATE(ksCurThread), capRegister));
- if (unlikely(ret != EXCEPTION_NONE)) {
- mcsPreemptionPoint();
- irq = getActiveIRQ();
- if (IRQT_TO_IRQ(irq) != IRQT_TO_IRQ(irqInvalid)) {
- handleInterrupt(irq);
- }
- }
+ handleInvocation(false, false, false, false, getRegister(NODE_STATE(ksCurThread), capRegister));
break;
case SysCall:
- ret = handleInvocation(true, true, true, false, getRegister(NODE_STATE(ksCurThread), capRegister));
- if (unlikely(ret != EXCEPTION_NONE)) {
- mcsPreemptionPoint();
- irq = getActiveIRQ();
- if (IRQT_TO_IRQ(irq) != IRQT_TO_IRQ(irqInvalid)) {
- handleInterrupt(irq);
- }
- }
+ handleInvocation(true, true, true, false, getRegister(NODE_STATE(ksCurThread), capRegister));
break;
case SysRecv:
@@ -579,7 +534,7 @@ exception_t handleSyscall(syscall_t syscall)
break;
case SysReplyRecv: {
cptr_t reply = getRegister(NODE_STATE(ksCurThread), replyRegister);
- ret = handleInvocation(false, false, true, true, reply);
+ exception_t ret = handleInvocation(false, false, true, true, reply);
/* reply cannot error and is not preemptible */
assert(ret == EXCEPTION_NONE);
handleRecv(true, true);
@@ -588,31 +543,22 @@ exception_t handleSyscall(syscall_t syscall)
case SysNBSendRecv: {
cptr_t dest = getNBSendRecvDest();
- ret = handleInvocation(false, false, true, true, dest);
+ exception_t ret = handleInvocation(false, false, true, true, dest);
if (unlikely(ret != EXCEPTION_NONE)) {
- mcsPreemptionPoint();
- irq = getActiveIRQ();
- if (IRQT_TO_IRQ(irq) != IRQT_TO_IRQ(irqInvalid)) {
- handleInterrupt(irq);
- }
break;
}
handleRecv(true, true);
break;
}
- case SysNBSendWait:
- ret = handleInvocation(false, false, true, true, getRegister(NODE_STATE(ksCurThread), replyRegister));
+ case SysNBSendWait: {
+ exception_t ret = handleInvocation(false, false, true, true, getRegister(NODE_STATE(ksCurThread), replyRegister));
if (unlikely(ret != EXCEPTION_NONE)) {
- mcsPreemptionPoint();
- irq = getActiveIRQ();
- if (IRQT_TO_IRQ(irq) != IRQT_TO_IRQ(irqInvalid)) {
- handleInterrupt(irq);
- }
break;
}
handleRecv(true, false);
break;
+ }
#endif
case SysNBRecv:
handleRecv(false, true);
diff --git a/src/fastpath/fastpath.c b/src/fastpath/fastpath.c
index 681d79cbc1d..24a763284b8 100644
--- a/src/fastpath/fastpath.c
+++ b/src/fastpath/fastpath.c
@@ -155,7 +155,20 @@ void NORETURN fastpath_call(word_t cptr, word_t msgInfo)
if (unlikely(reply == NULL)) {
slowpath(SysCall);
}
-#endif
+ sched_context_t *sc = NODE_STATE(ksCurThread)->tcbSchedContext;
+ // sched_context_t *sc = NODE_STATE(ksCurSC);
+
+#ifdef CONFIG_EP_THRESHOLD
+ ticks_t threshold = ep_get_budget_threshold(ep_ptr);
+ if (!isRoundRobin(sc) && threshold) {
+ updateTimestamp();
+ if (refill_capacity(sc, NODE_STATE(ksConsumed)) < threshold) {
+ slowpath(SysCall);
+ }
+ }
+#endif // CONFIG_EP_THRESHOLD
+
+#endif // CONFIG_KERNEL_MCS
#ifdef ENABLE_SMP_SUPPORT
/* Ensure both threads have the same affinity */
@@ -192,7 +205,6 @@ void NORETURN fastpath_call(word_t cptr, word_t msgInfo)
thread_state_ptr_set_replyObject_np(&NODE_STATE(ksCurThread)->tcbState, REPLY_REF(reply));
reply->replyTCB = NODE_STATE(ksCurThread);
- sched_context_t *sc = NODE_STATE(ksCurThread)->tcbSchedContext;
sc->scTcb = dest;
dest->tcbSchedContext = sc;
NODE_STATE(ksCurThread)->tcbSchedContext = NULL;
diff --git a/src/kernel/sporadic.c b/src/kernel/sporadic.c
index be0de7c9d0f..e06180f3096 100644
--- a/src/kernel/sporadic.c
+++ b/src/kernel/sporadic.c
@@ -347,6 +347,17 @@ void refill_budget_check(ticks_t usage)
REFILL_SANITY_END(sc);
}
+bool_t refill_merge_until_budget(sched_context_t *sc, ticks_t min_budget)
+{
+ while (refill_capacity(sc, 0) < min_budget) {
+ if (refill_single(sc)) {
+ return false;
+ }
+ merge_nonoverlapping_head_refill(sc);
+ }
+ return true;
+}
+
static inline void merge_overlapping_head_refill(sched_context_t *sc)
{
refill_t old_head = refill_pop_head(sc);
diff --git a/src/kernel/thread.c b/src/kernel/thread.c
index 11594e4f59b..79601452f0b 100644
--- a/src/kernel/thread.c
+++ b/src/kernel/thread.c
@@ -649,8 +649,6 @@ void chargeBudget(ticks_t consumed, bool_t canTimeoutFault)
if (likely(isSchedulable(NODE_STATE(ksCurThread)))) {
assert(NODE_STATE(ksCurThread)->tcbSchedContext == NODE_STATE(ksCurSC));
endTimeslice(canTimeoutFault);
- rescheduleRequired();
- NODE_STATE(ksReprogram) = true;
}
}
@@ -670,6 +668,8 @@ void endTimeslice(bool_t can_timeout_fault)
/* postpone until ready */
postpone(NODE_STATE(ksCurSC));
}
+ rescheduleRequired();
+ NODE_STATE(ksReprogram) = true;
}
#else
diff --git a/src/object/endpoint.c b/src/object/endpoint.c
index 822d712033e..d3b24f186d2 100644
--- a/src/object/endpoint.c
+++ b/src/object/endpoint.c
@@ -494,4 +494,42 @@ void reorderEP(endpoint_t *epptr, tcb_t *thread)
queue = tcbEPAppend(thread, queue);
ep_ptr_set_queue(epptr, queue);
}
-#endif
+
+#ifdef CONFIG_EP_THRESHOLD
+
+exception_t ep_decodeSetBudgetThreshold(word_t invLabel, word_t length, word_t *buffer)
+{
+ if (length < TIME_ARG_SIZE || current_extra_caps.excaprefs[0] == NULL) {
+ userError("EndpointSetBudgetThreshold: Truncated message.");
+ current_syscall_error.type = seL4_TruncatedMessage;
+ return EXCEPTION_SYSCALL_ERROR;
+ }
+ cap_t ep_cap = current_extra_caps.excaprefs[0]->cap;
+ if (cap_get_capType(ep_cap) != cap_endpoint_cap) {
+ userError("EndpointSetBudgetThreshold: Target cap not an endpoint cap");
+ current_syscall_error.type = seL4_InvalidCapability;
+ current_syscall_error.invalidCapNumber = 1;
+ return EXCEPTION_SYSCALL_ERROR;
+ }
+ if (!cap_endpoint_cap_get_capCanReceive(ep_cap)) {
+ userError("EndpointSetBudgetThreshold: Missing Read permission on endpoint.");
+ current_syscall_error.type = seL4_InvalidCapability;
+ current_syscall_error.invalidCapNumber = 1;
+ return EXCEPTION_SYSCALL_ERROR;
+ }
+ time_t threshold_us = mode_parseTimeArg(0, buffer);
+ ticks_t threshold = usToTicks(threshold_us);
+
+ if (threshold > (ticks_t)UINT32_MAX) {
+ userError("EndpointSetBudgetThreshold: Threshold too large.");
+ current_syscall_error.type = seL4_RangeError;
+ current_syscall_error.rangeErrorMin = 0;
+ current_syscall_error.rangeErrorMax = ticksToUs(UINT32_MAX);
+ return EXCEPTION_SYSCALL_ERROR;
+ }
+ setThreadState(NODE_STATE(ksCurThread), ThreadState_Restart);
+ ep_set_budget_threshold(EP_PTR(cap_endpoint_cap_get_capEPPtr(ep_cap)), threshold);
+ return EXCEPTION_NONE;
+}
+#endif // CONFIG_EP_THRESHOLD
+#endif // CONFIG_KERNEL_MCS
diff --git a/src/object/objecttype.c b/src/object/objecttype.c
index 6088f2288c2..876f392a985 100644
--- a/src/object/objecttype.c
+++ b/src/object/objecttype.c
@@ -18,6 +18,7 @@
#include