From 8e048c798adaabef530a1526f7ce8c6c3cd3475e Mon Sep 17 00:00:00 2001 From: Bjoern Brandenburg Date: Sun, 9 Aug 2015 13:18:48 +0200 Subject: Add LITMUS^RT core implementation This patch adds the core of LITMUS^RT: - library functionality (heaps, rt_domain, prioritization, etc.) - budget enforcement logic - job management - system call backends - virtual devices (control page, etc.) - scheduler plugin API (and dummy plugin) This code compiles, but is not yet integrated with the rest of Linux. --- litmus/Kconfig | 193 ++++++++++++++ litmus/Makefile | 18 ++ litmus/bheap.c | 316 +++++++++++++++++++++++ litmus/binheap.c | 387 ++++++++++++++++++++++++++++ litmus/budget.c | 116 +++++++++ litmus/clustered.c | 119 +++++++++ litmus/ctrldev.c | 160 ++++++++++++ litmus/edf_common.c | 200 +++++++++++++++ litmus/fdso.c | 308 +++++++++++++++++++++++ litmus/fp_common.c | 119 +++++++++ litmus/jobs.c | 82 ++++++ litmus/litmus.c | 681 ++++++++++++++++++++++++++++++++++++++++++++++++++ litmus/litmus_proc.c | 573 ++++++++++++++++++++++++++++++++++++++++++ litmus/locking.c | 188 ++++++++++++++ litmus/preempt.c | 141 +++++++++++ litmus/rt_domain.c | 353 ++++++++++++++++++++++++++ litmus/sched_plugin.c | 238 ++++++++++++++++++ litmus/srp.c | 308 +++++++++++++++++++++++ litmus/sync.c | 152 +++++++++++ litmus/trace.c | 11 + litmus/uncachedev.c | 102 ++++++++ 21 files changed, 4765 insertions(+) create mode 100644 litmus/bheap.c create mode 100644 litmus/binheap.c create mode 100644 litmus/budget.c create mode 100644 litmus/clustered.c create mode 100644 litmus/ctrldev.c create mode 100644 litmus/edf_common.c create mode 100644 litmus/fdso.c create mode 100644 litmus/fp_common.c create mode 100644 litmus/jobs.c create mode 100644 litmus/litmus.c create mode 100644 litmus/litmus_proc.c create mode 100644 litmus/locking.c create mode 100644 litmus/preempt.c create mode 100644 litmus/rt_domain.c create mode 100644 litmus/sched_plugin.c create mode 100644 litmus/srp.c create mode 100644 litmus/sync.c create mode 100644 litmus/uncachedev.c (limited to 'litmus') diff --git a/litmus/Kconfig b/litmus/Kconfig index 5408ef6b159b..fdf31f3dd6c2 100644 --- a/litmus/Kconfig +++ b/litmus/Kconfig @@ -1,5 +1,184 @@ menu "LITMUS^RT" +menu "Scheduling" + +config RELEASE_MASTER + bool "Release-master Support" + depends on ARCH_HAS_SEND_PULL_TIMERS && SMP + default n + help + Allow one processor to act as a dedicated interrupt processor + that services all timer interrupts, but that does not schedule + real-time tasks. See RTSS'09 paper for details + (http://www.cs.unc.edu/~anderson/papers.html). + +config PREFER_LOCAL_LINKING + bool "Link newly arrived tasks locally if possible" + depends on SMP + default y + help + In linking-based schedulers such as GSN-EDF, if an idle CPU processes + a job arrival (i.e., when a job resumed or was released), it can + either link the task to itself and schedule it immediately (to avoid + unnecessary scheduling latency) or it can try to link it to the CPU + where it executed previously (to maximize cache affinity, at the + expense of increased latency due to the need to send an IPI). + + In lightly loaded systems, this option can significantly reduce + scheduling latencies. In heavily loaded systems (where CPUs are + rarely idle), it will likely make hardly a difference. + + If unsure, say yes. 
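A minimal sketch of the decision described in PREFER_LOCAL_LINKING above, assuming a linking-based global plugin; all helper names (cpu_is_idle(), link_task_to_cpu(), this_cpu_entry(), task_prev_cpu_entry(), preempt_cpu()) are placeholders and not part of this patch:

    /* Sketch: an idle CPU reacting to a job arrival. */
    static void handle_job_arrival(struct task_struct *t)
    {
    #ifdef CONFIG_PREFER_LOCAL_LINKING
            if (cpu_is_idle(smp_processor_id())) {
                    /* link locally: schedule immediately, no IPI needed */
                    link_task_to_cpu(t, this_cpu_entry());
                    return;
            }
    #endif
            /* otherwise favor cache affinity: link to the last-used CPU
             * and send an IPI so that it reschedules */
            link_task_to_cpu(t, task_prev_cpu_entry(t));
            preempt_cpu(task_prev_cpu_entry(t));
    }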
+ +config LITMUS_QUANTUM_LENGTH_US + int "quantum length (in us)" + default 1000 + range 500 10000 + help + Determine the desired quantum length, in microseconds, which + is used to determine the granularity of scheduling in + quantum-driven plugins (primarily PFAIR). This parameter does not + affect event-driven plugins (such as the EDF-based plugins and P-FP). + Default: 1000us = 1ms. + +config BUG_ON_MIGRATION_DEADLOCK + bool "Panic on suspected migration deadlock" + default y + help + This is a debugging option. The LITMUS^RT migration support code for + global scheduling contains a simple heuristic to detect when the + system deadlocks due to circular stack dependencies. + + For example, such a deadlock exists if CPU 0 waits for task A's stack + to become available while using task B's stack, and CPU 1 waits for + task B's stack to become available while using task A's stack. Such + a situation can arise in (buggy) global scheduling plugins. + + With this option enabled, such a scenario with result in a BUG(). + You can turn off this option when debugging on real hardware (e.g., + to rescue traces, etc. that would be hard to get after a panic). + + Only turn this off if you really know what you are doing. If this + BUG() triggers, the scheduler is broken and turning off this option + won't fix it. + + +endmenu + +menu "Real-Time Synchronization" + +config NP_SECTION + bool "Non-preemptive section support" + default y + help + Allow tasks to become non-preemptable. + Note that plugins still need to explicitly support non-preemptivity. + Currently, only the GSN-EDF, PSN-EDF, and P-FP plugins have such support. + + This is required to support locking protocols such as the FMLP. + If disabled, all tasks will be considered preemptable at all times. + +config LITMUS_LOCKING + bool "Support for real-time locking protocols" + depends on NP_SECTION + default y + help + Enable LITMUS^RT's multiprocessor real-time locking protocols with + predicable maximum blocking times. + + Say Yes if you want to include locking protocols such as the FMLP and + Baker's SRP. + +endmenu + +menu "Performance Enhancements" + +config SCHED_CPU_AFFINITY + bool "Local Migration Affinity" + depends on X86 && SYSFS + default y + help + Rescheduled tasks prefer CPUs near to their previously used CPU. + This may improve cache performance through possible preservation of + cache affinity, at the expense of (slightly) more involved scheduling + logic. + + Warning: May make bugs harder to find since tasks may migrate less often. + + NOTES: + * Feature is not utilized by PFair/PD^2. + + Say Yes if unsure. + +config ALLOW_EARLY_RELEASE + bool "Allow Early Releasing" + default y + help + Allow tasks to release jobs early (while still maintaining job + precedence constraints). Only supported by EDF schedulers. Early + releasing must be explicitly requested by real-time tasks via + the task_params passed to sys_set_task_rt_param(). + + Early releasing can improve job response times while maintaining + real-time correctness. However, it can easily peg your CPUs + since tasks never suspend to wait for their next job. As such, early + releasing is really only useful in the context of implementing + bandwidth servers, interrupt handling threads, or short-lived + computations. + + Beware that early releasing may affect real-time analysis + if using locking protocols or I/O. + + Say Yes if unsure. 
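A minimal user-space sketch of the bandwidth-server use case mentioned above; wait_for_event(), handle_event(), and complete_job() are placeholder wrappers, with complete_job() standing in for whatever invokes the sys_complete_job() backend added in litmus/litmus.c below:

    /* Sketch: an event-handling server that relies on early releasing.
     * Each loop iteration is one job; with early releasing requested in
     * task_params, the next job is released immediately rather than at
     * the next period boundary, so pending events are served back to back. */
    static void *event_server(void *arg)
    {
            for (;;) {
                    handle_event(wait_for_event());
                    complete_job();
            }
            return NULL;
    }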
+ +choice + prompt "EDF Tie-Break Behavior" + default EDF_TIE_BREAK_LATENESS_NORM + help + Allows the configuration of tie-breaking behavior when the deadlines + of two EDF-scheduled tasks are equal. + + config EDF_TIE_BREAK_LATENESS + bool "Lateness-based Tie Break" + help + Break ties between two jobs, A and B, based upon the lateness of their + prior jobs. The job with the greatest lateness has priority. Note that + lateness has a negative value if the prior job finished before its + deadline. + + config EDF_TIE_BREAK_LATENESS_NORM + bool "Normalized Lateness-based Tie Break" + help + Break ties between two jobs, A and B, based upon the lateness, normalized + by relative deadline, of their prior jobs. The job with the greatest + normalized lateness has priority. Note that lateness has a negative value + if the prior job finished before its deadline. + + Normalized lateness tie-breaks are likely desireable over non-normalized + tie-breaks if the execution times and/or relative deadlines of tasks in a + task set vary greatly. + + config EDF_TIE_BREAK_HASH + bool "Hash-based Tie Breaks" + help + Break ties between two jobs, A and B, with equal deadlines by using a + uniform hash; i.e.: hash(A.pid, A.job_num) < hash(B.pid, B.job_num). Job + A has ~50% of winning a given tie-break. + + config EDF_PID_TIE_BREAK + bool "PID-based Tie Breaks" + help + Break ties based upon OS-assigned thread IDs. Use this option if + required by algorithm's real-time analysis or per-task response-time + jitter must be minimized. + + NOTES: + * This tie-breaking method was default in Litmus 2012.2 and before. + +endchoice + +endmenu + menu "Tracing" config FEATHER_TRACE @@ -154,6 +333,20 @@ config SCHED_DEBUG_TRACE_CALLER If unsure, say No. +config PREEMPT_STATE_TRACE + bool "Trace preemption state machine transitions" + depends on SCHED_DEBUG_TRACE && DEBUG_KERNEL + default n + help + With this option enabled, each CPU will log when it transitions + states in the preemption state machine. This state machine is + used to determine how to react to IPIs (avoid races with in-flight IPIs). + + Warning: this creates a lot of information in the debug trace. Only + recommended when you are debugging preemption-related races. + + If unsure, say No. 
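The EDF tie-break choices above (in the "Performance Enhancements" menu) all reduce to a comparison of equal-deadline jobs. The default, normalized lateness, is sketched below in condensed form; it mirrors edf_higher_prio() in litmus/edf_common.c later in this patch, where fp_t, _frac(), _gt(), and _eq() are the fixed-point helpers used.

    /* Sketch: breaking a deadline tie between jobs a and b under
     * EDF_TIE_BREAK_LATENESS_NORM (condensed from edf_higher_prio()). */
    static int lateness_norm_tie_break(struct task_struct *a, struct task_struct *b)
    {
            fp_t a_norm = _frac(get_lateness(a), get_rt_relative_deadline(a));
            fp_t b_norm = _frac(get_lateness(b), get_rt_relative_deadline(b));
            if (_gt(a_norm, b_norm))
                    return 1;                /* a's previous job was "more late" */
            if (_eq(a_norm, b_norm))
                    return a->pid < b->pid;  /* still tied: fall back to PID */
            return 0;
    }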
+ endmenu endmenu diff --git a/litmus/Makefile b/litmus/Makefile index 6318f1c6fac8..c85abc7389c5 100644 --- a/litmus/Makefile +++ b/litmus/Makefile @@ -2,6 +2,24 @@ # Makefile for LITMUS^RT # +obj-y = sched_plugin.o litmus.o \ + preempt.o \ + litmus_proc.o \ + budget.o \ + clustered.o \ + jobs.o \ + sync.o \ + rt_domain.o \ + edf_common.o \ + fp_common.o \ + fdso.o \ + locking.o \ + srp.o \ + bheap.o \ + binheap.o \ + ctrldev.o \ + uncachedev.o + obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o obj-$(CONFIG_SCHED_DEBUG_TRACE) += sched_trace.o diff --git a/litmus/bheap.c b/litmus/bheap.c new file mode 100644 index 000000000000..2707e0122b6d --- /dev/null +++ b/litmus/bheap.c @@ -0,0 +1,316 @@ +#include +#include +#include + +void bheap_init(struct bheap* heap) +{ + heap->head = NULL; + heap->min = NULL; +} + +void bheap_node_init(struct bheap_node** _h, void* value) +{ + struct bheap_node* h = *_h; + h->parent = NULL; + h->next = NULL; + h->child = NULL; + h->degree = NOT_IN_HEAP; + h->value = value; + h->ref = _h; +} + + +/* make child a subtree of root */ +static void __bheap_link(struct bheap_node* root, + struct bheap_node* child) +{ + child->parent = root; + child->next = root->child; + root->child = child; + root->degree++; +} + +/* merge root lists */ +static struct bheap_node* __bheap_merge(struct bheap_node* a, + struct bheap_node* b) +{ + struct bheap_node* head = NULL; + struct bheap_node** pos = &head; + + while (a && b) { + if (a->degree < b->degree) { + *pos = a; + a = a->next; + } else { + *pos = b; + b = b->next; + } + pos = &(*pos)->next; + } + if (a) + *pos = a; + else + *pos = b; + return head; +} + +/* reverse a linked list of nodes. also clears parent pointer */ +static struct bheap_node* __bheap_reverse(struct bheap_node* h) +{ + struct bheap_node* tail = NULL; + struct bheap_node* next; + + if (!h) + return h; + + h->parent = NULL; + while (h->next) { + next = h->next; + h->next = tail; + tail = h; + h = next; + h->parent = NULL; + } + h->next = tail; + return h; +} + +static void __bheap_min(bheap_prio_t higher_prio, struct bheap* heap, + struct bheap_node** prev, struct bheap_node** node) +{ + struct bheap_node *_prev, *cur; + *prev = NULL; + + if (!heap->head) { + *node = NULL; + return; + } + + *node = heap->head; + _prev = heap->head; + cur = heap->head->next; + while (cur) { + if (higher_prio(cur, *node)) { + *node = cur; + *prev = _prev; + } + _prev = cur; + cur = cur->next; + } +} + +static void __bheap_union(bheap_prio_t higher_prio, struct bheap* heap, + struct bheap_node* h2) +{ + struct bheap_node* h1; + struct bheap_node *prev, *x, *next; + if (!h2) + return; + h1 = heap->head; + if (!h1) { + heap->head = h2; + return; + } + h1 = __bheap_merge(h1, h2); + prev = NULL; + x = h1; + next = x->next; + while (next) { + if (x->degree != next->degree || + (next->next && next->next->degree == x->degree)) { + /* nothing to do, advance */ + prev = x; + x = next; + } else if (higher_prio(x, next)) { + /* x becomes the root of next */ + x->next = next->next; + __bheap_link(x, next); + } else { + /* next becomes the root of x */ + if (prev) + prev->next = next; + else + h1 = next; + __bheap_link(next, x); + x = next; + } + next = x->next; + } + heap->head = h1; +} + +static struct bheap_node* __bheap_extract_min(bheap_prio_t higher_prio, + struct bheap* heap) +{ + struct bheap_node *prev, *node; + __bheap_min(higher_prio, heap, &prev, &node); + if (!node) + return NULL; + if (prev) + prev->next = node->next; + else 
+ heap->head = node->next; + __bheap_union(higher_prio, heap, __bheap_reverse(node->child)); + return node; +} + +/* insert (and reinitialize) a node into the heap */ +void bheap_insert(bheap_prio_t higher_prio, struct bheap* heap, + struct bheap_node* node) +{ + struct bheap_node *min; + node->child = NULL; + node->parent = NULL; + node->next = NULL; + node->degree = 0; + if (heap->min && higher_prio(node, heap->min)) { + /* swap min cache */ + min = heap->min; + min->child = NULL; + min->parent = NULL; + min->next = NULL; + min->degree = 0; + __bheap_union(higher_prio, heap, min); + heap->min = node; + } else + __bheap_union(higher_prio, heap, node); +} + +void bheap_uncache_min(bheap_prio_t higher_prio, struct bheap* heap) +{ + struct bheap_node* min; + if (heap->min) { + min = heap->min; + heap->min = NULL; + bheap_insert(higher_prio, heap, min); + } +} + +/* merge addition into target */ +void bheap_union(bheap_prio_t higher_prio, + struct bheap* target, struct bheap* addition) +{ + /* first insert any cached minima, if necessary */ + bheap_uncache_min(higher_prio, target); + bheap_uncache_min(higher_prio, addition); + __bheap_union(higher_prio, target, addition->head); + /* this is a destructive merge */ + addition->head = NULL; +} + +struct bheap_node* bheap_peek(bheap_prio_t higher_prio, + struct bheap* heap) +{ + if (!heap->min) + heap->min = __bheap_extract_min(higher_prio, heap); + return heap->min; +} + +struct bheap_node* bheap_take(bheap_prio_t higher_prio, + struct bheap* heap) +{ + struct bheap_node *node; + if (!heap->min) + heap->min = __bheap_extract_min(higher_prio, heap); + node = heap->min; + heap->min = NULL; + if (node) + node->degree = NOT_IN_HEAP; + return node; +} + +int bheap_decrease(bheap_prio_t higher_prio, struct bheap_node* node) +{ + struct bheap_node *parent; + struct bheap_node** tmp_ref; + void* tmp; + + /* bubble up */ + parent = node->parent; + while (parent && higher_prio(node, parent)) { + /* swap parent and node */ + tmp = parent->value; + parent->value = node->value; + node->value = tmp; + /* swap references */ + *(parent->ref) = node; + *(node->ref) = parent; + tmp_ref = parent->ref; + parent->ref = node->ref; + node->ref = tmp_ref; + /* step up */ + node = parent; + parent = node->parent; + } + + return parent != NULL; +} + +void bheap_delete(bheap_prio_t higher_prio, struct bheap* heap, + struct bheap_node* node) +{ + struct bheap_node *parent, *prev, *pos; + struct bheap_node** tmp_ref; + void* tmp; + + if (heap->min != node) { + /* bubble up */ + parent = node->parent; + while (parent) { + /* swap parent and node */ + tmp = parent->value; + parent->value = node->value; + node->value = tmp; + /* swap references */ + *(parent->ref) = node; + *(node->ref) = parent; + tmp_ref = parent->ref; + parent->ref = node->ref; + node->ref = tmp_ref; + /* step up */ + node = parent; + parent = node->parent; + } + /* now delete: + * first find prev */ + prev = NULL; + pos = heap->head; + while (pos != node) { + BUG_ON(!pos); /* fell off the list -> deleted from wrong heap */ + prev = pos; + pos = pos->next; + } + /* we have prev, now remove node */ + if (prev) + prev->next = node->next; + else + heap->head = node->next; + __bheap_union(higher_prio, heap, __bheap_reverse(node->child)); + } else + heap->min = NULL; + node->degree = NOT_IN_HEAP; +} + +/* allocate a heap node for value and insert into the heap */ +int bheap_add(bheap_prio_t higher_prio, struct bheap* heap, + void* value, int gfp_flags) +{ + struct bheap_node* hn = bheap_node_alloc(gfp_flags); + 
if (likely(hn)) { + bheap_node_init(&hn, value); + bheap_insert(higher_prio, heap, hn); + } + return hn != NULL; +} + +void* bheap_take_del(bheap_prio_t higher_prio, + struct bheap* heap) +{ + struct bheap_node* hn = bheap_take(higher_prio, heap); + void* ret = NULL; + if (hn) { + ret = hn->value; + bheap_node_free(hn); + } + return ret; +} diff --git a/litmus/binheap.c b/litmus/binheap.c new file mode 100644 index 000000000000..d3ab34b92096 --- /dev/null +++ b/litmus/binheap.c @@ -0,0 +1,387 @@ +#include + +/* Returns true of the root ancestor of node is the root of the given heap. */ +int binheap_is_in_this_heap(struct binheap_node *node, + struct binheap* heap) +{ + if(!binheap_is_in_heap(node)) { + return 0; + } + + while(node->parent != NULL) { + node = node->parent; + } + + return (node == heap->root); +} + + +/* Update the node reference pointers. Same logic as Litmus binomial heap. */ +static void __update_ref(struct binheap_node *parent, + struct binheap_node *child) +{ + *(parent->ref_ptr) = child; + *(child->ref_ptr) = parent; + + swap(parent->ref_ptr, child->ref_ptr); +} + + +/* Swaps data between two nodes. */ +static void __binheap_swap(struct binheap_node *parent, + struct binheap_node *child) +{ + swap(parent->data, child->data); + __update_ref(parent, child); +} + + +/* Swaps memory and data between two nodes. Actual nodes swap instead of + * just data. Needed when we delete nodes from the heap. + */ +static void __binheap_swap_safe(struct binheap *handle, + struct binheap_node *a, + struct binheap_node *b) +{ + swap(a->data, b->data); + __update_ref(a, b); + + if((a->parent != NULL) && (a->parent == b->parent)) { + /* special case: shared parent */ + swap(a->parent->left, a->parent->right); + } + else { + /* Update pointers to swap parents. */ + + if(a->parent) { + if(a == a->parent->left) { + a->parent->left = b; + } + else { + a->parent->right = b; + } + } + + if(b->parent) { + if(b == b->parent->left) { + b->parent->left = a; + } + else { + b->parent->right = a; + } + } + + swap(a->parent, b->parent); + } + + /* swap children */ + + if(a->left) { + a->left->parent = b; + + if(a->right) { + a->right->parent = b; + } + } + + if(b->left) { + b->left->parent = a; + + if(b->right) { + b->right->parent = a; + } + } + + swap(a->left, b->left); + swap(a->right, b->right); + + + /* update next/last/root pointers */ + + if(a == handle->next) { + handle->next = b; + } + else if(b == handle->next) { + handle->next = a; + } + + if(a == handle->last) { + handle->last = b; + } + else if(b == handle->last) { + handle->last = a; + } + + if(a == handle->root) { + handle->root = b; + } + else if(b == handle->root) { + handle->root = a; + } +} + + +/** + * Update the pointer to the last node in the complete binary tree. + * Called internally after the root node has been deleted. + */ +static void __binheap_update_last(struct binheap *handle) +{ + struct binheap_node *temp = handle->last; + + /* find a "bend" in the tree. */ + while(temp->parent && (temp == temp->parent->left)) { + temp = temp->parent; + } + + /* step over to sibling if we're not at root */ + if(temp->parent != NULL) { + temp = temp->parent->left; + } + + /* now travel right as far as possible. */ + while(temp->right != NULL) { + temp = temp->right; + } + + /* take one step to the left if we're not at the bottom-most level. */ + if(temp->left != NULL) { + temp = temp->left; + } + + handle->last = temp; +} + + +/** + * Update the pointer to the node that will take the next inserted node. 
+ * Called internally after a node has been inserted. + */ +static void __binheap_update_next(struct binheap *handle) +{ + struct binheap_node *temp = handle->next; + + /* find a "bend" in the tree. */ + while(temp->parent && (temp == temp->parent->right)) { + temp = temp->parent; + } + + /* step over to sibling if we're not at root */ + if(temp->parent != NULL) { + temp = temp->parent->right; + } + + /* now travel left as far as possible. */ + while(temp->left != NULL) { + temp = temp->left; + } + + handle->next = temp; +} + + + +/* bubble node up towards root */ +static void __binheap_bubble_up(struct binheap *handle, + struct binheap_node *node) +{ + /* let BINHEAP_POISON data bubble to the top */ + + while((node->parent != NULL) && + ((node->data == BINHEAP_POISON) || + handle->compare(node, node->parent))) { + __binheap_swap(node->parent, node); + node = node->parent; + } +} + + +/* bubble node down, swapping with min-child */ +static void __binheap_bubble_down(struct binheap *handle) +{ + struct binheap_node *node = handle->root; + + while(node->left != NULL) { + if(node->right && handle->compare(node->right, node->left)) { + if(handle->compare(node->right, node)) { + __binheap_swap(node, node->right); + node = node->right; + } + else { + break; + } + } + else { + if(handle->compare(node->left, node)) { + __binheap_swap(node, node->left); + node = node->left; + } + else { + break; + } + } + } +} + + +void __binheap_add(struct binheap_node *new_node, + struct binheap *handle, + void *data) +{ + new_node->data = data; + new_node->ref = new_node; + new_node->ref_ptr = &(new_node->ref); + + if(!binheap_empty(handle)) { + /* insert left side first */ + if(handle->next->left == NULL) { + handle->next->left = new_node; + new_node->parent = handle->next; + new_node->left = NULL; + new_node->right = NULL; + + handle->last = new_node; + + __binheap_bubble_up(handle, new_node); + } + else { + /* left occupied. insert right. */ + handle->next->right = new_node; + new_node->parent = handle->next; + new_node->left = NULL; + new_node->right = NULL; + + handle->last = new_node; + + __binheap_update_next(handle); + __binheap_bubble_up(handle, new_node); + } + } + else { + /* first node in heap */ + + new_node->parent = NULL; + new_node->left = NULL; + new_node->right = NULL; + + handle->root = new_node; + handle->next = new_node; + handle->last = new_node; + } +} + + +/** + * Removes the root node from the heap. The node is removed after coalescing + * the binheap_node with its original data pointer at the root of the tree. + * + * The 'last' node in the tree is then swapped up to the root and bubbled + * down. + */ +void __binheap_delete_root(struct binheap *handle, + struct binheap_node *container) +{ + struct binheap_node *root = handle->root; + + if(root != container) { + /* coalesce */ + __binheap_swap_safe(handle, root, container); + root = container; + } + + if(handle->last != root) { + /* swap 'last' node up to root and bubble it down. 
*/ + + struct binheap_node *to_move = handle->last; + + if(to_move->parent != root) { + handle->next = to_move->parent; + + if(handle->next->right == to_move) { + /* disconnect from parent */ + to_move->parent->right = NULL; + handle->last = handle->next->left; + } + else { + /* find new 'last' before we disconnect */ + __binheap_update_last(handle); + + /* disconnect from parent */ + to_move->parent->left = NULL; + } + } + else { + /* 'last' is direct child of root */ + + handle->next = to_move; + + if(to_move == to_move->parent->right) { + to_move->parent->right = NULL; + handle->last = to_move->parent->left; + } + else { + to_move->parent->left = NULL; + handle->last = to_move; + } + } + to_move->parent = NULL; + + /* reconnect as root. We can't just swap data ptrs since root node + * may be freed after this function returns. + */ + to_move->left = root->left; + to_move->right = root->right; + if(to_move->left != NULL) { + to_move->left->parent = to_move; + } + if(to_move->right != NULL) { + to_move->right->parent = to_move; + } + + handle->root = to_move; + + /* bubble down */ + __binheap_bubble_down(handle); + } + else { + /* removing last node in tree */ + handle->root = NULL; + handle->next = NULL; + handle->last = NULL; + } + + /* mark as removed */ + container->parent = BINHEAP_POISON; +} + + +/** + * Delete an arbitrary node. Bubble node to delete up to the root, + * and then delete to root. + */ +void __binheap_delete(struct binheap_node *node_to_delete, + struct binheap *handle) +{ + struct binheap_node *target = node_to_delete->ref; + void *temp_data = target->data; + + /* temporarily set data to null to allow node to bubble up to the top. */ + target->data = BINHEAP_POISON; + + __binheap_bubble_up(handle, target); + __binheap_delete_root(handle, node_to_delete); + + node_to_delete->data = temp_data; /* restore node data pointer */ +} + + +/** + * Bubble up a node whose pointer has decreased in value. + */ +void __binheap_decrease(struct binheap_node *orig_node, + struct binheap *handle) +{ + struct binheap_node *target = orig_node->ref; + + __binheap_bubble_up(handle, target); +} diff --git a/litmus/budget.c b/litmus/budget.c new file mode 100644 index 000000000000..47bf78a19f87 --- /dev/null +++ b/litmus/budget.c @@ -0,0 +1,116 @@ +#include +#include +#include + +#include +#include + +#include + +struct enforcement_timer { + /* The enforcement timer is used to accurately police + * slice budgets. */ + struct hrtimer timer; + int armed; +}; + +DEFINE_PER_CPU(struct enforcement_timer, budget_timer); + +static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer) +{ + struct enforcement_timer* et = container_of(timer, + struct enforcement_timer, + timer); + unsigned long flags; + + local_irq_save(flags); + TRACE("enforcement timer fired.\n"); + et->armed = 0; + /* activate scheduler */ + litmus_reschedule_local(); + local_irq_restore(flags); + + return HRTIMER_NORESTART; +} + +/* assumes called with IRQs off */ +static void cancel_enforcement_timer(struct enforcement_timer* et) +{ + int ret; + + TRACE("cancelling enforcement timer.\n"); + + /* Since interrupts are disabled and et->armed is only + * modified locally, we do not need any locks. + */ + + if (et->armed) { + ret = hrtimer_try_to_cancel(&et->timer); + /* Should never be inactive. */ + BUG_ON(ret == 0); + /* Should never be running concurrently. 
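+		 * (The callback cannot be executing here: the timer is armed
+		 * with HRTIMER_MODE_ABS_PINNED on this CPU and IRQs are off,
+		 * so hrtimer_try_to_cancel() never observes a running callback.)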
*/ + BUG_ON(ret == -1); + + et->armed = 0; + } +} + +/* assumes called with IRQs off */ +static void arm_enforcement_timer(struct enforcement_timer* et, + struct task_struct* t) +{ + lt_t when_to_fire; + TRACE_TASK(t, "arming enforcement timer.\n"); + + WARN_ONCE(!hrtimer_is_hres_active(&et->timer), + KERN_ERR "WARNING: no high resolution timers available!?\n"); + + /* Calling this when there is no budget left for the task + * makes no sense, unless the task is non-preemptive. */ + BUG_ON(budget_exhausted(t) && (!is_np(t))); + + /* __hrtimer_start_range_ns() cancels the timer + * anyway, so we don't have to check whether it is still armed */ + + if (likely(!is_np(t))) { + when_to_fire = litmus_clock() + budget_remaining(t); + __hrtimer_start_range_ns(&et->timer, + ns_to_ktime(when_to_fire), + 0 /* delta */, + HRTIMER_MODE_ABS_PINNED, + 0 /* no wakeup */); + et->armed = 1; + } +} + + +/* expects to be called with IRQs off */ +void update_enforcement_timer(struct task_struct* t) +{ + struct enforcement_timer* et = this_cpu_ptr(&budget_timer); + + if (t && budget_precisely_enforced(t)) { + /* Make sure we call into the scheduler when this budget + * expires. */ + arm_enforcement_timer(et, t); + } else if (et->armed) { + /* Make sure we don't cause unnecessary interrupts. */ + cancel_enforcement_timer(et); + } +} + + +static int __init init_budget_enforcement(void) +{ + int cpu; + struct enforcement_timer* et; + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + et = &per_cpu(budget_timer, cpu); + hrtimer_init(&et->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + et->timer.function = on_enforcement_timeout; + } + return 0; +} + +module_init(init_budget_enforcement); diff --git a/litmus/clustered.c b/litmus/clustered.c new file mode 100644 index 000000000000..de2aca2a271c --- /dev/null +++ b/litmus/clustered.c @@ -0,0 +1,119 @@ +#include +#include +#include +#include + +#include +#include + +int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, unsigned int index) +{ + struct cpu_cacheinfo* info = get_cpu_cacheinfo(cpu); + struct cacheinfo *ci; + + if (!info || index >= info->num_leaves) { + TRACE("no shared-cache CPUs: info=%d index=%u\n", + info != NULL, index); + return 1; + } + + if (!info->info_list) { + TRACE("no shared-cache CPUs: no info_list (cpu\n"); + } + ci = info->info_list + index; + + cpumask_copy(mask, &ci->shared_cpu_map); + + TRACE("get_shared: P%u@L%u -> %d siblings\n ", cpu, index, cpumask_weight(mask)); + + return 0; +} + +int get_cluster_size(enum cache_level level) +{ + cpumask_var_t mask; + int ok; + int num_cpus; + + if (level == GLOBAL_CLUSTER) + return num_online_cpus(); + else { + if (!zalloc_cpumask_var(&mask, GFP_ATOMIC)) + return -ENOMEM; + /* assumes CPU 0 is representative of all CPUs */ + ok = get_shared_cpu_map(mask, 0, level); + /* ok == 0 means we got the map; otherwise it's an invalid cache level */ + if (ok == 0) + num_cpus = cpumask_weight(mask); + free_cpumask_var(mask); + + if (ok == 0) + return num_cpus; + else + return -EINVAL; + } +} + +int assign_cpus_to_clusters(enum cache_level level, + struct scheduling_cluster* clusters[], + unsigned int num_clusters, + struct cluster_cpu* cpus[], + unsigned int num_cpus) +{ + cpumask_var_t mask; + unsigned int i, free_cluster = 0, low_cpu; + int err = 0; + + if (!zalloc_cpumask_var(&mask, GFP_ATOMIC)) + return -ENOMEM; + + /* clear cluster pointers */ + for (i = 0; i < num_cpus; i++) { + cpus[i]->id = i; + cpus[i]->cluster = NULL; + } + + /* initialize clusters */ + for (i = 0; i < num_clusters; i++) { + 
clusters[i]->id = i; + INIT_LIST_HEAD(&clusters[i]->cpus); + } + + /* Assign each CPU. Two assumtions are made: + * 1) The index of a cpu in cpus corresponds to its processor id (i.e., the index in a cpu mask). + * 2) All cpus that belong to some cluster are online. + */ + for_each_online_cpu(i) { + /* get lowest-id CPU in cluster */ + if (level != GLOBAL_CLUSTER) { + err = get_shared_cpu_map(mask, cpus[i]->id, level); + if (err != 0) { + /* ugh... wrong cache level? Either caller screwed up + * or the CPU topology is weird. */ + printk(KERN_ERR "Could not set up clusters for L%d sharing (max: L%d).\n", + level, err); + err = -EINVAL; + goto out; + } + low_cpu = cpumask_first(mask); + } else + low_cpu = 0; + if (low_cpu == i) { + /* caller must provide an appropriate number of clusters */ + BUG_ON(free_cluster >= num_clusters); + + /* create new cluster */ + cpus[i]->cluster = clusters[free_cluster++]; + } else { + /* low_cpu points to the right cluster + * Assumption: low_cpu is actually online and was processed earlier. */ + cpus[i]->cluster = cpus[low_cpu]->cluster; + } + /* enqueue in cpus list */ + list_add_tail(&cpus[i]->cluster_list, &cpus[i]->cluster->cpus); + printk(KERN_INFO "Assigning CPU%u to cluster %u\n.", i, cpus[i]->cluster->id); + } +out: + free_cpumask_var(mask); + return err; +} diff --git a/litmus/ctrldev.c b/litmus/ctrldev.c new file mode 100644 index 000000000000..877f2786b4c8 --- /dev/null +++ b/litmus/ctrldev.c @@ -0,0 +1,160 @@ +#include +#include +#include +#include +#include + +#include + +/* only one page for now, but we might want to add a RO version at some point */ + +#define CTRL_NAME "litmus/ctrl" + +/* allocate t->rt_param.ctrl_page*/ +static int alloc_ctrl_page(struct task_struct *t) +{ + int err = 0; + + /* only allocate if the task doesn't have one yet */ + if (!tsk_rt(t)->ctrl_page) { + tsk_rt(t)->ctrl_page = (void*) get_zeroed_page(GFP_KERNEL); + if (!tsk_rt(t)->ctrl_page) + err = -ENOMEM; + /* will get de-allocated in task teardown */ + TRACE_TASK(t, "%s ctrl_page = %p\n", __FUNCTION__, + tsk_rt(t)->ctrl_page); + } + return err; +} + +static int map_ctrl_page(struct task_struct *t, struct vm_area_struct* vma) +{ + int err; + + struct page* ctrl = virt_to_page(tsk_rt(t)->ctrl_page); + + TRACE_CUR(CTRL_NAME + ": mapping %p (pfn:%lx) to 0x%lx (prot:%lx)\n", + tsk_rt(t)->ctrl_page,page_to_pfn(ctrl), vma->vm_start, + vma->vm_page_prot); + + /* Map it into the vma. */ + err = vm_insert_page(vma, vma->vm_start, ctrl); + + if (err) + TRACE_CUR(CTRL_NAME ": vm_insert_page() failed (%d)\n", err); + + return err; +} + +static void litmus_ctrl_vm_close(struct vm_area_struct* vma) +{ + TRACE_CUR("%s flags=0x%x prot=0x%x\n", __FUNCTION__, + vma->vm_flags, vma->vm_page_prot); + + TRACE_CUR(CTRL_NAME + ": %p:%p vma:%p vma->vm_private_data:%p closed.\n", + (void*) vma->vm_start, (void*) vma->vm_end, vma, + vma->vm_private_data); +} + +static int litmus_ctrl_vm_fault(struct vm_area_struct* vma, + struct vm_fault* vmf) +{ + TRACE_CUR("%s flags=0x%x (off:%ld)\n", __FUNCTION__, + vma->vm_flags, vmf->pgoff); + + /* This function should never be called, since all pages should have + * been mapped by mmap() already. 
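+	 * (map_ctrl_page() installs the page eagerly via vm_insert_page(),
+	 * so a fault on this VMA indicates a bug, not lazy population.)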
*/ + WARN_ONCE(1, "Page faults should be impossible in the control page\n"); + + return VM_FAULT_SIGBUS; +} + +static struct vm_operations_struct litmus_ctrl_vm_ops = { + .close = litmus_ctrl_vm_close, + .fault = litmus_ctrl_vm_fault, +}; + +static int litmus_ctrl_mmap(struct file* filp, struct vm_area_struct* vma) +{ + int err = 0; + + /* first make sure mapper knows what he's doing */ + + /* you can only get one page */ + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + /* you can only map the "first" page */ + if (vma->vm_pgoff != 0) + return -EINVAL; + + /* you can't share it with anyone */ + if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) + return -EINVAL; + + vma->vm_ops = &litmus_ctrl_vm_ops; + /* This mapping should not be kept across forks, + * cannot be expanded, and is not a "normal" page. */ + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_READ | VM_WRITE; + + /* We don't want the first write access to trigger a "minor" page fault + * to mark the page as dirty. This is transient, private memory, we + * don't care if it was touched or not. PAGE_SHARED means RW access, but + * not execute, and avoids copy-on-write behavior. + * See protection_map in mmap.c. */ + vma->vm_page_prot = PAGE_SHARED; + + err = alloc_ctrl_page(current); + if (!err) + err = map_ctrl_page(current, vma); + + TRACE_CUR("%s flags=0x%x prot=0x%lx\n", + __FUNCTION__, vma->vm_flags, vma->vm_page_prot); + + return err; +} + +static struct file_operations litmus_ctrl_fops = { + .owner = THIS_MODULE, + .mmap = litmus_ctrl_mmap, +}; + +static struct miscdevice litmus_ctrl_dev = { + .name = CTRL_NAME, + .minor = MISC_DYNAMIC_MINOR, + .fops = &litmus_ctrl_fops, +}; + +static int __init init_litmus_ctrl_dev(void) +{ + int err; + + BUILD_BUG_ON(sizeof(struct control_page) > PAGE_SIZE); + + BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t)); + + BUILD_BUG_ON(offsetof(struct control_page, sched.raw) + != LITMUS_CP_OFFSET_SCHED); + BUILD_BUG_ON(offsetof(struct control_page, irq_count) + != LITMUS_CP_OFFSET_IRQ_COUNT); + BUILD_BUG_ON(offsetof(struct control_page, ts_syscall_start) + != LITMUS_CP_OFFSET_TS_SC_START); + BUILD_BUG_ON(offsetof(struct control_page, irq_syscall_start) + != LITMUS_CP_OFFSET_IRQ_SC_START); + + printk("Initializing LITMUS^RT control device.\n"); + err = misc_register(&litmus_ctrl_dev); + if (err) + printk("Could not allocate %s device (%d).\n", CTRL_NAME, err); + return err; +} + +static void __exit exit_litmus_ctrl_dev(void) +{ + misc_deregister(&litmus_ctrl_dev); +} + +module_init(init_litmus_ctrl_dev); +module_exit(exit_litmus_ctrl_dev); diff --git a/litmus/edf_common.c b/litmus/edf_common.c new file mode 100644 index 000000000000..5aca2934a7b5 --- /dev/null +++ b/litmus/edf_common.c @@ -0,0 +1,200 @@ +/* + * kernel/edf_common.c + * + * Common functions for EDF based scheduler. + */ + +#include +#include +#include + +#include +#include +#include + +#include + +#ifdef CONFIG_EDF_TIE_BREAK_LATENESS_NORM +#include +#endif + +#ifdef CONFIG_EDF_TIE_BREAK_HASH +#include +static inline long edf_hash(struct task_struct *t) +{ + /* pid is 32 bits, so normally we would shove that into the + * upper 32-bits and and put the job number in the bottom + * and hash the 64-bit number with hash_64(). Sadly, + * in testing, hash_64() doesn't distribute keys were the + * upper bits are close together (as would be the case with + * pids) and job numbers are equal (as would be the case with + * synchronous task sets with all relative deadlines equal). 
+ * + * A 2006 Linux patch proposed the following solution + * (but for some reason it wasn't accepted...). + * + * At least this workaround works for 32-bit systems as well. + */ + return hash_32(hash_32((u32)tsk_rt(t)->job_params.job_no, 32) ^ t->pid, 32); +} +#endif + + +/* edf_higher_prio - returns true if first has a higher EDF priority + * than second. Deadline ties are broken by PID. + * + * both first and second may be NULL + */ +int edf_higher_prio(struct task_struct* first, + struct task_struct* second) +{ + struct task_struct *first_task = first; + struct task_struct *second_task = second; + + /* There is no point in comparing a task to itself. */ + if (first && first == second) { + TRACE_TASK(first, + "WARNING: pointless edf priority comparison.\n"); + return 0; + } + + + /* check for NULL tasks */ + if (!first || !second) + return first && !second; + +#ifdef CONFIG_LITMUS_LOCKING + + /* Check for inherited priorities. Change task + * used for comparison in such a case. + */ + if (unlikely(first->rt_param.inh_task)) + first_task = first->rt_param.inh_task; + if (unlikely(second->rt_param.inh_task)) + second_task = second->rt_param.inh_task; + + /* Check for priority boosting. Tie-break by start of boosting. + */ + if (unlikely(is_priority_boosted(first_task))) { + /* first_task is boosted, how about second_task? */ + if (!is_priority_boosted(second_task) || + lt_before(get_boost_start(first_task), + get_boost_start(second_task))) + return 1; + else + return 0; + } else if (unlikely(is_priority_boosted(second_task))) + /* second_task is boosted, first is not*/ + return 0; + +#endif + + if (earlier_deadline(first_task, second_task)) { + return 1; + } + else if (get_deadline(first_task) == get_deadline(second_task)) { + /* Need to tie break. All methods must set pid_break to 0/1 if + * first_task does not have priority over second_task. + */ + int pid_break; + + +#if defined(CONFIG_EDF_TIE_BREAK_LATENESS) + /* Tie break by lateness. Jobs with greater lateness get + * priority. This should spread tardiness across all tasks, + * especially in task sets where all tasks have the same + * period and relative deadlines. + */ + if (get_lateness(first_task) > get_lateness(second_task)) { + return 1; + } + pid_break = (get_lateness(first_task) == get_lateness(second_task)); + + +#elif defined(CONFIG_EDF_TIE_BREAK_LATENESS_NORM) + /* Tie break by lateness, normalized by relative deadline. Jobs with + * greater normalized lateness get priority. + * + * Note: Considered using the algebraically equivalent + * lateness(first)*relative_deadline(second) > + lateness(second)*relative_deadline(first) + * to avoid fixed-point math, but values are prone to overflow if inputs + * are on the order of several seconds, even in 64-bit. + */ + fp_t fnorm = _frac(get_lateness(first_task), + get_rt_relative_deadline(first_task)); + fp_t snorm = _frac(get_lateness(second_task), + get_rt_relative_deadline(second_task)); + if (_gt(fnorm, snorm)) { + return 1; + } + pid_break = _eq(fnorm, snorm); + + +#elif defined(CONFIG_EDF_TIE_BREAK_HASH) + /* Tie break by comparing hashs of (pid, job#) tuple. There should be + * a 50% chance that first_task has a higher priority than second_task. 
+ */ + long fhash = edf_hash(first_task); + long shash = edf_hash(second_task); + if (fhash < shash) { + return 1; + } + pid_break = (fhash == shash); +#else + + + /* CONFIG_EDF_PID_TIE_BREAK */ + pid_break = 1; // fall through to tie-break by pid; +#endif + + /* Tie break by pid */ + if(pid_break) { + if (first_task->pid < second_task->pid) { + return 1; + } + else if (first_task->pid == second_task->pid) { + /* If the PIDs are the same then the task with the + * inherited priority wins. + */ + if (!second->rt_param.inh_task) { + return 1; + } + } + } + } + return 0; /* fall-through. prio(second_task) > prio(first_task) */ +} + +int edf_ready_order(struct bheap_node* a, struct bheap_node* b) +{ + return edf_higher_prio(bheap2task(a), bheap2task(b)); +} + +void edf_domain_init(rt_domain_t* rt, check_resched_needed_t resched, + release_jobs_t release) +{ + rt_domain_init(rt, edf_ready_order, resched, release); +} + +/* need_to_preempt - check whether the task t needs to be preempted + * call only with irqs disabled and with ready_lock acquired + * THIS DOES NOT TAKE NON-PREEMPTIVE SECTIONS INTO ACCOUNT! + */ +int edf_preemption_needed(rt_domain_t* rt, struct task_struct *t) +{ + /* we need the read lock for edf_ready_queue */ + /* no need to preempt if there is nothing pending */ + if (!__jobs_pending(rt)) + return 0; + /* we need to reschedule if t doesn't exist */ + if (!t) + return 1; + + /* NOTE: We cannot check for non-preemptibility since we + * don't know what address space we're currently in. + */ + + /* make sure to get non-rt stuff out of the way */ + return !is_realtime(t) || edf_higher_prio(__next_ready(rt), t); +} diff --git a/litmus/fdso.c b/litmus/fdso.c new file mode 100644 index 000000000000..0ff54e41839c --- /dev/null +++ b/litmus/fdso.c @@ -0,0 +1,308 @@ +/* fdso.c - file descriptor attached shared objects + * + * (c) 2007 B. Brandenburg, LITMUS^RT project + * + * Notes: + * - objects descriptor (OD) tables are not cloned during a fork. + * - objects are created on-demand, and freed after the last reference + * is dropped. + * - for now, object types are hard coded. + * - As long as we have live objects, we keep a reference to the inode. 
+ */ + +#include +#include +#include +#include +#include + +#include + +extern struct fdso_ops generic_lock_ops; + +static const struct fdso_ops* fdso_ops[] = { + &generic_lock_ops, /* FMLP_SEM */ + &generic_lock_ops, /* SRP_SEM */ + &generic_lock_ops, /* MPCP_SEM */ + &generic_lock_ops, /* MPCP_VS_SEM */ + &generic_lock_ops, /* DPCP_SEM */ + &generic_lock_ops, /* PCP_SEM */ + &generic_lock_ops, /* DFLP_SEM */ +}; + +static int fdso_create(void** obj_ref, obj_type_t type, void* __user config) +{ + BUILD_BUG_ON(ARRAY_SIZE(fdso_ops) != MAX_OBJ_TYPE + 1); + + if (fdso_ops[type]->create) + return fdso_ops[type]->create(obj_ref, type, config); + else + return -EINVAL; +} + +static void fdso_destroy(obj_type_t type, void* obj) +{ + fdso_ops[type]->destroy(type, obj); +} + +static int fdso_open(struct od_table_entry* entry, void* __user config) +{ + if (fdso_ops[entry->obj->type]->open) + return fdso_ops[entry->obj->type]->open(entry, config); + else + return 0; +} + +static int fdso_close(struct od_table_entry* entry) +{ + if (fdso_ops[entry->obj->type]->close) + return fdso_ops[entry->obj->type]->close(entry); + else + return 0; +} + +/* inode must be locked already */ +static int alloc_inode_obj(struct inode_obj_id** obj_ref, + struct inode* inode, + obj_type_t type, + unsigned int id, + void* __user config) +{ + struct inode_obj_id* obj; + void* raw_obj; + int err; + + obj = kmalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) { + return -ENOMEM; + } + + err = fdso_create(&raw_obj, type, config); + if (err != 0) { + kfree(obj); + return err; + } + + INIT_LIST_HEAD(&obj->list); + atomic_set(&obj->count, 1); + obj->type = type; + obj->id = id; + obj->obj = raw_obj; + obj->inode = inode; + + list_add(&obj->list, &inode->i_obj_list); + atomic_inc(&inode->i_count); + + printk(KERN_DEBUG "alloc_inode_obj(%p, %d, %d): object created\n", inode, type, id); + + *obj_ref = obj; + return 0; +} + +/* inode must be locked already */ +static struct inode_obj_id* get_inode_obj(struct inode* inode, + obj_type_t type, + unsigned int id) +{ + struct list_head* pos; + struct inode_obj_id* obj = NULL; + + list_for_each(pos, &inode->i_obj_list) { + obj = list_entry(pos, struct inode_obj_id, list); + if (obj->id == id && obj->type == type) { + atomic_inc(&obj->count); + return obj; + } + } + printk(KERN_DEBUG "get_inode_obj(%p, %d, %d): couldn't find object\n", inode, type, id); + return NULL; +} + + +static void put_inode_obj(struct inode_obj_id* obj) +{ + struct inode* inode; + int let_go = 0; + + inode = obj->inode; + if (atomic_dec_and_test(&obj->count)) { + + mutex_lock(&inode->i_obj_mutex); + /* no new references can be obtained */ + if (!atomic_read(&obj->count)) { + list_del(&obj->list); + fdso_destroy(obj->type, obj->obj); + kfree(obj); + let_go = 1; + } + mutex_unlock(&inode->i_obj_mutex); + if (let_go) + iput(inode); + } +} + +static struct od_table_entry* get_od_entry(struct task_struct* t) +{ + struct od_table_entry* table; + int i; + + + table = t->od_table; + if (!table) { + table = kzalloc(sizeof(*table) * MAX_OBJECT_DESCRIPTORS, + GFP_KERNEL); + t->od_table = table; + } + + for (i = 0; table && i < MAX_OBJECT_DESCRIPTORS; i++) + if (!table[i].used) { + table[i].used = 1; + return table + i; + } + return NULL; +} + +static int put_od_entry(struct od_table_entry* od) +{ + put_inode_obj(od->obj); + od->used = 0; + return 0; +} + +static long close_od_entry(struct od_table_entry *od) +{ + long ret; + + /* Give the class a chance to reject the close. 
*/ + ret = fdso_close(od); + if (ret == 0) + ret = put_od_entry(od); + + return ret; +} + +void exit_od_table(struct task_struct* t) +{ + int i; + + if (t->od_table) { + for (i = 0; i < MAX_OBJECT_DESCRIPTORS; i++) + if (t->od_table[i].used) + close_od_entry(t->od_table + i); + kfree(t->od_table); + t->od_table = NULL; + } +} + +static int do_sys_od_open(struct file* file, obj_type_t type, int id, + void* __user config) +{ + int idx = 0, err = 0; + struct inode* inode; + struct inode_obj_id* obj = NULL; + struct od_table_entry* entry; + + inode = file_inode(file); + + entry = get_od_entry(current); + if (!entry) + return -ENOMEM; + + mutex_lock(&inode->i_obj_mutex); + obj = get_inode_obj(inode, type, id); + if (!obj) + err = alloc_inode_obj(&obj, inode, type, id, config); + if (err != 0) { + obj = NULL; + idx = err; + entry->used = 0; + } else { + entry->obj = obj; + entry->class = fdso_ops[type]; + idx = entry - current->od_table; + } + + mutex_unlock(&inode->i_obj_mutex); + + /* open only if creation succeeded */ + if (!err) + err = fdso_open(entry, config); + if (err < 0) { + /* The class rejected the open call. + * We need to clean up and tell user space. + */ + if (obj) + put_od_entry(entry); + idx = err; + } + + return idx; +} + + +struct od_table_entry* get_entry_for_od(int od) +{ + struct task_struct *t = current; + + if (!t->od_table) + return NULL; + if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS) + return NULL; + if (!t->od_table[od].used) + return NULL; + return t->od_table + od; +} + + +asmlinkage long sys_od_open(int fd, int type, int obj_id, void* __user config) +{ + int ret = 0; + struct file* file; + + /* + 1) get file from fd, get inode from file + 2) lock inode + 3) try to lookup object + 4) if not present create and enqueue object, inc inode refcnt + 5) increment refcnt of object + 6) alloc od_table_entry, setup ptrs + 7) unlock inode + 8) return offset in od_table as OD + */ + + if (type < MIN_OBJ_TYPE || type > MAX_OBJ_TYPE) { + ret = -EINVAL; + goto out; + } + + file = fget(fd); + if (!file) { + ret = -EBADF; + goto out; + } + + ret = do_sys_od_open(file, type, obj_id, config); + + fput(file); + +out: + return ret; +} + + +asmlinkage long sys_od_close(int od) +{ + int ret = -EINVAL; + struct task_struct *t = current; + + if (od < 0 || od >= MAX_OBJECT_DESCRIPTORS) + return ret; + + if (!t->od_table || !t->od_table[od].used) + return ret; + + + ret = close_od_entry(t->od_table + od); + + return ret; +} diff --git a/litmus/fp_common.c b/litmus/fp_common.c new file mode 100644 index 000000000000..964a4729deff --- /dev/null +++ b/litmus/fp_common.c @@ -0,0 +1,119 @@ +/* + * litmus/fp_common.c + * + * Common functions for fixed-priority scheduler. + */ + +#include +#include +#include + +#include +#include +#include + +#include + +/* fp_higher_prio - returns true if first has a higher static priority + * than second. Ties are broken by PID. + * + * both first and second may be NULL + */ +int fp_higher_prio(struct task_struct* first, + struct task_struct* second) +{ + struct task_struct *first_task = first; + struct task_struct *second_task = second; + + /* There is no point in comparing a task to itself. */ + if (unlikely(first && first == second)) { + TRACE_TASK(first, + "WARNING: pointless FP priority comparison.\n"); + return 0; + } + + + /* check for NULL tasks */ + if (!first || !second) + return first && !second; + + if (!is_realtime(second_task)) + return 1; + +#ifdef CONFIG_LITMUS_LOCKING + + /* Check for inherited priorities. 
Change task + * used for comparison in such a case. + */ + if (unlikely(first->rt_param.inh_task)) + first_task = first->rt_param.inh_task; + if (unlikely(second->rt_param.inh_task)) + second_task = second->rt_param.inh_task; + + /* Check for priority boosting. Tie-break by start of boosting. + */ + if (unlikely(is_priority_boosted(first_task))) { + /* first_task is boosted, how about second_task? */ + if (is_priority_boosted(second_task)) + /* break by priority point */ + return lt_before(get_boost_start(first_task), + get_boost_start(second_task)); + else + /* priority boosting wins. */ + return 1; + } else if (unlikely(is_priority_boosted(second_task))) + /* second_task is boosted, first is not*/ + return 0; + +#endif + + /* Comparisons to itself are not expected; priority inheritance + * should also not cause this to happen. */ + BUG_ON(first_task == second_task); + + if (get_priority(first_task) < get_priority(second_task)) + return 1; + else if (get_priority(first_task) == get_priority(second_task)) + /* Break by PID. */ + return first_task->pid < second_task->pid; + else + return 0; +} + +int fp_ready_order(struct bheap_node* a, struct bheap_node* b) +{ + return fp_higher_prio(bheap2task(a), bheap2task(b)); +} + +void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched, + release_jobs_t release) +{ + rt_domain_init(rt, fp_ready_order, resched, release); +} + +/* need_to_preempt - check whether the task t needs to be preempted + */ +int fp_preemption_needed(struct fp_prio_queue *q, struct task_struct *t) +{ + struct task_struct *pending; + + pending = fp_prio_peek(q); + + if (!pending) + return 0; + if (!t) + return 1; + + /* make sure to get non-rt stuff out of the way */ + return !is_realtime(t) || fp_higher_prio(pending, t); +} + +void fp_prio_queue_init(struct fp_prio_queue* q) +{ + int i; + + for (i = 0; i < FP_PRIO_BIT_WORDS; i++) + q->bitmask[i] = 0; + for (i = 0; i < LITMUS_MAX_PRIORITY; i++) + bheap_init(&q->queue[i]); +} diff --git a/litmus/jobs.c b/litmus/jobs.c new file mode 100644 index 000000000000..0dd36b9343d6 --- /dev/null +++ b/litmus/jobs.c @@ -0,0 +1,82 @@ +/* litmus/jobs.c - common job control code + */ + +#include + +#include +#include +#include +#include + +static inline void setup_release(struct task_struct *t, lt_t release) +{ + /* prepare next release */ + t->rt_param.job_params.release = release; + t->rt_param.job_params.deadline = release + get_rt_relative_deadline(t); + t->rt_param.job_params.exec_time = 0; + + /* update job sequence number */ + t->rt_param.job_params.job_no++; +} + +void prepare_for_next_period(struct task_struct *t) +{ + BUG_ON(!t); + + /* Record lateness before we set up the next job's + * release and deadline. Lateness may be negative. 
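+	 * (A negative value means the previous job completed before its
+	 * deadline; this field feeds the lateness-based EDF tie-breaks in
+	 * edf_common.c.)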
+ */ + t->rt_param.job_params.lateness = + (long long)litmus_clock() - + (long long)t->rt_param.job_params.deadline; + + if (tsk_rt(t)->sporadic_release) { + TRACE_TASK(t, "sporadic release at %llu\n", + tsk_rt(t)->sporadic_release_time); + /* sporadic release */ + setup_release(t, tsk_rt(t)->sporadic_release_time); + tsk_rt(t)->sporadic_release = 0; + } else { + /* periodic release => add period */ + setup_release(t, get_release(t) + get_rt_period(t)); + } +} + +void release_at(struct task_struct *t, lt_t start) +{ + BUG_ON(!t); + setup_release(t, start); + tsk_rt(t)->completed = 0; +} + +long default_wait_for_release_at(lt_t release_time) +{ + struct task_struct *t = current; + unsigned long flags; + + local_irq_save(flags); + tsk_rt(t)->sporadic_release_time = release_time; + smp_wmb(); + tsk_rt(t)->sporadic_release = 1; + local_irq_restore(flags); + + return litmus->complete_job(); +} + + +/* + * Deactivate current task until the beginning of the next period. + */ +long complete_job(void) +{ + preempt_disable(); + TRACE_CUR("job completion indicated at %llu\n", litmus_clock()); + /* Mark that we do not excute anymore */ + tsk_rt(current)->completed = 1; + /* call schedule, this will return when a new job arrives + * it also takes care of preparing for the next release + */ + litmus_reschedule_local(); + preempt_enable(); + return 0; +} diff --git a/litmus/litmus.c b/litmus/litmus.c new file mode 100644 index 000000000000..703360c68609 --- /dev/null +++ b/litmus/litmus.c @@ -0,0 +1,681 @@ +/* + * litmus.c -- Implementation of the LITMUS syscalls, + * the LITMUS intialization code, + * and the procfs interface.. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_SCHED_CPU_AFFINITY +#include +#endif + +/* Number of RT tasks that exist in the system */ +atomic_t rt_task_count = ATOMIC_INIT(0); + +#ifdef CONFIG_RELEASE_MASTER +/* current master CPU for handling timer IRQs */ +atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU); +#endif + +static struct kmem_cache * bheap_node_cache; +extern struct kmem_cache * release_heap_cache; + +struct bheap_node* bheap_node_alloc(int gfp_flags) +{ + return kmem_cache_alloc(bheap_node_cache, gfp_flags); +} + +void bheap_node_free(struct bheap_node* hn) +{ + kmem_cache_free(bheap_node_cache, hn); +} + +struct release_heap* release_heap_alloc(int gfp_flags); +void release_heap_free(struct release_heap* rh); + +/** + * Get the quantum alignment as a cmdline option. + * Default is staggered quanta, as this results in lower overheads. + */ +static bool aligned_quanta = 0; +module_param(aligned_quanta, bool, 0644); + +u64 cpu_stagger_offset(int cpu) +{ + u64 offset = 0; + + if (!aligned_quanta) { + offset = LITMUS_QUANTUM_LENGTH_NS; + do_div(offset, num_possible_cpus()); + offset *= cpu; + } + return offset; +} + +/* + * sys_set_task_rt_param + * @pid: Pid of the task which scheduling parameters must be changed + * @param: New real-time extension parameters such as the execution cost and + * period + * Syscall for manipulating with task rt extension params + * Returns EFAULT if param is NULL. + * ESRCH if pid is not corrsponding + * to a valid task. + * EINVAL if either period or execution cost is <=0 + * EPERM if pid is a real-time task + * 0 if success + * + * Only non-real-time tasks may be configured with this system call + * to avoid races with the scheduler. 
In practice, this means that a + * task's parameters must be set _before_ calling sys_prepare_rt_task() + * + * find_task_by_vpid() assumes that we are in the same namespace of the + * target. + */ +asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param) +{ + struct rt_task tp; + struct task_struct *target; + int retval = -EINVAL; + + printk("Setting up rt task parameters for process %d.\n", pid); + + if (pid < 0 || param == 0) { + goto out; + } + if (copy_from_user(&tp, param, sizeof(tp))) { + retval = -EFAULT; + goto out; + } + + /* Task search and manipulation must be protected */ + read_lock_irq(&tasklist_lock); + rcu_read_lock(); + if (!(target = find_task_by_vpid(pid))) { + retval = -ESRCH; + rcu_read_unlock(); + goto out_unlock; + } + rcu_read_unlock(); + + if (is_realtime(target)) { + /* The task is already a real-time task. + * We cannot not allow parameter changes at this point. + */ + retval = -EBUSY; + goto out_unlock; + } + + /* set relative deadline to be implicit if left unspecified */ + if (tp.relative_deadline == 0) + tp.relative_deadline = tp.period; + + if (tp.exec_cost <= 0) + goto out_unlock; + if (tp.period <= 0) + goto out_unlock; + if (min(tp.relative_deadline, tp.period) < tp.exec_cost) /*density check*/ + { + printk(KERN_INFO "litmus: real-time task %d rejected " + "because task density > 1.0\n", pid); + goto out_unlock; + } + if (tp.cls != RT_CLASS_HARD && + tp.cls != RT_CLASS_SOFT && + tp.cls != RT_CLASS_BEST_EFFORT) + { + printk(KERN_INFO "litmus: real-time task %d rejected " + "because its class is invalid\n", pid); + goto out_unlock; + } + if (tp.budget_policy != NO_ENFORCEMENT && + tp.budget_policy != QUANTUM_ENFORCEMENT && + tp.budget_policy != PRECISE_ENFORCEMENT) + { + printk(KERN_INFO "litmus: real-time task %d rejected " + "because unsupported budget enforcement policy " + "specified (%d)\n", + pid, tp.budget_policy); + goto out_unlock; + } + + target->rt_param.task_params = tp; + + retval = 0; + out_unlock: + read_unlock_irq(&tasklist_lock); + out: + return retval; +} + +/* + * Getter of task's RT params + * returns EINVAL if param or pid is NULL + * returns ESRCH if pid does not correspond to a valid task + * returns EFAULT if copying of parameters has failed. + * + * find_task_by_vpid() assumes that we are in the same namespace of the + * target. + */ +asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param) +{ + int retval = -EINVAL; + struct task_struct *source; + struct rt_task lp; + if (param == 0 || pid < 0) + goto out; + read_lock(&tasklist_lock); + if (!(source = find_task_by_vpid(pid))) { + retval = -ESRCH; + goto out_unlock; + } + lp = source->rt_param.task_params; + read_unlock(&tasklist_lock); + /* Do copying outside the lock */ + retval = + copy_to_user(param, &lp, sizeof(lp)) ? -EFAULT : 0; + return retval; + out_unlock: + read_unlock(&tasklist_lock); + out: + return retval; + +} + +/* + * This is the crucial function for periodic task implementation, + * It checks if a task is periodic, checks if such kind of sleep + * is permitted and calls plugin-specific sleep, which puts the + * task into a wait array. 
+ * returns 0 on successful wakeup + * returns EPERM if current conditions do not permit such sleep + * returns EINVAL if current task is not able to go to sleep + */ +asmlinkage long sys_complete_job(void) +{ + int retval = -EPERM; + if (!is_realtime(current)) { + retval = -EINVAL; + goto out; + } + /* Task with negative or zero period cannot sleep */ + if (get_rt_period(current) <= 0) { + retval = -EINVAL; + goto out; + } + /* The plugin has to put the task into an + * appropriate queue and call schedule + */ + retval = litmus->complete_job(); + out: + return retval; +} + +/* This is an "improved" version of sys_complete_job that + * addresses the problem of unintentionally missing a job after + * an overrun. + * + * returns 0 on successful wakeup + * returns EPERM if current conditions do not permit such sleep + * returns EINVAL if current task is not able to go to sleep + */ +asmlinkage long sys_wait_for_job_release(unsigned int job) +{ + int retval = -EPERM; + if (!is_realtime(current)) { + retval = -EINVAL; + goto out; + } + + /* Task with negative or zero period cannot sleep */ + if (get_rt_period(current) <= 0) { + retval = -EINVAL; + goto out; + } + + retval = 0; + + /* first wait until we have "reached" the desired job + * + * This implementation has at least two problems: + * + * 1) It doesn't gracefully handle the wrap around of + * job_no. Since LITMUS is a prototype, this is not much + * of a problem right now. + * + * 2) It is theoretically racy if a job release occurs + * between checking job_no and calling sleep_next_period(). + * A proper solution would requiring adding another callback + * in the plugin structure and testing the condition with + * interrupts disabled. + * + * FIXME: At least problem 2 should be taken care of eventually. + */ + while (!retval && job > current->rt_param.job_params.job_no) + /* If the last job overran then job <= job_no and we + * don't send the task to sleep. + */ + retval = litmus->complete_job(); + out: + return retval; +} + +/* This is a helper syscall to query the current job sequence number. + * + * returns 0 on successful query + * returns EPERM if task is not a real-time task. + * returns EFAULT if &job is not a valid pointer. + */ +asmlinkage long sys_query_job_no(unsigned int __user *job) +{ + int retval = -EPERM; + if (is_realtime(current)) + retval = put_user(current->rt_param.job_params.job_no, job); + + return retval; +} + +/* sys_null_call() is only used for determining raw system call + * overheads (kernel entry, kernel exit). It has no useful side effects. + * If ts is non-NULL, then the current Feather-Trace time is recorded. + */ +asmlinkage long sys_null_call(cycles_t __user *ts) +{ + long ret = 0; + cycles_t now; + + if (ts) { + now = get_cycles(); + ret = put_user(now, ts); + } + + return ret; +} + +/* p is a real-time task. Re-init its state as a best-effort task. */ +static void reinit_litmus_state(struct task_struct* p, int restore) +{ + struct rt_task user_config = {}; + void* ctrl_page = NULL; + + if (restore) { + /* Safe user-space provided configuration data. + * and allocated page. */ + user_config = p->rt_param.task_params; + ctrl_page = p->rt_param.ctrl_page; + } + + /* We probably should not be inheriting any task's priority + * at this point in time. + */ + WARN_ON(p->rt_param.inh_task); + + /* Cleanup everything else. */ + memset(&p->rt_param, 0, sizeof(p->rt_param)); + + /* Restore preserved fields. 
*/ + if (restore) { + p->rt_param.task_params = user_config; + p->rt_param.ctrl_page = ctrl_page; + } +} + +long litmus_admit_task(struct task_struct* tsk) +{ + long retval = 0; + + BUG_ON(is_realtime(tsk)); + + tsk_rt(tsk)->heap_node = NULL; + tsk_rt(tsk)->rel_heap = NULL; + + if (get_rt_relative_deadline(tsk) == 0 || + get_exec_cost(tsk) > + min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) { + TRACE_TASK(tsk, + "litmus admit: invalid task parameters " + "(e = %lu, p = %lu, d = %lu)\n", + get_exec_cost(tsk), get_rt_period(tsk), + get_rt_relative_deadline(tsk)); + retval = -EINVAL; + goto out; + } + + INIT_LIST_HEAD(&tsk_rt(tsk)->list); + + /* allocate heap node for this task */ + tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC); + tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC); + + if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) { + printk(KERN_WARNING "litmus: no more heap node memory!?\n"); + + retval = -ENOMEM; + goto out; + } else { + bheap_node_init(&tsk_rt(tsk)->heap_node, tsk); + } + + preempt_disable(); + + retval = litmus->admit_task(tsk); + + if (!retval) { + sched_trace_task_name(tsk); + sched_trace_task_param(tsk); + atomic_inc(&rt_task_count); + } + + preempt_enable(); + +out: + if (retval) { + if (tsk_rt(tsk)->heap_node) + bheap_node_free(tsk_rt(tsk)->heap_node); + if (tsk_rt(tsk)->rel_heap) + release_heap_free(tsk_rt(tsk)->rel_heap); + } + return retval; +} + +void litmus_clear_state(struct task_struct* tsk) +{ + BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node)); + bheap_node_free(tsk_rt(tsk)->heap_node); + release_heap_free(tsk_rt(tsk)->rel_heap); + + atomic_dec(&rt_task_count); + reinit_litmus_state(tsk, 1); +} + +/* called from sched_setscheduler() */ +void litmus_exit_task(struct task_struct* tsk) +{ + if (is_realtime(tsk)) { + sched_trace_task_completion(tsk, 1); + + litmus->task_exit(tsk); + } +} + +static DECLARE_RWSEM(plugin_switch_mutex); + +void litmus_plugin_switch_disable(void) +{ + down_read(&plugin_switch_mutex); +} + +void litmus_plugin_switch_enable(void) +{ + up_read(&plugin_switch_mutex); +} + +static int __do_plugin_switch(struct sched_plugin* plugin) +{ + int ret; + + + /* don't switch if there are active real-time tasks */ + if (atomic_read(&rt_task_count) == 0) { + TRACE("deactivating plugin %s\n", litmus->plugin_name); + ret = litmus->deactivate_plugin(); + if (0 != ret) + goto out; + + TRACE("activating plugin %s\n", plugin->plugin_name); + ret = plugin->activate_plugin(); + if (0 != ret) { + printk(KERN_INFO "Can't activate %s (%d).\n", + plugin->plugin_name, ret); + plugin = &linux_sched_plugin; + } + + printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name); + litmus = plugin; + } else + ret = -EBUSY; +out: + TRACE("do_plugin_switch() => %d\n", ret); + return ret; +} + +static atomic_t ready_to_switch; + +static int do_plugin_switch(void *_plugin) +{ + unsigned long flags; + int ret = 0; + + local_save_flags(flags); + local_irq_disable(); + hard_irq_disable(); + + if (atomic_dec_and_test(&ready_to_switch)) + { + ret = __do_plugin_switch((struct sched_plugin*) _plugin); + atomic_set(&ready_to_switch, INT_MAX); + } + + do { + cpu_relax(); + } while (atomic_read(&ready_to_switch) != INT_MAX); + + local_irq_restore(flags); + return ret; +} + +/* Switching a plugin in use is tricky. + * We must watch out that no real-time tasks exists + * (and that none is created in parallel) and that the plugin is not + * currently in use on any processor (in theory). 
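
In practice the switch is requested from userspace by writing the plugin's name to /proc/litmus/active_plugin, which is served by litmus_active_proc_write() in litmus_proc.c further below; a minimal sketch:

/* Sketch: ask the kernel to switch the active scheduler plugin. Succeeds
 * only while no real-time tasks exist (-EBUSY otherwise, see above). */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int select_plugin(const char *name)
{
        int fd = open("/proc/litmus/active_plugin", O_WRONLY);
        ssize_t written;

        if (fd < 0)
                return -1;
        written = write(fd, name, strlen(name));
        close(fd);
        return written < 0 ? -1 : 0;
}

For example, select_plugin("Linux") falls back to the default Linux plugin registered by this file's init code.
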
+ */ +int switch_sched_plugin(struct sched_plugin* plugin) +{ + int err; + struct domain_proc_info* domain_info; + + BUG_ON(!plugin); + + if (atomic_read(&rt_task_count) == 0) { + down_write(&plugin_switch_mutex); + + deactivate_domain_proc(); + + get_online_cpus(); + atomic_set(&ready_to_switch, num_online_cpus()); + err = stop_cpus(cpu_online_mask, do_plugin_switch, plugin); + put_online_cpus(); + + if (!litmus->get_domain_proc_info(&domain_info)) + activate_domain_proc(domain_info); + + up_write(&plugin_switch_mutex); + return err; + } else + return -EBUSY; +} + +/* Called upon fork. + * p is the newly forked task. + */ +void litmus_fork(struct task_struct* p) +{ + if (is_realtime(p)) { + /* clean out any litmus related state, don't preserve anything */ + reinit_litmus_state(p, 0); + /* Don't let the child be a real-time task. */ + p->sched_reset_on_fork = 1; + } else + /* non-rt tasks might have ctrl_page set */ + tsk_rt(p)->ctrl_page = NULL; + + /* od tables are never inherited across a fork */ + p->od_table = NULL; +} + +/* Called upon execve(). + * current is doing the exec. + * Don't let address space specific stuff leak. + */ +void litmus_exec(void) +{ + struct task_struct* p = current; + + if (is_realtime(p)) { + WARN_ON(p->rt_param.inh_task); + if (tsk_rt(p)->ctrl_page) { + free_page((unsigned long) tsk_rt(p)->ctrl_page); + tsk_rt(p)->ctrl_page = NULL; + } + } +} + +/* Called when dead_tsk is being deallocated + */ +void exit_litmus(struct task_struct *dead_tsk) +{ + /* We also allow non-RT tasks to + * allocate control pages to allow + * measurements with non-RT tasks. + * So check if we need to free the page + * in any case. + */ + if (tsk_rt(dead_tsk)->ctrl_page) { + TRACE_TASK(dead_tsk, + "freeing ctrl_page %p\n", + tsk_rt(dead_tsk)->ctrl_page); + free_page((unsigned long) tsk_rt(dead_tsk)->ctrl_page); + } + + /* Tasks should not be real-time tasks any longer at this point. */ + BUG_ON(is_realtime(dead_tsk)); +} + +void litmus_do_exit(struct task_struct *exiting_tsk) +{ + /* This task called do_exit(), but is still a real-time task. To avoid + * complications later, we force it to be a non-real-time task now. */ + + struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; + + TRACE_TASK(exiting_tsk, "exiting, demoted to SCHED_FIFO\n"); + sched_setscheduler_nocheck(exiting_tsk, SCHED_FIFO, ¶m); +} + +void litmus_dealloc(struct task_struct *tsk) +{ + /* tsk is no longer a real-time task */ + TRACE_TASK(tsk, "Deallocating real-time task data\n"); + litmus->task_cleanup(tsk); + litmus_clear_state(tsk); +} + +/* move current non-RT task to a specific CPU */ +int litmus_be_migrate_to(int cpu) +{ + struct cpumask single_cpu_aff; + + cpumask_clear(&single_cpu_aff); + cpumask_set_cpu(cpu, &single_cpu_aff); + return sched_setaffinity(current->pid, &single_cpu_aff); +} + +#ifdef CONFIG_MAGIC_SYSRQ +int sys_kill(int pid, int sig); + +static void sysrq_handle_kill_rt_tasks(int key) +{ + struct task_struct *t; + read_lock(&tasklist_lock); + for_each_process(t) { + if (is_realtime(t)) { + sys_kill(t->pid, SIGKILL); + } + } + read_unlock(&tasklist_lock); +} + +static struct sysrq_key_op sysrq_kill_rt_tasks_op = { + .handler = sysrq_handle_kill_rt_tasks, + .help_msg = "quit-rt-tasks(X)", + .action_msg = "sent SIGKILL to all LITMUS^RT real-time tasks", +}; +#endif + +extern struct sched_plugin linux_sched_plugin; + +static int litmus_shutdown_nb(struct notifier_block *unused1, + unsigned long unused2, void *unused3) +{ + /* Attempt to switch back to regular Linux scheduling. 
+ * Forces the active plugin to clean up. + */ + if (litmus != &linux_sched_plugin) { + int ret = switch_sched_plugin(&linux_sched_plugin); + if (ret) { + printk("Auto-shutdown of active Litmus plugin failed.\n"); + } + } + return NOTIFY_DONE; +} + +static struct notifier_block shutdown_notifier = { + .notifier_call = litmus_shutdown_nb, +}; + +static int __init _init_litmus(void) +{ + /* Common initializers, + * mode change lock is used to enforce single mode change + * operation. + */ + printk("Starting LITMUS^RT kernel\n"); + + register_sched_plugin(&linux_sched_plugin); + + bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC); + release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC); + +#ifdef CONFIG_MAGIC_SYSRQ + /* offer some debugging help */ + if (!register_sysrq_key('x', &sysrq_kill_rt_tasks_op)) + printk("Registered kill rt tasks magic sysrq.\n"); + else + printk("Could not register kill rt tasks magic sysrq.\n"); +#endif + + init_litmus_proc(); + + register_reboot_notifier(&shutdown_notifier); + + return 0; +} + +static void _exit_litmus(void) +{ + unregister_reboot_notifier(&shutdown_notifier); + + exit_litmus_proc(); + kmem_cache_destroy(bheap_node_cache); + kmem_cache_destroy(release_heap_cache); +} + +module_init(_init_litmus); +module_exit(_exit_litmus); diff --git a/litmus/litmus_proc.c b/litmus/litmus_proc.c new file mode 100644 index 000000000000..2ef1669eff17 --- /dev/null +++ b/litmus/litmus_proc.c @@ -0,0 +1,573 @@ +/* + * litmus_proc.c -- Implementation of the /proc/litmus directory tree. + */ + +#include +#include +#include +#include + +#include +#include + +#include + +/* in litmus/litmus.c */ +extern atomic_t rt_task_count; + +static struct proc_dir_entry *litmus_dir = NULL, + *curr_file = NULL, + *stat_file = NULL, + *plugs_dir = NULL, +#ifdef CONFIG_RELEASE_MASTER + *release_master_file = NULL, +#endif + *plugs_file = NULL, + *domains_dir = NULL, + *cpus_dir = NULL; + + +/* in litmus/sync.c */ +int count_tasks_waiting_for_release(void); + +static int litmus_stats_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, + "real-time tasks = %d\n" + "ready for release = %d\n", + atomic_read(&rt_task_count), + count_tasks_waiting_for_release()); + return 0; +} + +static int litmus_stats_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, litmus_stats_proc_show, PDE_DATA(inode)); +} + +static const struct file_operations litmus_stats_proc_fops = { + .open = litmus_stats_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + + +static int litmus_loaded_proc_show(struct seq_file *m, void *v) +{ + print_sched_plugins(m); + return 0; +} + +static int litmus_loaded_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, litmus_loaded_proc_show, PDE_DATA(inode)); +} + +static const struct file_operations litmus_loaded_proc_fops = { + .open = litmus_loaded_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + + + + +/* in litmus/litmus.c */ +int switch_sched_plugin(struct sched_plugin*); + +static ssize_t litmus_active_proc_write(struct file *file, + const char __user *buffer, size_t count, + loff_t *ppos) +{ + char name[65]; + struct sched_plugin* found; + ssize_t ret = -EINVAL; + int err; + + + ret = copy_and_chomp(name, sizeof(name), buffer, count); + if (ret < 0) + return ret; + + found = find_sched_plugin(name); + + if (found) { + err = switch_sched_plugin(found); + if (err) { + printk(KERN_INFO "Could not switch plugin: %d\n", err); + ret = 
err; + } + } else { + printk(KERN_INFO "Plugin '%s' is unknown.\n", name); + ret = -ESRCH; + } + + return ret; +} + +static int litmus_active_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%s\n", litmus->plugin_name); + return 0; +} + +static int litmus_active_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, litmus_active_proc_show, PDE_DATA(inode)); +} + +static const struct file_operations litmus_active_proc_fops = { + .open = litmus_active_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = litmus_active_proc_write, +}; + + +#ifdef CONFIG_RELEASE_MASTER +static ssize_t litmus_release_master_proc_write( + struct file *file, + const char __user *buffer, size_t count, + loff_t *ppos) +{ + int cpu, err, online = 0; + char msg[64]; + ssize_t len; + + len = copy_and_chomp(msg, sizeof(msg), buffer, count); + + if (len < 0) + return len; + + if (strcmp(msg, "NO_CPU") == 0) + atomic_set(&release_master_cpu, NO_CPU); + else { + err = sscanf(msg, "%d", &cpu); + if (err == 1 && cpu >= 0 && (online = cpu_online(cpu))) { + atomic_set(&release_master_cpu, cpu); + } else { + TRACE("invalid release master: '%s' " + "(err:%d cpu:%d online:%d)\n", + msg, err, cpu, online); + len = -EINVAL; + } + } + return len; +} + +static int litmus_release_master_proc_show(struct seq_file *m, void *v) +{ + int master; + master = atomic_read(&release_master_cpu); + if (master == NO_CPU) + seq_printf(m, "NO_CPU\n"); + else + seq_printf(m, "%d\n", master); + return 0; +} + +static int litmus_release_master_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, litmus_release_master_proc_show, PDE_DATA(inode)); +} + +static const struct file_operations litmus_release_master_proc_fops = { + .open = litmus_release_master_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = litmus_release_master_proc_write, +}; +#endif + +int __init init_litmus_proc(void) +{ + litmus_dir = proc_mkdir("litmus", NULL); + if (!litmus_dir) { + printk(KERN_ERR "Could not allocate LITMUS^RT procfs entry.\n"); + return -ENOMEM; + } + + curr_file = proc_create("active_plugin", 0644, litmus_dir, + &litmus_active_proc_fops); + + if (!curr_file) { + printk(KERN_ERR "Could not allocate active_plugin " + "procfs entry.\n"); + return -ENOMEM; + } + +#ifdef CONFIG_RELEASE_MASTER + release_master_file = proc_create("release_master", 0644, litmus_dir, + &litmus_release_master_proc_fops); + if (!release_master_file) { + printk(KERN_ERR "Could not allocate release_master " + "procfs entry.\n"); + return -ENOMEM; + } +#endif + + stat_file = proc_create("stats", 0444, litmus_dir, &litmus_stats_proc_fops); + + plugs_dir = proc_mkdir("plugins", litmus_dir); + if (!plugs_dir){ + printk(KERN_ERR "Could not allocate plugins directory " + "procfs entry.\n"); + return -ENOMEM; + } + + plugs_file = proc_create("loaded", 0444, plugs_dir, + &litmus_loaded_proc_fops); + + domains_dir = proc_mkdir("domains", litmus_dir); + if (!domains_dir) { + printk(KERN_ERR "Could not allocate domains directory " + "procfs entry.\n"); + return -ENOMEM; + } + + cpus_dir = proc_mkdir("cpus", litmus_dir); + if (!cpus_dir) { + printk(KERN_ERR "Could not allocate cpus directory " + "procfs entry.\n"); + return -ENOMEM; + } + + return 0; +} + +void exit_litmus_proc(void) +{ + if (cpus_dir || domains_dir) { + deactivate_domain_proc(); + if (cpus_dir) + remove_proc_entry("cpus", litmus_dir); + if (domains_dir) + remove_proc_entry("domains", 
litmus_dir); + } + if (plugs_file) + remove_proc_entry("loaded", plugs_dir); + if (plugs_dir) + remove_proc_entry("plugins", litmus_dir); + if (stat_file) + remove_proc_entry("stats", litmus_dir); + if (curr_file) + remove_proc_entry("active_plugin", litmus_dir); +#ifdef CONFIG_RELEASE_MASTER + if (release_master_file) + remove_proc_entry("release_master", litmus_dir); +#endif + if (litmus_dir) + remove_proc_entry("litmus", NULL); +} + +long make_plugin_proc_dir(struct sched_plugin* plugin, + struct proc_dir_entry** pde_in) +{ + struct proc_dir_entry *pde_new = NULL; + long rv; + + if (!plugin || !plugin->plugin_name){ + printk(KERN_ERR "Invalid plugin struct passed to %s.\n", + __func__); + rv = -EINVAL; + goto out_no_pde; + } + + if (!plugs_dir){ + printk(KERN_ERR "Could not make plugin sub-directory, because " + "/proc/litmus/plugins does not exist.\n"); + rv = -ENOENT; + goto out_no_pde; + } + + pde_new = proc_mkdir(plugin->plugin_name, plugs_dir); + if (!pde_new){ + printk(KERN_ERR "Could not make plugin sub-directory: " + "out of memory?.\n"); + rv = -ENOMEM; + goto out_no_pde; + } + + rv = 0; + *pde_in = pde_new; + goto out_ok; + +out_no_pde: + *pde_in = NULL; +out_ok: + return rv; +} + +void remove_plugin_proc_dir(struct sched_plugin* plugin) +{ + if (!plugin || !plugin->plugin_name){ + printk(KERN_ERR "Invalid plugin struct passed to %s.\n", + __func__); + return; + } + remove_proc_entry(plugin->plugin_name, plugs_dir); +} + + + +/* misc. I/O helper functions */ + +int copy_and_chomp(char *kbuf, unsigned long ksize, + __user const char* ubuf, unsigned long ulength) +{ + /* caller must provide buffer space */ + BUG_ON(!ksize); + + ksize--; /* leave space for null byte */ + + if (ksize > ulength) + ksize = ulength; + + if(copy_from_user(kbuf, ubuf, ksize)) + return -EFAULT; + + kbuf[ksize] = '\0'; + + /* chomp kbuf */ + if (ksize > 0 && kbuf[ksize - 1] == '\n') + kbuf[ksize - 1] = '\0'; + + return ksize; +} + +/* helper functions for clustered plugins */ +static const char* cache_level_names[] = { + "ALL", + "L1", + "L2", + "L3", +}; + +int parse_cache_level(const char *cache_name, enum cache_level *level) +{ + int err = -EINVAL; + int i; + /* do a quick and dirty comparison to find the cluster size */ + for (i = GLOBAL_CLUSTER; i <= L3_CLUSTER; i++) + if (!strcmp(cache_name, cache_level_names[i])) { + *level = (enum cache_level) i; + err = 0; + break; + } + return err; +} + +const char* cache_level_name(enum cache_level level) +{ + int idx = level; + + if (idx >= GLOBAL_CLUSTER && idx <= L3_CLUSTER) + return cache_level_names[idx]; + else + return "INVALID"; +} + + + + +/* proc file interface to configure the cluster size */ + +static ssize_t litmus_cluster_proc_write(struct file *file, + const char __user *buffer, size_t count, + loff_t *ppos) +{ + enum cache_level *level = (enum cache_level *) PDE_DATA(file_inode(file)); + ssize_t len; + char cache_name[8]; + + len = copy_and_chomp(cache_name, sizeof(cache_name), buffer, count); + + if (len > 0 && parse_cache_level(cache_name, level)) { + printk(KERN_INFO "Cluster '%s' is unknown.\n", cache_name); + len = -EINVAL; + } + + return len; +} + +static int litmus_cluster_proc_show(struct seq_file *m, void *v) +{ + enum cache_level *level = (enum cache_level *) m->private; + + seq_printf(m, "%s\n", cache_level_name(*level)); + return 0; +} + +static int litmus_cluster_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, litmus_cluster_proc_show, PDE_DATA(inode)); +} + +static const struct file_operations 
litmus_cluster_proc_fops = { + .open = litmus_cluster_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = litmus_cluster_proc_write, +}; + +struct proc_dir_entry* create_cluster_file(struct proc_dir_entry* parent, + enum cache_level* level) +{ + struct proc_dir_entry* cluster_file; + + + cluster_file = proc_create_data("cluster", 0644, parent, + &litmus_cluster_proc_fops, + (void *) level); + if (!cluster_file) { + printk(KERN_ERR + "Could not cluster procfs entry.\n"); + } + return cluster_file; +} + +static struct domain_proc_info* active_mapping = NULL; + +static int litmus_mapping_proc_show(struct seq_file *m, void *v) +{ + struct cd_mapping *mapping = (struct cd_mapping*) m->private; + + if(!mapping) + return 0; + + seq_printf(m, "%*pb\n", cpumask_pr_args(mapping->mask)); + return 0; +} + +static int litmus_mapping_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, litmus_mapping_proc_show, PDE_DATA(inode)); +} + +static const struct file_operations litmus_domain_proc_fops = { + .open = litmus_mapping_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +long activate_domain_proc(struct domain_proc_info* map) +{ + int i; + char name[8]; + + if (!map) + return -EINVAL; + if (cpus_dir == NULL || domains_dir == NULL) + return -EINVAL; + + if (active_mapping) + deactivate_domain_proc(); + + active_mapping = map; + + for (i = 0; i < map->num_cpus; ++i) { + struct cd_mapping* m = &map->cpu_to_domains[i]; + snprintf(name, sizeof(name), "%d", m->id); + m->proc_file = proc_create_data(name, 0444, cpus_dir, + &litmus_domain_proc_fops, (void*)m); + } + + for (i = 0; i < map->num_domains; ++i) { + struct cd_mapping* m = &map->domain_to_cpus[i]; + snprintf(name, sizeof(name), "%d", m->id); + m->proc_file = proc_create_data(name, 0444, domains_dir, + &litmus_domain_proc_fops, (void*)m); + } + + return 0; +} + +long deactivate_domain_proc() +{ + int i; + char name[65]; + + struct domain_proc_info* map = active_mapping; + + if (!map) + return -EINVAL; + + for (i = 0; i < map->num_cpus; ++i) { + struct cd_mapping* m = &map->cpu_to_domains[i]; + snprintf(name, sizeof(name), "%d", m->id); + remove_proc_entry(name, cpus_dir); + m->proc_file = NULL; + } + for (i = 0; i < map->num_domains; ++i) { + struct cd_mapping* m = &map->domain_to_cpus[i]; + snprintf(name, sizeof(name), "%d", m->id); + remove_proc_entry(name, domains_dir); + m->proc_file = NULL; + } + + active_mapping = NULL; + + return 0; +} + +long init_domain_proc_info(struct domain_proc_info* m, + int num_cpus, int num_domains) +{ + int i; + int num_alloced_cpu_masks = 0; + int num_alloced_domain_masks = 0; + + m->cpu_to_domains = + kmalloc(sizeof(*(m->cpu_to_domains))*num_cpus, + GFP_ATOMIC); + if(!m->cpu_to_domains) + goto failure; + + m->domain_to_cpus = + kmalloc(sizeof(*(m->domain_to_cpus))*num_domains, + GFP_ATOMIC); + if(!m->domain_to_cpus) + goto failure; + + for(i = 0; i < num_cpus; ++i) { + if(!zalloc_cpumask_var(&m->cpu_to_domains[i].mask, GFP_ATOMIC)) + goto failure; + ++num_alloced_cpu_masks; + } + for(i = 0; i < num_domains; ++i) { + if(!zalloc_cpumask_var(&m->domain_to_cpus[i].mask, GFP_ATOMIC)) + goto failure; + ++num_alloced_domain_masks; + } + + return 0; + +failure: + for(i = 0; i < num_alloced_cpu_masks; ++i) + free_cpumask_var(m->cpu_to_domains[i].mask); + for(i = 0; i < num_alloced_domain_masks; ++i) + free_cpumask_var(m->domain_to_cpus[i].mask); + if(m->cpu_to_domains) + kfree(m->cpu_to_domains); + 
if(m->domain_to_cpus) + kfree(m->domain_to_cpus); + return -ENOMEM; +} + +void destroy_domain_proc_info(struct domain_proc_info* m) +{ + int i; + for(i = 0; i < m->num_cpus; ++i) + free_cpumask_var(m->cpu_to_domains[i].mask); + for(i = 0; i < m->num_domains; ++i) + free_cpumask_var(m->domain_to_cpus[i].mask); + kfree(m->cpu_to_domains); + kfree(m->domain_to_cpus); + memset(m, sizeof(*m), 0); +} diff --git a/litmus/locking.c b/litmus/locking.c new file mode 100644 index 000000000000..43d9aece2e74 --- /dev/null +++ b/litmus/locking.c @@ -0,0 +1,188 @@ +#include +#include +#include + +#ifdef CONFIG_LITMUS_LOCKING + +#include +#include +#include +#include +#include + +static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg); +static int open_generic_lock(struct od_table_entry* entry, void* __user arg); +static int close_generic_lock(struct od_table_entry* entry); +static void destroy_generic_lock(obj_type_t type, void* sem); + +struct fdso_ops generic_lock_ops = { + .create = create_generic_lock, + .open = open_generic_lock, + .close = close_generic_lock, + .destroy = destroy_generic_lock +}; + +static inline bool is_lock(struct od_table_entry* entry) +{ + return entry->class == &generic_lock_ops; +} + +static inline struct litmus_lock* get_lock(struct od_table_entry* entry) +{ + BUG_ON(!is_lock(entry)); + return (struct litmus_lock*) entry->obj->obj; +} + +static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg) +{ + struct litmus_lock* lock; + int err; + + err = litmus->allocate_lock(&lock, type, arg); + if (err == 0) + *obj_ref = lock; + return err; +} + +static int open_generic_lock(struct od_table_entry* entry, void* __user arg) +{ + struct litmus_lock* lock = get_lock(entry); + if (lock->ops->open) + return lock->ops->open(lock, arg); + else + return 0; /* default: any task can open it */ +} + +static int close_generic_lock(struct od_table_entry* entry) +{ + struct litmus_lock* lock = get_lock(entry); + if (lock->ops->close) + return lock->ops->close(lock); + else + return 0; /* default: closing succeeds */ +} + +static void destroy_generic_lock(obj_type_t type, void* obj) +{ + struct litmus_lock* lock = (struct litmus_lock*) obj; + lock->ops->deallocate(lock); +} + +asmlinkage long sys_litmus_lock(int lock_od) +{ + long err = -EINVAL; + struct od_table_entry* entry; + struct litmus_lock* l; + + TS_SYSCALL_IN_START; + + TS_SYSCALL_IN_END; + + TS_LOCK_START; + + entry = get_entry_for_od(lock_od); + if (entry && is_lock(entry)) { + l = get_lock(entry); + TRACE_CUR("attempts to lock 0x%p\n", l); + err = l->ops->lock(l); + } + + /* Note: task my have been suspended or preempted in between! Take + * this into account when computing overheads. */ + TS_LOCK_END; + + TS_SYSCALL_OUT_START; + + return err; +} + +asmlinkage long sys_litmus_unlock(int lock_od) +{ + long err = -EINVAL; + struct od_table_entry* entry; + struct litmus_lock* l; + + TS_SYSCALL_IN_START; + + TS_SYSCALL_IN_END; + + TS_UNLOCK_START; + + entry = get_entry_for_od(lock_od); + if (entry && is_lock(entry)) { + l = get_lock(entry); + TRACE_CUR("attempts to unlock 0x%p\n", l); + err = l->ops->unlock(l); + } + + /* Note: task my have been preempted in between! Take this into + * account when computing overheads. 
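
Userspace is expected to bracket critical sections with these two syscalls, using a lock object descriptor obtained through the FDSO interface; a sketch, where litmus_lock()/litmus_unlock() are assumed wrappers and touch_shared_resource() is a placeholder:

/* Sketch: guard a critical section with sys_litmus_lock()/sys_litmus_unlock().
 * lock_od refers to a previously allocated lock object (e.g., an SRP
 * semaphore); any protocol-specific blocking happens inside l->ops->lock(). */
static void update_shared_resource(int lock_od)
{
        if (litmus_lock(lock_od) != 0)  /* may suspend the caller */
                return;                 /* e.g., -EINVAL: not a lock descriptor */

        touch_shared_resource();        /* critical section */

        litmus_unlock(lock_od);
}
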
*/ + TS_UNLOCK_END; + + TS_SYSCALL_OUT_START; + + return err; +} + +struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq) +{ + wait_queue_t* q; + struct task_struct* t = NULL; + + if (waitqueue_active(wq)) { + q = list_entry(wq->task_list.next, + wait_queue_t, task_list); + t = (struct task_struct*) q->private; + __remove_wait_queue(wq, q); + } + return(t); +} + +unsigned int __add_wait_queue_prio_exclusive( + wait_queue_head_t* head, + prio_wait_queue_t *new) +{ + struct list_head *pos; + unsigned int passed = 0; + + new->wq.flags |= WQ_FLAG_EXCLUSIVE; + + /* find a spot where the new entry is less than the next */ + list_for_each(pos, &head->task_list) { + prio_wait_queue_t* queued = list_entry(pos, prio_wait_queue_t, + wq.task_list); + + if (unlikely(lt_before(new->priority, queued->priority) || + (new->priority == queued->priority && + new->tie_breaker < queued->tie_breaker))) { + /* pos is not less than new, thus insert here */ + __list_add(&new->wq.task_list, pos->prev, pos); + goto out; + } + passed++; + } + + /* if we get to this point either the list is empty or every entry + * queued element is less than new. + * Let's add new to the end. */ + list_add_tail(&new->wq.task_list, &head->task_list); +out: + return passed; +} + + +#else + +struct fdso_ops generic_lock_ops = {}; + +asmlinkage long sys_litmus_lock(int sem_od) +{ + return -ENOSYS; +} + +asmlinkage long sys_litmus_unlock(int sem_od) +{ + return -ENOSYS; +} + +#endif diff --git a/litmus/preempt.c b/litmus/preempt.c new file mode 100644 index 000000000000..03e9b5acfb5d --- /dev/null +++ b/litmus/preempt.c @@ -0,0 +1,141 @@ +#include + +#include +#include +#include + +/* The rescheduling state of each processor. + */ +DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, resched_state); + +void sched_state_will_schedule(struct task_struct* tsk) +{ + /* Litmus hack: we only care about processor-local invocations of + * set_tsk_need_resched(). We can't reliably set the flag remotely + * since it might race with other updates to the scheduling state. We + * can't rely on the runqueue lock protecting updates to the sched + * state since processors do not acquire the runqueue locks for all + * updates to the sched state (to avoid acquiring two runqueue locks at + * the same time). Further, if tsk is residing on a remote processor, + * then that processor doesn't actually know yet that it is going to + * reschedule; it still must receive an IPI (unless a local invocation + * races). + */ + if (likely(task_cpu(tsk) == smp_processor_id())) { + VERIFY_SCHED_STATE(TASK_SCHEDULED | SHOULD_SCHEDULE | TASK_PICKED | WILL_SCHEDULE); + if (is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK)) + set_sched_state(PICKED_WRONG_TASK); + else + set_sched_state(WILL_SCHEDULE); + } else + /* Litmus tasks should never be subject to a remote + * set_tsk_need_resched(). */ + BUG_ON(is_realtime(tsk)); +#ifdef CONFIG_PREEMPT_STATE_TRACE + TRACE_TASK(tsk, "set_tsk_need_resched() ret:%p\n", + __builtin_return_address(0)); +#endif +} + +/* Called by the IPI handler after another CPU called smp_send_resched(). */ +void sched_state_ipi(void) +{ + /* If the IPI was slow, we might be in any state right now. The IPI is + * only meaningful if we are in SHOULD_SCHEDULE. */ + if (is_in_sched_state(SHOULD_SCHEDULE)) { + /* Cause scheduler to be invoked. + * This will cause a transition to WILL_SCHEDULE. 
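
Taken together, the plugin-side contract is: request a preemption with litmus_reschedule() or litmus_reschedule_local() while holding the lock that serializes scheduling decisions, and acknowledge every decision in the plugin's schedule() callback via sched_state_task_picked(); a condensed sketch, in which the lock, pick_next_job(), and the my_* names are placeholders:

/* Sketch of how a plugin drives the scheduling-state machine. */
static DEFINE_RAW_SPINLOCK(my_sched_lock);

static void my_plugin_job_release(struct task_struct *t, int cpu)
{
        raw_spin_lock(&my_sched_lock);
        /* ... enqueue the newly released job ... */
        litmus_reschedule(cpu);         /* TASK_SCHEDULED -> SHOULD_SCHEDULE, or
                                         * TASK_PICKED -> PICKED_WRONG_TASK */
        raw_spin_unlock(&my_sched_lock);
}

static struct task_struct *my_plugin_schedule(struct task_struct *prev)
{
        struct task_struct *next = pick_next_job();     /* placeholder */

        sched_state_task_picked();      /* mandatory; see sched_state_plugin_check() */
        return next;
}
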
*/ + set_tsk_need_resched(current); + TRACE_STATE("IPI -> set_tsk_need_resched(%s/%d)\n", + current->comm, current->pid); + TS_SEND_RESCHED_END; + } else { + /* ignore */ + TRACE_STATE("ignoring IPI in state %x (%s)\n", + get_sched_state(), + sched_state_name(get_sched_state())); + } +} + +/* Called by plugins to cause a CPU to reschedule. IMPORTANT: the caller must + * hold the lock that is used to serialize scheduling decisions. */ +void litmus_reschedule(int cpu) +{ + int picked_transition_ok = 0; + int scheduled_transition_ok = 0; + + /* The (remote) CPU could be in any state. */ + + /* The critical states are TASK_PICKED and TASK_SCHEDULED, as the CPU + * is not aware of the need to reschedule at this point. */ + + /* is a context switch in progress? */ + if (cpu_is_in_sched_state(cpu, TASK_PICKED)) + picked_transition_ok = sched_state_transition_on( + cpu, TASK_PICKED, PICKED_WRONG_TASK); + + if (!picked_transition_ok && + cpu_is_in_sched_state(cpu, TASK_SCHEDULED)) { + /* We either raced with the end of the context switch, or the + * CPU was in TASK_SCHEDULED anyway. */ + scheduled_transition_ok = sched_state_transition_on( + cpu, TASK_SCHEDULED, SHOULD_SCHEDULE); + } + + /* If the CPU was in state TASK_SCHEDULED, then we need to cause the + * scheduler to be invoked. */ + if (scheduled_transition_ok) { + if (smp_processor_id() == cpu) { + set_tsk_need_resched(current); + preempt_set_need_resched(); + } else { + TS_SEND_RESCHED_START(cpu); + smp_send_reschedule(cpu); + } + } + + TRACE_STATE("%s picked-ok:%d sched-ok:%d\n", + __FUNCTION__, + picked_transition_ok, + scheduled_transition_ok); +} + +void litmus_reschedule_local(void) +{ + if (is_in_sched_state(TASK_PICKED)) + set_sched_state(PICKED_WRONG_TASK); + else if (is_in_sched_state(TASK_SCHEDULED + | SHOULD_SCHEDULE + | PICKED_WRONG_TASK)) { + set_sched_state(WILL_SCHEDULE); + set_tsk_need_resched(current); + preempt_set_need_resched(); + } +} + +#ifdef CONFIG_DEBUG_KERNEL + +void sched_state_plugin_check(void) +{ + if (!is_in_sched_state(TASK_PICKED | PICKED_WRONG_TASK)) { + TRACE("!!!! plugin did not call sched_state_task_picked()!" + "Calling sched_state_task_picked() is mandatory---fix this.\n"); + set_sched_state(TASK_PICKED); + } +} + +#define NAME_CHECK(x) case x: return #x +const char* sched_state_name(int s) +{ + switch (s) { + NAME_CHECK(TASK_SCHEDULED); + NAME_CHECK(SHOULD_SCHEDULE); + NAME_CHECK(WILL_SCHEDULE); + NAME_CHECK(TASK_PICKED); + NAME_CHECK(PICKED_WRONG_TASK); + default: + return "UNKNOWN"; + }; +} + +#endif diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c new file mode 100644 index 000000000000..e5dec0bbbba9 --- /dev/null +++ b/litmus/rt_domain.c @@ -0,0 +1,353 @@ +/* + * litmus/rt_domain.c + * + * LITMUS real-time infrastructure. This file contains the + * functions that manipulate RT domains. RT domains are an abstraction + * of a ready queue and a release queue. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + +#include + +/* Uncomment when debugging timer races... */ +#if 0 +#define VTRACE_TASK TRACE_TASK +#define VTRACE TRACE +#else +#define VTRACE_TASK(t, fmt, args...) /* shut up */ +#define VTRACE(fmt, args...) 
/* be quiet already */ +#endif + +static int dummy_resched(rt_domain_t *rt) +{ + return 0; +} + +static int dummy_order(struct bheap_node* a, struct bheap_node* b) +{ + return 0; +} + +/* default implementation: use default lock */ +static void default_release_jobs(rt_domain_t* rt, struct bheap* tasks) +{ + merge_ready(rt, tasks); +} + +static unsigned int time2slot(lt_t time) +{ + return (unsigned int) time2quanta(time, FLOOR) % RELEASE_QUEUE_SLOTS; +} + +static enum hrtimer_restart on_release_timer(struct hrtimer *timer) +{ + unsigned long flags; + struct release_heap* rh; + rh = container_of(timer, struct release_heap, timer); + + TS_RELEASE_LATENCY(rh->release_time); + + VTRACE("on_release_timer(0x%p) starts.\n", timer); + + TS_RELEASE_START; + + + raw_spin_lock_irqsave(&rh->dom->release_lock, flags); + VTRACE("CB has the release_lock 0x%p\n", &rh->dom->release_lock); + /* remove from release queue */ + list_del(&rh->list); + raw_spin_unlock_irqrestore(&rh->dom->release_lock, flags); + VTRACE("CB returned release_lock 0x%p\n", &rh->dom->release_lock); + + /* call release callback */ + rh->dom->release_jobs(rh->dom, &rh->heap); + /* WARNING: rh can be referenced from other CPUs from now on. */ + + TS_RELEASE_END; + + VTRACE("on_release_timer(0x%p) ends.\n", timer); + + return HRTIMER_NORESTART; +} + +/* allocated in litmus.c */ +struct kmem_cache * release_heap_cache; + +struct release_heap* release_heap_alloc(int gfp_flags) +{ + struct release_heap* rh; + rh= kmem_cache_alloc(release_heap_cache, gfp_flags); + if (rh) { + /* initialize timer */ + hrtimer_init(&rh->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + rh->timer.function = on_release_timer; + } + return rh; +} + +void release_heap_free(struct release_heap* rh) +{ + /* make sure timer is no longer in use */ + hrtimer_cancel(&rh->timer); + kmem_cache_free(release_heap_cache, rh); +} + +/* Caller must hold release lock. + * Will return heap for given time. If no such heap exists prior to + * the invocation it will be created. + */ +static struct release_heap* get_release_heap(rt_domain_t *rt, + struct task_struct* t, + int use_task_heap) +{ + struct list_head* pos; + struct release_heap* heap = NULL; + struct release_heap* rh; + lt_t release_time = get_release(t); + unsigned int slot = time2slot(release_time); + + /* initialize pos for the case that the list is empty */ + pos = rt->release_queue.slot[slot].next; + list_for_each(pos, &rt->release_queue.slot[slot]) { + rh = list_entry(pos, struct release_heap, list); + if (release_time == rh->release_time) { + /* perfect match -- this happens on hyperperiod + * boundaries + */ + heap = rh; + break; + } else if (lt_before(release_time, rh->release_time)) { + /* we need to insert a new node since rh is + * already in the future + */ + break; + } + } + if (!heap && use_task_heap) { + /* use pre-allocated release heap */ + rh = tsk_rt(t)->rel_heap; + + rh->dom = rt; + rh->release_time = release_time; + + /* add to release queue */ + list_add(&rh->list, pos->prev); + heap = rh; + } + return heap; +} + +static void reinit_release_heap(struct task_struct* t) +{ + struct release_heap* rh; + + /* use pre-allocated release heap */ + rh = tsk_rt(t)->rel_heap; + + /* Make sure it is safe to use. The timer callback could still + * be executing on another CPU; hrtimer_cancel() will wait + * until the timer callback has completed. However, under no + * circumstances should the timer be active (= yet to be + * triggered). 
+ * + * WARNING: If the CPU still holds the release_lock at this point, + * deadlock may occur! + */ + BUG_ON(hrtimer_cancel(&rh->timer)); + + /* initialize */ + bheap_init(&rh->heap); +#ifdef CONFIG_RELEASE_MASTER + atomic_set(&rh->info.state, HRTIMER_START_ON_INACTIVE); +#endif +} +/* arm_release_timer() - start local release timer or trigger + * remote timer (pull timer) + * + * Called by add_release() with: + * - tobe_lock taken + * - IRQ disabled + */ +#ifdef CONFIG_RELEASE_MASTER +#define arm_release_timer(t) arm_release_timer_on((t), NO_CPU) +static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu) +#else +static void arm_release_timer(rt_domain_t *_rt) +#endif +{ + rt_domain_t *rt = _rt; + struct list_head list; + struct list_head *pos, *safe; + struct task_struct* t; + struct release_heap* rh; + + VTRACE("arm_release_timer() at %llu\n", litmus_clock()); + list_replace_init(&rt->tobe_released, &list); + + list_for_each_safe(pos, safe, &list) { + /* pick task of work list */ + t = list_entry(pos, struct task_struct, rt_param.list); + sched_trace_task_release(t); + list_del(pos); + + /* put into release heap while holding release_lock */ + raw_spin_lock(&rt->release_lock); + VTRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock); + + rh = get_release_heap(rt, t, 0); + if (!rh) { + /* need to use our own, but drop lock first */ + raw_spin_unlock(&rt->release_lock); + VTRACE_TASK(t, "Dropped release_lock 0x%p\n", + &rt->release_lock); + + reinit_release_heap(t); + VTRACE_TASK(t, "release_heap ready\n"); + + raw_spin_lock(&rt->release_lock); + VTRACE_TASK(t, "Re-acquired release_lock 0x%p\n", + &rt->release_lock); + + rh = get_release_heap(rt, t, 1); + } + bheap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node); + VTRACE_TASK(t, "arm_release_timer(): added to release heap\n"); + + raw_spin_unlock(&rt->release_lock); + VTRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock); + + /* To avoid arming the timer multiple times, we only let the + * owner do the arming (which is the "first" task to reference + * this release_heap anyway). + */ + if (rh == tsk_rt(t)->rel_heap) { + VTRACE_TASK(t, "arming timer 0x%p\n", &rh->timer); + + if (!hrtimer_is_hres_active(&rh->timer)) { + TRACE_TASK(t, "WARNING: no hires timer!!!\n"); + } + + /* we cannot arm the timer using hrtimer_start() + * as it may deadlock on rq->lock + * + * PINNED mode is ok on both local and remote CPU + */ +#ifdef CONFIG_RELEASE_MASTER + if (rt->release_master == NO_CPU && + target_cpu == NO_CPU) +#endif + __hrtimer_start_range_ns(&rh->timer, + ns_to_ktime(rh->release_time), + 0, HRTIMER_MODE_ABS_PINNED, 0); +#ifdef CONFIG_RELEASE_MASTER + else + hrtimer_start_on( + /* target_cpu overrides release master */ + (target_cpu != NO_CPU ? 
+ target_cpu : rt->release_master), + &rh->info, &rh->timer, + ns_to_ktime(rh->release_time), + HRTIMER_MODE_ABS_PINNED); +#endif + } else + VTRACE_TASK(t, "0x%p is not my timer\n", &rh->timer); + } +} + +void rt_domain_init(rt_domain_t *rt, + bheap_prio_t order, + check_resched_needed_t check, + release_jobs_t release + ) +{ + int i; + + BUG_ON(!rt); + if (!check) + check = dummy_resched; + if (!release) + release = default_release_jobs; + if (!order) + order = dummy_order; + +#ifdef CONFIG_RELEASE_MASTER + rt->release_master = NO_CPU; +#endif + + bheap_init(&rt->ready_queue); + INIT_LIST_HEAD(&rt->tobe_released); + for (i = 0; i < RELEASE_QUEUE_SLOTS; i++) + INIT_LIST_HEAD(&rt->release_queue.slot[i]); + + raw_spin_lock_init(&rt->ready_lock); + raw_spin_lock_init(&rt->release_lock); + raw_spin_lock_init(&rt->tobe_lock); + + rt->check_resched = check; + rt->release_jobs = release; + rt->order = order; +} + +/* add_ready - add a real-time task to the rt ready queue. It must be runnable. + * @new: the newly released task + */ +void __add_ready(rt_domain_t* rt, struct task_struct *new) +{ + TRACE("rt: adding %s/%d (%llu, %llu, %llu) rel=%llu " + "to ready queue at %llu\n", + new->comm, new->pid, + get_exec_cost(new), get_rt_period(new), get_rt_relative_deadline(new), + get_release(new), litmus_clock()); + + BUG_ON(bheap_node_in_heap(tsk_rt(new)->heap_node)); + + bheap_insert(rt->order, &rt->ready_queue, tsk_rt(new)->heap_node); + rt->check_resched(rt); +} + +/* merge_ready - Add a sorted set of tasks to the rt ready queue. They must be runnable. + * @tasks - the newly released tasks + */ +void __merge_ready(rt_domain_t* rt, struct bheap* tasks) +{ + bheap_union(rt->order, &rt->ready_queue, tasks); + rt->check_resched(rt); +} + + +#ifdef CONFIG_RELEASE_MASTER +void __add_release_on(rt_domain_t* rt, struct task_struct *task, + int target_cpu) +{ + TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n", + get_release(task), target_cpu); + list_add(&tsk_rt(task)->list, &rt->tobe_released); + task->rt_param.domain = rt; + + arm_release_timer_on(rt, target_cpu); +} +#endif + +/* add_release - add a real-time task to the rt release queue. + * @task: the sleeping task + */ +void __add_release(rt_domain_t* rt, struct task_struct *task) +{ + TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task)); + list_add(&tsk_rt(task)->list, &rt->tobe_released); + task->rt_param.domain = rt; + + arm_release_timer(rt); +} diff --git a/litmus/sched_plugin.c b/litmus/sched_plugin.c new file mode 100644 index 000000000000..edd91e9bf773 --- /dev/null +++ b/litmus/sched_plugin.c @@ -0,0 +1,238 @@ +/* sched_plugin.c -- core infrastructure for the scheduler plugin system + * + * This file includes the initialization of the plugin system, the no-op Linux + * scheduler plugin, some dummy functions, and some helper functions. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * Generic function to trigger preemption on either local or remote cpu + * from scheduler plugins. The key feature is that this function is + * non-preemptive section aware and does not invoke the scheduler / send + * IPIs if the to-be-preempted task is actually non-preemptive. + */ +void preempt_if_preemptable(struct task_struct* t, int cpu) +{ + /* t is the real-time task executing on CPU on_cpu If t is NULL, then + * on_cpu is currently scheduling background work. 
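
An event-driven plugin typically combines the rt_domain_t helpers above with this preemption helper roughly as follows; a condensed sketch in which every my_* name is a placeholder and only the rt_domain/litmus calls are real:

/* Sketch: wiring an rt_domain_t into a plugin. The __-prefixed calls are
 * assumed to run with the appropriate domain locks held by the caller. */
static rt_domain_t my_domain;

static int my_ready_order(struct bheap_node *a, struct bheap_node *b)
{
        return 0;       /* placeholder: nonzero iff a has higher priority than b */
}

static int my_check_resched(rt_domain_t *rt)
{
        /* invoked from __add_ready()/__merge_ready(): preempt if the new head
         * of the ready queue beats whatever is scheduled locally */
        if (my_new_job_should_preempt(rt))              /* placeholder test */
                preempt_if_preemptable(my_scheduled_task(), smp_processor_id());
        return 0;
}

static void my_domain_setup(void)
{
        /* NULL release handler => default_release_jobs() merges released
         * jobs straight into the ready queue */
        rt_domain_init(&my_domain, my_ready_order, my_check_resched, NULL);
}

static void my_requeue(struct task_struct *t)
{
        if (!lt_before(litmus_clock(), get_release(t)))
                __add_ready(&my_domain, t);     /* release time already passed */
        else
                __add_release(&my_domain, t);   /* arms the release timer */
}
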
+ */ + + int reschedule = 0; + + if (!t) + /* move non-real-time task out of the way */ + reschedule = 1; + else { + if (smp_processor_id() == cpu) { + /* local CPU case */ + /* check if we need to poke userspace */ + if (is_user_np(t)) + /* Yes, poke it. This doesn't have to be atomic since + * the task is definitely not executing. */ + request_exit_np(t); + else if (!is_kernel_np(t)) + /* only if we are allowed to preempt the + * currently-executing task */ + reschedule = 1; + } else { + /* Remote CPU case. Only notify if it's not a kernel + * NP section and if we didn't set the userspace + * flag. */ + reschedule = !(is_kernel_np(t) || request_exit_np_atomic(t)); + } + } + if (likely(reschedule)) + litmus_reschedule(cpu); +} + + +/************************************************************* + * Dummy plugin functions * + *************************************************************/ + +static void litmus_dummy_finish_switch(struct task_struct * prev) +{ +} + +static struct task_struct* litmus_dummy_schedule(struct task_struct * prev) +{ + sched_state_task_picked(); + return NULL; +} + +static long litmus_dummy_admit_task(struct task_struct* tsk) +{ + printk(KERN_CRIT "LITMUS^RT: Linux plugin rejects %s/%d.\n", + tsk->comm, tsk->pid); + return -EINVAL; +} + +static void litmus_dummy_task_new(struct task_struct *t, int on_rq, int running) +{ +} + +static void litmus_dummy_task_wake_up(struct task_struct *task) +{ +} + +static void litmus_dummy_task_block(struct task_struct *task) +{ +} + +static void litmus_dummy_task_exit(struct task_struct *task) +{ +} + +static void litmus_dummy_task_cleanup(struct task_struct *task) +{ +} + +static long litmus_dummy_complete_job(void) +{ + return -ENOSYS; +} + +static long litmus_dummy_activate_plugin(void) +{ + return 0; +} + +static long litmus_dummy_deactivate_plugin(void) +{ + return 0; +} + +static long litmus_dummy_get_domain_proc_info(struct domain_proc_info **d) +{ + *d = NULL; + return 0; +} + +static void litmus_dummy_synchronous_release_at(lt_t time_zero) +{ + /* ignore */ +} + +#ifdef CONFIG_LITMUS_LOCKING + +static long litmus_dummy_allocate_lock(struct litmus_lock **lock, int type, + void* __user config) +{ + return -ENXIO; +} + +#endif + + +/* The default scheduler plugin. It doesn't do anything and lets Linux do its + * job. + */ +struct sched_plugin linux_sched_plugin = { + .plugin_name = "Linux", + .task_new = litmus_dummy_task_new, + .task_exit = litmus_dummy_task_exit, + .task_wake_up = litmus_dummy_task_wake_up, + .task_block = litmus_dummy_task_block, + .complete_job = litmus_dummy_complete_job, + .schedule = litmus_dummy_schedule, + .finish_switch = litmus_dummy_finish_switch, + .activate_plugin = litmus_dummy_activate_plugin, + .deactivate_plugin = litmus_dummy_deactivate_plugin, + .get_domain_proc_info = litmus_dummy_get_domain_proc_info, + .synchronous_release_at = litmus_dummy_synchronous_release_at, +#ifdef CONFIG_LITMUS_LOCKING + .allocate_lock = litmus_dummy_allocate_lock, +#endif + .admit_task = litmus_dummy_admit_task +}; + +/* + * The reference to current plugin that is used to schedule tasks within + * the system. 
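
Since register_sched_plugin() below substitutes the litmus_dummy_* implementations for any callback left NULL (via the CHECK() macro), a minimal plugin only needs to fill in the callbacks it actually implements; a sketch with placeholder my_* callbacks:

/* Sketch: minimal plugin registration. complete_job is the generic helper
 * defined earlier in this patch; all my_* symbols are placeholders. */
static struct sched_plugin my_plugin = {
        .plugin_name    = "MY-PLUGIN",
        .schedule       = my_plugin_schedule,   /* must call sched_state_task_picked() */
        .admit_task     = my_admit_task,
        .task_new       = my_task_new,
        .task_exit      = my_task_exit,
        .complete_job   = complete_job,
};

static int __init init_my_plugin(void)
{
        return register_sched_plugin(&my_plugin);
}
module_init(init_my_plugin);
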
It stores references to actual function implementations + * Should be initialized by calling "init_***_plugin()" + */ +struct sched_plugin *litmus = &linux_sched_plugin; + +/* the list of registered scheduling plugins */ +static LIST_HEAD(sched_plugins); +static DEFINE_RAW_SPINLOCK(sched_plugins_lock); + +#define CHECK(func) {\ + if (!plugin->func) \ + plugin->func = litmus_dummy_ ## func;} + +/* FIXME: get reference to module */ +int register_sched_plugin(struct sched_plugin* plugin) +{ + printk(KERN_INFO "Registering LITMUS^RT plugin %s.\n", + plugin->plugin_name); + + /* make sure we don't trip over null pointers later */ + CHECK(finish_switch); + CHECK(schedule); + CHECK(task_wake_up); + CHECK(task_exit); + CHECK(task_cleanup); + CHECK(task_block); + CHECK(task_new); + CHECK(complete_job); + CHECK(activate_plugin); + CHECK(deactivate_plugin); + CHECK(get_domain_proc_info); +#ifdef CONFIG_LITMUS_LOCKING + CHECK(allocate_lock); +#endif + CHECK(admit_task); + CHECK(synchronous_release_at); + + if (!plugin->wait_for_release_at) + plugin->wait_for_release_at = default_wait_for_release_at; + + raw_spin_lock(&sched_plugins_lock); + list_add(&plugin->list, &sched_plugins); + raw_spin_unlock(&sched_plugins_lock); + + return 0; +} + + +/* FIXME: reference counting, etc. */ +struct sched_plugin* find_sched_plugin(const char* name) +{ + struct list_head *pos; + struct sched_plugin *plugin; + + raw_spin_lock(&sched_plugins_lock); + list_for_each(pos, &sched_plugins) { + plugin = list_entry(pos, struct sched_plugin, list); + if (!strcmp(plugin->plugin_name, name)) + goto out_unlock; + } + plugin = NULL; + +out_unlock: + raw_spin_unlock(&sched_plugins_lock); + return plugin; +} + +void print_sched_plugins(struct seq_file *m) +{ + struct list_head *pos; + struct sched_plugin *plugin; + + raw_spin_lock(&sched_plugins_lock); + list_for_each(pos, &sched_plugins) { + plugin = list_entry(pos, struct sched_plugin, list); + seq_printf(m, "%s\n", plugin->plugin_name); + } + raw_spin_unlock(&sched_plugins_lock); +} diff --git a/litmus/srp.c b/litmus/srp.c new file mode 100644 index 000000000000..7ab388646e29 --- /dev/null +++ b/litmus/srp.c @@ -0,0 +1,308 @@ +/* ************************************************************************** */ +/* STACK RESOURCE POLICY */ +/* ************************************************************************** */ + +#include +#include +#include + +#include +#include +#include +#include + + +#ifdef CONFIG_LITMUS_LOCKING + +#include + +srp_prioritization_t get_srp_prio; + +struct srp { + struct list_head ceiling; + wait_queue_head_t ceiling_blocked; +}; +#define system_ceiling(srp) list2prio(srp->ceiling.next) +#define ceiling2sem(c) container_of(c, struct srp_semaphore, ceiling) + +#define UNDEF_SEM -2 + +DEFINE_PER_CPU(struct srp, srp); + +DEFINE_PER_CPU(int, srp_objects_in_use); + +/* Initialize SRP semaphores at boot time. */ +static int __init srp_init(void) +{ + int i; + + printk("Initializing SRP per-CPU ceilings..."); + for (i = 0; i < NR_CPUS; i++) { + init_waitqueue_head(&per_cpu(srp, i).ceiling_blocked); + INIT_LIST_HEAD(&per_cpu(srp, i).ceiling); + per_cpu(srp_objects_in_use, i) = 0; + } + printk(" done!\n"); + + return 0; +} +module_init(srp_init); + +/* SRP task priority comparison function. Smaller numeric values have higher + * priority, tie-break is PID. 
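
For instance, a ceiling entry with priority 2 exceeds one with priority 3 regardless of PID, and of two entries that both have priority 2, the one with the smaller PID wins the tie-break.
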
Special case: priority == 0 <=> no priority + */ +static int srp_higher_prio(struct srp_priority* first, + struct srp_priority* second) +{ + if (!first->priority) + return 0; + else + return !second->priority || + first->priority < second->priority || ( + first->priority == second->priority && + first->pid < second->pid); +} + + +static int srp_exceeds_ceiling(struct task_struct* first, + struct srp* srp) +{ + struct srp_priority prio; + + if (list_empty(&srp->ceiling)) + return 1; + else { + prio.pid = first->pid; + prio.priority = get_srp_prio(first); + return srp_higher_prio(&prio, system_ceiling(srp)) || + ceiling2sem(system_ceiling(srp))->owner == first; + } +} + +static void srp_add_prio(struct srp* srp, struct srp_priority* prio) +{ + struct list_head *pos; + if (in_list(&prio->list)) { + printk(KERN_CRIT "WARNING: SRP violation detected, prio is already in " + "ceiling list! cpu=%d, srp=%p\n", smp_processor_id(), ceiling2sem(prio)); + return; + } + list_for_each(pos, &srp->ceiling) + if (unlikely(srp_higher_prio(prio, list2prio(pos)))) { + __list_add(&prio->list, pos->prev, pos); + return; + } + + list_add_tail(&prio->list, &srp->ceiling); +} + + +static int lock_srp_semaphore(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); + + if (!is_realtime(t)) + return -EPERM; + + /* prevent acquisition of local locks in global critical sections */ + if (tsk_rt(t)->num_locks_held) + return -EBUSY; + + preempt_disable(); + + /* Update ceiling. */ + srp_add_prio(this_cpu_ptr(&srp), &sem->ceiling); + + /* SRP invariant: all resources available */ + BUG_ON(sem->owner != NULL); + + sem->owner = t; + TRACE_CUR("acquired srp 0x%p\n", sem); + + tsk_rt(t)->num_local_locks_held++; + + preempt_enable(); + + return 0; +} + +static int unlock_srp_semaphore(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); + int err = 0; + + preempt_disable(); + + if (sem->owner != t) { + err = -EINVAL; + } else { + /* The current owner should be executing on the correct CPU. + * + * If the owner transitioned out of RT mode or is exiting, then + * we it might have already been migrated away by the best-effort + * scheduler and we just have to deal with it. */ + if (unlikely(!is_realtime(t) && sem->cpu != smp_processor_id())) { + TRACE_TASK(t, "SRP unlock cpu=%d, sem->cpu=%d\n", + smp_processor_id(), sem->cpu); + preempt_enable(); + err = litmus_be_migrate_to(sem->cpu); + preempt_disable(); + TRACE_TASK(t, "post-migrate: cpu=%d, sem->cpu=%d err=%d\n", + smp_processor_id(), sem->cpu, err); + } + BUG_ON(sem->cpu != smp_processor_id()); + err = 0; + + /* Determine new system priority ceiling for this CPU. */ + BUG_ON(!in_list(&sem->ceiling.list)); + + list_del(&sem->ceiling.list); + sem->owner = NULL; + + /* Wake tasks on this CPU, if they exceed current ceiling. 
*/ + TRACE_CUR("released srp 0x%p\n", sem); + wake_up_all(&this_cpu_ptr(&srp)->ceiling_blocked); + + tsk_rt(t)->num_local_locks_held--; + } + + preempt_enable(); + return err; +} + +static int open_srp_semaphore(struct litmus_lock* l, void* __user arg) +{ + struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); + int err = 0; + struct task_struct* t = current; + struct srp_priority t_prio; + + if (!is_realtime(t)) + return -EPERM; + + TRACE_CUR("opening SRP semaphore %p, cpu=%d\n", sem, sem->cpu); + + preempt_disable(); + + if (sem->owner != NULL) + err = -EBUSY; + + if (err == 0) { + if (sem->cpu == UNDEF_SEM) + sem->cpu = get_partition(t); + else if (sem->cpu != get_partition(t)) + err = -EPERM; + } + + if (err == 0) { + t_prio.priority = get_srp_prio(t); + t_prio.pid = t->pid; + if (srp_higher_prio(&t_prio, &sem->ceiling)) { + sem->ceiling.priority = t_prio.priority; + sem->ceiling.pid = t_prio.pid; + } + } + + preempt_enable(); + + return err; +} + +static int close_srp_semaphore(struct litmus_lock* l) +{ + struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); + int err = 0; + + preempt_disable(); + + if (sem->owner == current) + unlock_srp_semaphore(l); + + preempt_enable(); + + return err; +} + +static void deallocate_srp_semaphore(struct litmus_lock* l) +{ + struct srp_semaphore* sem = container_of(l, struct srp_semaphore, litmus_lock); + raw_cpu_dec(srp_objects_in_use); + kfree(sem); +} + +static struct litmus_lock_ops srp_lock_ops = { + .open = open_srp_semaphore, + .close = close_srp_semaphore, + .lock = lock_srp_semaphore, + .unlock = unlock_srp_semaphore, + .deallocate = deallocate_srp_semaphore, +}; + +struct srp_semaphore* allocate_srp_semaphore(void) +{ + struct srp_semaphore* sem; + + sem = kmalloc(sizeof(*sem), GFP_KERNEL); + if (!sem) + return NULL; + + INIT_LIST_HEAD(&sem->ceiling.list); + sem->ceiling.priority = 0; + sem->cpu = UNDEF_SEM; + sem->owner = NULL; + + sem->litmus_lock.ops = &srp_lock_ops; + + raw_cpu_inc(srp_objects_in_use); + return sem; +} + +static int srp_wake_up(wait_queue_t *wait, unsigned mode, int sync, + void *key) +{ + int cpu = smp_processor_id(); + struct task_struct *tsk = wait->private; + if (cpu != get_partition(tsk)) + TRACE_TASK(tsk, "srp_wake_up on wrong cpu, partition is %d\b", + get_partition(tsk)); + else if (srp_exceeds_ceiling(tsk, this_cpu_ptr(&srp))) + return default_wake_function(wait, mode, sync, key); + return 0; +} + +static void do_ceiling_block(struct task_struct *tsk) +{ + wait_queue_t wait = { + .private = tsk, + .func = srp_wake_up, + .task_list = {NULL, NULL} + }; + + tsk->state = TASK_UNINTERRUPTIBLE; + add_wait_queue(&this_cpu_ptr(&srp)->ceiling_blocked, &wait); + tsk->rt_param.srp_non_recurse = 1; + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + tsk->rt_param.srp_non_recurse = 0; + remove_wait_queue(&this_cpu_ptr(&srp)->ceiling_blocked, &wait); +} + +/* Wait for current task priority to exceed system-wide priority ceiling. 
+ */ +void __srp_ceiling_block(struct task_struct *cur) +{ + preempt_disable(); + if (!srp_exceeds_ceiling(cur, this_cpu_ptr(&srp))) { + TRACE_CUR("is priority ceiling blocked.\n"); + while (!srp_exceeds_ceiling(cur, this_cpu_ptr(&srp))) + do_ceiling_block(cur); + TRACE_CUR("finally exceeds system ceiling.\n"); + } else + TRACE_CUR("is not priority ceiling blocked\n"); + preempt_enable(); +} + +#endif diff --git a/litmus/sync.c b/litmus/sync.c new file mode 100644 index 000000000000..5d180603f46b --- /dev/null +++ b/litmus/sync.c @@ -0,0 +1,152 @@ +/* litmus/sync.c - Support for synchronous and asynchronous task system releases. + * + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +struct ts_release_wait { + struct list_head list; + struct completion completion; + lt_t ts_release_time; +}; + +#define DECLARE_TS_RELEASE_WAIT(symb) \ + struct ts_release_wait symb = \ + { \ + LIST_HEAD_INIT(symb.list), \ + COMPLETION_INITIALIZER_ONSTACK(symb.completion), \ + 0 \ + } + +static LIST_HEAD(task_release_list); +static DEFINE_MUTEX(task_release_lock); + +static long do_wait_for_ts_release(void) +{ + DECLARE_TS_RELEASE_WAIT(wait); + + long ret = -ERESTARTSYS; + + if (mutex_lock_interruptible(&task_release_lock)) + goto out; + + list_add(&wait.list, &task_release_list); + + mutex_unlock(&task_release_lock); + + /* We are enqueued, now we wait for someone to wake us up. */ + ret = wait_for_completion_interruptible(&wait.completion); + + if (!ret) { + /* Completion succeeded, setup release time. */ + ret = litmus->wait_for_release_at( + wait.ts_release_time + get_rt_phase(current)); + } else { + /* We were interrupted, must cleanup list. */ + mutex_lock(&task_release_lock); + if (!wait.completion.done) + list_del(&wait.list); + mutex_unlock(&task_release_lock); + } + +out: + return ret; +} + +int count_tasks_waiting_for_release(void) +{ + int task_count = 0; + struct list_head *pos; + + mutex_lock(&task_release_lock); + + list_for_each(pos, &task_release_list) { + task_count++; + } + + mutex_unlock(&task_release_lock); + + + return task_count; +} + +static long do_release_ts(lt_t start) +{ + long task_count = 0; + + struct list_head *pos, *safe; + struct ts_release_wait *wait; + + if (mutex_lock_interruptible(&task_release_lock)) { + task_count = -ERESTARTSYS; + goto out; + } + + TRACE("<<<<<< synchronous task system release >>>>>>\n"); + sched_trace_sys_release(&start); + litmus->synchronous_release_at(start); + + task_count = 0; + list_for_each_safe(pos, safe, &task_release_list) { + wait = (struct ts_release_wait*) + list_entry(pos, struct ts_release_wait, list); + + task_count++; + wait->ts_release_time = start; + complete(&wait->completion); + } + + /* clear stale list */ + INIT_LIST_HEAD(&task_release_list); + + mutex_unlock(&task_release_lock); + +out: + return task_count; +} + + +asmlinkage long sys_wait_for_ts_release(void) +{ + long ret = -EPERM; + struct task_struct *t = current; + + if (is_realtime(t)) + ret = do_wait_for_ts_release(); + + return ret; +} + +#define ONE_MS 1000000 + +asmlinkage long sys_release_ts(lt_t __user *__delay) +{ + long ret; + lt_t delay; + lt_t start_time; + + /* FIXME: check capabilities... 
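
The intended usage pattern is that every real-time task parks itself in sys_wait_for_ts_release() once it has been admitted, and a separate launcher then calls sys_release_ts() with a relative delay to release all waiting tasks at a common start time; a sketch, where wait_for_ts_release() and release_ts() are assumed userspace wrappers:

/* Sketch: synchronous task-system release driven from userspace. */
static void rt_task_startup(void)
{
        /* in every real-time task, after admission */
        wait_for_ts_release();          /* blocks until the launcher fires */
}

static long launch_task_system(void)
{
        lt_t delay = 1000000000ULL;     /* release roughly 1 s from now (ns),
                                         * relative to a millisecond-aligned base */

        /* returns the number of tasks that were waiting and got released */
        return release_ts(&delay);
}
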
*/ + + ret = copy_from_user(&delay, __delay, sizeof(delay)); + if (ret == 0) { + /* round up to next larger integral millisecond */ + start_time = litmus_clock(); + do_div(start_time, ONE_MS); + start_time *= ONE_MS; + ret = do_release_ts(start_time + delay); + } + + return ret; +} diff --git a/litmus/trace.c b/litmus/trace.c index 2bcaaf474b7a..6b3e5f77cc5e 100644 --- a/litmus/trace.c +++ b/litmus/trace.c @@ -258,6 +258,17 @@ feather_callback void save_cpu_timestamp_irq(unsigned long event, 0, RECORD_LOCAL_TIMESTAMP); } +feather_callback void save_cpu_task_latency(unsigned long event, + unsigned long when_ptr) +{ + lt_t now = litmus_clock(); + lt_t *when = (lt_t*) when_ptr; + + write_cpu_timestamp(event, TSK_RT, + 0, + 0, LOCAL_IRQ_COUNT, 0, + now - *when, DO_NOT_RECORD_TIMESTAMP); +} feather_callback void msg_sent(unsigned long event, unsigned long to) { diff --git a/litmus/uncachedev.c b/litmus/uncachedev.c new file mode 100644 index 000000000000..06a6a7c17983 --- /dev/null +++ b/litmus/uncachedev.c @@ -0,0 +1,102 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* device for allocating pages not cached by the CPU */ + +#define UNCACHE_NAME "litmus/uncache" + +void litmus_uncache_vm_open(struct vm_area_struct *vma) +{ +} + +void litmus_uncache_vm_close(struct vm_area_struct *vma) +{ +} + +int litmus_uncache_vm_fault(struct vm_area_struct* vma, + struct vm_fault* vmf) +{ + /* modeled after SG DMA video4linux, but without DMA. */ + /* (see drivers/media/video/videobuf-dma-sg.c) */ + struct page *page; + + page = alloc_page(GFP_USER); + if (!page) + return VM_FAULT_OOM; + + clear_user_highpage(page, (unsigned long)vmf->virtual_address); + vmf->page = page; + + return 0; +} + +static struct vm_operations_struct litmus_uncache_vm_ops = { + .open = litmus_uncache_vm_open, + .close = litmus_uncache_vm_close, + .fault = litmus_uncache_vm_fault, +}; + +static int litmus_uncache_mmap(struct file* filp, struct vm_area_struct* vma) +{ + /* first make sure mapper knows what he's doing */ + + /* you can only map the "first" page */ + if (vma->vm_pgoff != 0) + return -EINVAL; + + /* you can't share it with anyone */ + if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) + return -EINVAL; + + /* cannot be expanded, and is not a "normal" page. */ + vma->vm_flags |= VM_DONTEXPAND; + + /* noncached pages are not explicitly locked in memory (for now). */ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_ops = &litmus_uncache_vm_ops; + + return 0; +} + +static struct file_operations litmus_uncache_fops = { + .owner = THIS_MODULE, + .mmap = litmus_uncache_mmap, +}; + +static struct miscdevice litmus_uncache_dev = { + .name = UNCACHE_NAME, + .minor = MISC_DYNAMIC_MINOR, + .fops = &litmus_uncache_fops, + /* pages are not locked, so there is no reason why + anyone cannot allocate an uncache pages */ + .mode = (S_IRUGO | S_IWUGO), +}; + +static int __init init_litmus_uncache_dev(void) +{ + int err; + + printk("Initializing LITMUS^RT uncache device.\n"); + err = misc_register(&litmus_uncache_dev); + if (err) + printk("Could not allocate %s device (%d).\n", UNCACHE_NAME, err); + return err; +} + +static void __exit exit_litmus_uncache_dev(void) +{ + misc_deregister(&litmus_uncache_dev); +} + +module_init(init_litmus_uncache_dev); +module_exit(exit_litmus_uncache_dev); -- cgit v1.2.2
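
For completeness, the uncache device registered above is meant to be mapped from userspace roughly as follows; a sketch, where the /dev/litmus/uncache path is an assumption derived from UNCACHE_NAME and the misc-device registration:

/* Sketch: obtain one CPU-uncached page. Only offset 0 may be mapped and the
 * mapping must be private, matching the checks in litmus_uncache_mmap(). */
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

static void *map_uncached_page(void)
{
        long page_size = sysconf(_SC_PAGESIZE);
        int fd = open("/dev/litmus/uncache", O_RDWR);
        void *mem;

        if (fd < 0)
                return NULL;

        mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE, fd, 0);
        close(fd);
        return mem == MAP_FAILED ? NULL : mem;
}
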