/*
* litmus.c -- Implementation of the LITMUS syscalls,
* the LITMUS initialization code,
* and the procfs interface.
*/
#include <asm/uaccess.h>
#include <linux/uaccess.h>
#include <linux/sysrq.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <litmus/litmus.h>
#include <litmus/bheap.h>
#include <litmus/trace.h>
#include <litmus/rt_domain.h>
#include <litmus/litmus_proc.h>
#include <litmus/sched_trace.h>
#include <litmus/clock.h>
#ifdef CONFIG_MERGE_TIMERS
#include <litmus/event_group.h>
#endif
#ifdef CONFIG_PLUGIN_MC
#include <linux/pid.h>
#include <linux/hrtimer.h>
#include <litmus/sched_mc.h>
#else
struct mc_task;
#endif
#ifdef CONFIG_SCHED_CPU_AFFINITY
#include <litmus/affinity.h>
#endif
#include <litmus/color_queue.h>
/* Number of RT tasks that exist in the system.
 * Incremented by litmus_admit_task() on successful admission, decremented
 * by litmus_exit_task(), and read by switch_sched_plugin(), which refuses
 * to switch plugins while the count is non-zero.
 */
atomic_t rt_task_count = ATOMIC_INIT(0);
/* Held by litmus_admit_task() while a task is admitted and by
 * switch_sched_plugin() while the active plugin is replaced, so that the
 * two cannot interleave.
 */
static DEFINE_RAW_SPINLOCK(task_transition_lock);
/* synchronize plugin switching: set to 1 by switch_sched_plugin(),
 * incremented once per CPU in synch_on_plugin_switch(), and reset to 0
 * when the switch is over.
 */
atomic_t cannot_use_plugin = ATOMIC_INIT(0);
/* Give log messages sequential IDs. */
atomic_t __log_seq_no = ATOMIC_INIT(0);
/* current master CPU for handling timer IRQs */
atomic_t release_master_cpu = ATOMIC_INIT(NO_CPU);
/* Slab cache backing bheap_node_alloc()/bheap_node_free(); created in
 * _init_litmus().
 */
static struct kmem_cache *bheap_node_cache;
/* Defined outside this file; created in _init_litmus(). */
extern struct kmem_cache *release_heap_cache;
#ifdef CONFIG_MERGE_TIMERS
/* Defined outside this file; created in _init_litmus(). */
extern struct kmem_cache *event_list_cache;
#endif
#ifdef CONFIG_PLUGIN_MC
/* Slab cache for the per-task mixed-criticality data allocated in
 * sys_set_rt_task_mc_param() and released in exit_litmus().
 */
static struct kmem_cache *mc_data_cache;
#endif
/* Allocate one bheap node from bheap_node_cache.
 *
 * @gfp_flags: allocation flags, handed unchanged to kmem_cache_alloc().
 *
 * Returns whatever kmem_cache_alloc() returns; callers in this file treat
 * a NULL result as allocation failure.
 */
struct bheap_node* bheap_node_alloc(int gfp_flags)
{
	struct bheap_node *node;

	node = kmem_cache_alloc(bheap_node_cache, gfp_flags);

	return node;
}
/* Return a bheap node to bheap_node_cache.
 *
 * @hn: node obtained from bheap_node_alloc(), or NULL.
 *
 * A NULL @hn is silently ignored so that error paths can call this
 * unconditionally; kmem_cache_free() itself is not documented to accept
 * a NULL object.
 */
void bheap_node_free(struct bheap_node* hn)
{
	if (!hn)
		return;

	kmem_cache_free(bheap_node_cache, hn);
}
/* Release-heap allocator and deallocator. Only declared here; the
 * definitions are not in this part of the file. Used by
 * litmus_admit_task() and litmus_exit_task().
 */
struct release_heap* release_heap_alloc(int gfp_flags);
void release_heap_free(struct release_heap* rh);
/*
 * sys_set_rt_task_param
 * @pid: Pid of the task whose scheduling parameters are to be changed
 * @param: New real-time extension parameters such as the execution cost and
 *         period
 *
 * Syscall for setting a task's rt extension params.
 *
 * Returns EINVAL if pid is negative, if param is NULL, or if the supplied
 *                parameters are rejected (execution cost or period <= 0,
 *                CPU out of range or offline, density > 1, unknown task
 *                class, unsupported budget policy)
 *         EFAULT if param cannot be copied from user space
 *         ESRCH  if pid does not correspond to a valid task
 *         EBUSY  if pid is already a real-time task
 *         0      on success
 *
 * Only non-real-time tasks may be configured with this system call
 * to avoid races with the scheduler. In practice, this means that a
 * task's parameters must be set _before_ the task becomes a real-time
 * task.
 *
 * find_task_by_vpid() assumes that we are in the same namespace of the
 * target.
 */
asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
{
	struct rt_task tp;
	struct task_struct *target;
	int retval = -EINVAL;

	printk("Setting up rt task parameters for process %d.\n", pid);

	if (pid < 0 || !param)
		goto out;

	if (copy_from_user(&tp, param, sizeof(tp))) {
		retval = -EFAULT;
		goto out;
	}

	/* Task search and manipulation must be protected */
	read_lock_irq(&tasklist_lock);

	target = find_task_by_vpid(pid);
	if (!target) {
		retval = -ESRCH;
		goto out_unlock;
	}

	if (is_realtime(target)) {
		/* The task is already a real-time task.
		 * We cannot allow parameter changes at this point.
		 */
		retval = -EBUSY;
		goto out_unlock;
	}

	/* set relative deadline to be implicit if left unspecified */
	if (tp.relative_deadline == 0)
		tp.relative_deadline = tp.period;

	if (tp.exec_cost <= 0)
		goto out_unlock;
	if (tp.period <= 0)
		goto out_unlock;

	/* tp.cpu comes straight from user space. cpu_online() uses it as a
	 * bit index into the online mask, so it has to be range-checked
	 * first; the casts make a negative value fail the check regardless
	 * of the field's signedness.
	 */
	if ((unsigned int) tp.cpu >= (unsigned int) nr_cpu_ids ||
	    !cpu_online(tp.cpu))
		goto out_unlock;

	if (min(tp.relative_deadline, tp.period) < tp.exec_cost) /*density check*/
	{
		printk(KERN_INFO "litmus: real-time task %d rejected "
		       "because task density > 1.0\n", pid);
		goto out_unlock;
	}

	if (tp.cls != RT_CLASS_HARD &&
	    tp.cls != RT_CLASS_SOFT &&
	    tp.cls != RT_CLASS_BEST_EFFORT)
	{
		printk(KERN_INFO "litmus: real-time task %d rejected "
		       "because its class is invalid\n", pid);
		goto out_unlock;
	}

	if (tp.budget_policy != NO_ENFORCEMENT &&
	    tp.budget_policy != QUANTUM_ENFORCEMENT &&
	    tp.budget_policy != PRECISE_ENFORCEMENT)
	{
		printk(KERN_INFO "litmus: real-time task %d rejected "
		       "because unsupported budget enforcement policy "
		       "specified (%d)\n",
		       pid, tp.budget_policy);
		goto out_unlock;
	}

	/* All checks passed: publish the validated copy. */
	target->rt_param.task_params = tp;
	retval = 0;

out_unlock:
	read_unlock_irq(&tasklist_lock);
out:
	return retval;
}
/*
 * Getter of a task's RT params.
 *
 * @pid:   pid of the task to query
 * @param: user-space buffer that receives a copy of the task's rt_task
 *
 * returns EINVAL if param is NULL or pid is negative
 * returns ESRCH if pid does not correspond to a valid task
 * returns EFAULT if copying the parameters to user space failed
 * returns 0 on success
 *
 * find_task_by_vpid() assumes that we are in the same namespace of the
 * target.
 */
asmlinkage long sys_get_rt_task_param(pid_t pid, struct rt_task __user * param)
{
	struct task_struct *source;
	struct rt_task lp;

	if (pid < 0 || !param)
		return -EINVAL;

	/* Snapshot the parameters while the task cannot go away. */
	read_lock(&tasklist_lock);
	source = find_task_by_vpid(pid);
	if (source)
		lp = source->rt_param.task_params;
	read_unlock(&tasklist_lock);

	/* Only the pointer value is tested here, it is not dereferenced. */
	if (!source)
		return -ESRCH;

	/* Do copying outside the lock */
	if (copy_to_user(param, &lp, sizeof(lp)))
		return -EFAULT;

	return 0;
}
/*
 * Signal completion of the current job of a periodic task.
 *
 * The caller must be a real-time task with a positive period; the actual
 * work is delegated to the active plugin's complete_job() callback, which
 * has to put the task into an appropriate queue and call schedule.
 *
 * returns EINVAL if the current task is not a real-time task or has a
 *         period <= 0
 * otherwise returns the result of the plugin's complete_job() callback
 *         (0 on successful wakeup according to the original contract)
 */
asmlinkage long sys_complete_job(void)
{
	int status;

	if (!is_realtime(current))
		return -EINVAL;

	/* Task with negative or zero period cannot sleep */
	if (get_rt_period(current) <= 0)
		return -EINVAL;

	status = litmus->complete_job();

	return status;
}
/* This is an "improved" version of sys_complete_job that
 * addresses the problem of unintentionally missing a job after
 * an overrun.
 *
 * @job: job sequence number the caller wants to wait for
 *
 * returns EINVAL if the current task is not a real-time task or has a
 *         period <= 0
 * returns 0 once job_no has reached @job, or the first non-zero result
 *         of the plugin's complete_job() callback
 */
asmlinkage long sys_wait_for_job_release(unsigned int job)
{
	int status = 0;

	if (!is_realtime(current))
		return -EINVAL;

	/* Task with negative or zero period cannot sleep */
	if (get_rt_period(current) <= 0)
		return -EINVAL;

	/* Wait until we have "reached" the desired job.
	 *
	 * This implementation has at least two problems:
	 *
	 * 1) It doesn't gracefully handle the wrap around of
	 *    job_no. Since LITMUS is a prototype, this is not much
	 *    of a problem right now.
	 *
	 * 2) It is theoretically racy if a job release occurs
	 *    between checking job_no and calling into the plugin.
	 *    A proper solution would require adding another callback
	 *    in the plugin structure and testing the condition with
	 *    interrupts disabled.
	 *
	 * FIXME: At least problem 2 should be taken care of eventually.
	 *
	 * If the last job overran then job <= job_no and we
	 * don't send the task to sleep at all.
	 */
	while (job > current->rt_param.job_params.job_no) {
		status = litmus->complete_job();
		if (status)
			break;
	}

	return status;
}
/* Helper syscall that reports the current job sequence number.
 *
 * @job: user-space location that receives job_no
 *
 * returns 0 on successful query
 * returns EPERM if the current task is not a real-time task
 * otherwise returns the result of put_user() (EFAULT if @job is not a
 *         valid pointer)
 */
asmlinkage long sys_query_job_no(unsigned int __user *job)
{
	if (!is_realtime(current))
		return -EPERM;

	return put_user(current->rt_param.job_params.job_no, job);
}
/* sys_null_call() is only used for determining raw system call
 * overheads (kernel entry, kernel exit). It has no useful side effects.
 *
 * @ts: optional user-space location; when non-NULL it receives the value
 *      of litmus_get_cycles() taken inside the call.
 *
 * returns 0 if @ts is NULL, otherwise the result of put_user()
 */
asmlinkage long sys_null_call(cycles_t __user *ts)
{
	cycles_t stamp;

	if (!ts)
		return 0;

	stamp = litmus_get_cycles();

	return put_user(stamp, ts);
}
#ifdef CONFIG_PLUGIN_MC
/* Attach mixed-criticality parameters to a not-yet-real-time task.
 *
 * @pid:   pid of the task to configure
 * @param: user-space mc_task describing the criticality level (and, for
 *         level A, the level-A id)
 *
 * The per-task mc_data container is allocated on first use and reused on
 * later calls; exit_litmus() releases it.
 *
 * returns EINVAL if pid is negative, param is NULL, or the criticality
 *                level / level-A id is out of range
 *         EFAULT if param cannot be copied from user space
 *         ESRCH  if pid does not correspond to a valid task
 *         EBUSY  if pid is already a real-time task
 *         ENOMEM if mc_data cannot be allocated
 *         0      on success
 */
asmlinkage long sys_set_rt_task_mc_param(pid_t pid, struct mc_task __user *param)
{
	struct mc_task mc;
	struct mc_data *data;
	struct task_struct *target;
	int retval = -EINVAL;

	printk("Setting up mixed-criticality task parameters for process %d.\n",
	       pid);

	if (!param || pid < 0)
		return retval;

	if (copy_from_user(&mc, param, sizeof(mc)))
		return -EFAULT;

	/* Task search and manipulation must be protected */
	read_lock_irq(&tasklist_lock);

	target = find_task_by_vpid(pid);
	if (!target) {
		retval = -ESRCH;
		goto out_unlock;
	}

	/* Parameters of a task that is already real-time must not change. */
	if (is_realtime(target)) {
		retval = -EBUSY;
		goto out_unlock;
	}

	/* check parameters passed in are valid */
	if (mc.crit < CRIT_LEVEL_A || mc.crit >= NUM_CRIT_LEVELS) {
		printk(KERN_WARNING "litmus: real-time task %d rejected because "
		       "of invalid criticality level\n", pid);
		goto out_unlock;
	}
	if (CRIT_LEVEL_A == mc.crit &&
	    (mc.lvl_a_id < 0 ||
	     mc.lvl_a_id >= CONFIG_PLUGIN_MC_LEVEL_A_MAX_TASKS)) {
		printk(KERN_WARNING "litmus: real-time task %d rejected because "
		       "of invalid level A id\n", pid);
		goto out_unlock;
	}

	/* Reuse an existing container, otherwise allocate one. GFP_ATOMIC
	 * because we hold tasklist_lock with interrupts disabled.
	 */
	data = tsk_rt(target)->mc_data;
	if (!data) {
		data = kmem_cache_alloc(mc_data_cache, GFP_ATOMIC);
		if (!data) {
			retval = -ENOMEM;
			goto out_unlock;
		}
		tsk_rt(target)->mc_data = data;
	}

	data->mc_task = mc;
	retval = 0;

out_unlock:
	read_unlock_irq(&tasklist_lock);
	return retval;
}
#else
/* Stub used when the MC plugin is compiled out: always fails with EINVAL. */
asmlinkage long sys_set_rt_task_mc_param(pid_t pid, struct mc_task __user *param)
{
	/* don't allow this syscall if the plugin is not enabled */
	return -EINVAL;
}
#endif
/* p is a real-time task. Re-init its state as a best-effort task. */
static void reinit_litmus_state(struct task_struct* p, int restore)
{
struct rt_task user_config = {};
void* ctrl_page = NULL;
struct list_head color_page_info_list;
TRACE_CUR("restore: %d\n", restore);
if (restore) {
/* Safe user-space provided configuration data.
* and allocated page. */
user_config = p->rt_param.task_params;
ctrl_page = p->rt_param.ctrl_page;
color_page_info_list.next =
p->rt_param.color_page_info_list.next;
color_page_info_list.prev =
p->rt_param.color_page_info_list.prev;
}
/* We probably should not be inheriting any task's priority
* at this point in time.
*/
WARN_ON(p->rt_param.inh_task);
/* Cleanup everything else. */
memset(&p->rt_param, 0, sizeof(p->rt_param));
INIT_LIST_HEAD(&p->rt_param.color_page_info_list);
/* Restore preserved fields. */
if (restore) {
p->rt_param.task_params = user_config;
p->rt_param.ctrl_page = ctrl_page;
p->rt_param.color_page_info_list.next =
color_page_info_list.next;
p->rt_param.color_page_info_list.prev =
color_page_info_list.prev;
}
}
long litmus_admit_task(struct task_struct* tsk)
{
long retval = 0;
unsigned long flags;
BUG_ON(is_realtime(tsk));
if (get_rt_relative_deadline(tsk) == 0 ||
get_exec_cost(tsk) >
min(get_rt_relative_deadline(tsk), get_rt_period(tsk)) ) {
TRACE_TASK(tsk,
"litmus admit: invalid task parameters "
"(e = %lu, p = %lu, d = %lu)\n",
get_exec_cost(tsk), get_rt_period(tsk),
get_rt_relative_deadline(tsk));
retval = -EINVAL;
goto out;
}
if (!cpu_online(get_partition(tsk))) {
TRACE_TASK(tsk, "litmus admit: cpu %d is not online\n",
get_partition(tsk));
retval = -EINVAL;
goto out;
}
INIT_LIST_HEAD(&tsk_rt(tsk)->list);
/* avoid scheduler plugin changing underneath us */
raw_spin_lock_irqsave(&task_transition_lock, flags);
/* allocate heap node for this task */
tsk_rt(tsk)->heap_node = bheap_node_alloc(GFP_ATOMIC);
tsk_rt(tsk)->rel_heap = release_heap_alloc(GFP_ATOMIC);
if (!tsk_rt(tsk)->heap_node || !tsk_rt(tsk)->rel_heap) {
printk(KERN_WARNING "litmus: no more heap node memory!?\n");
bheap_node_free(tsk_rt(tsk)->heap_node);
release_heap_free(tsk_rt(tsk)->rel_heap);
retval = -ENOMEM;
goto out_unlock;
} else {
bheap_node_init(&tsk_rt(tsk)->heap_node, tsk);
}
retval = litmus->admit_task(tsk);
if (!retval) {
sched_trace_task_name(tsk);
sched_trace_task_param(tsk);
atomic_inc(&rt_task_count);
}
out_unlock:
raw_spin_unlock_irqrestore(&task_transition_lock, flags);
out:
return retval;
}
/* Tear down the real-time state of @tsk.
 *
 * Does nothing for a task that is not real-time. Otherwise emits the
 * exit-related trace records, lets the plugin release the task, frees the
 * per-task heap node and release heap, drops rt_task_count, and finally
 * resets the LITMUS state while preserving the user-provided parameters.
 */
void litmus_exit_task(struct task_struct* tsk)
{
	if (!is_realtime(tsk))
		return;

	sched_trace_task_completion(tsk, 1);
	sched_trace_task_exit(tsk);
	sched_trace_task_tardy(tsk);

	litmus->task_exit(tsk);

	/* The plugin must have removed the task from every heap by now. */
	BUG_ON(bheap_node_in_heap(tsk_rt(tsk)->heap_node));
	bheap_node_free(tsk_rt(tsk)->heap_node);
	release_heap_free(tsk_rt(tsk)->rel_heap);

	atomic_dec(&rt_task_count);
	reinit_litmus_state(tsk, 1);
}
/* IPI callback to synchronize plugin switching.
 *
 * Sent to the other CPUs by switch_sched_plugin(). Each CPU announces its
 * arrival by incrementing cannot_use_plugin and then spins until the
 * counter is no longer positive, i.e. until switch_sched_plugin() has
 * reset it to 0 at the end of the switch.
 *
 * @info: unused.
 */
static void synch_on_plugin_switch(void* info)
{
/* tell the switching CPU that this CPU has reached the rendezvous */
atomic_inc(&cannot_use_plugin);
/* busy-wait until the switch is finished */
while (atomic_read(&cannot_use_plugin) > 0)
cpu_relax();
}
/* Switching a plugin in use is tricky.
 * We must watch out that no real-time tasks exists
 * (and that none is created in parallel) and that the plugin is not
 * currently in use on any processor (in theory).
 *
 * @plugin: plugin to activate; must not be NULL (BUG otherwise).
 *
 * Returns 0 on success,
 *         -EBUSY if real-time tasks exist,
 *         the error from the old plugin's deactivate_plugin() (no switch
 *         happens), or the error from the new plugin's activate_plugin()
 *         (in which case linux_sched_plugin is installed instead).
 */
int switch_sched_plugin(struct sched_plugin* plugin)
{
	unsigned long flags;
	int ret = 0;

	BUG_ON(!plugin);

	/* forbid other cpus to use the plugin */
	atomic_set(&cannot_use_plugin, 1);
	/* send IPI to force other CPUs to synch with us */
	smp_call_function(synch_on_plugin_switch, NULL, 0);

	/* wait until all other CPUs have started synch */
	while (atomic_read(&cannot_use_plugin) < num_online_cpus())
		cpu_relax();

	/* stop task transitions */
	raw_spin_lock_irqsave(&task_transition_lock, flags);

	/* don't switch if there are active real-time tasks */
	if (atomic_read(&rt_task_count) != 0) {
		ret = -EBUSY;
		goto out;
	}

	ret = litmus->deactivate_plugin();
	if (ret != 0)
		goto out;

	ret = plugin->activate_plugin();
	if (ret != 0) {
		printk(KERN_INFO "Can't activate %s (%d).\n",
		       plugin->plugin_name, ret);
		/* fall back to the Linux plugin; ret keeps the error */
		plugin = &linux_sched_plugin;
	}

	printk(KERN_INFO "Switching to LITMUS^RT plugin %s.\n", plugin->plugin_name);
	litmus = plugin;

out:
	raw_spin_unlock_irqrestore(&task_transition_lock, flags);
	/* release the CPUs spinning in synch_on_plugin_switch() */
	atomic_set(&cannot_use_plugin, 0);
	return ret;
}
/* Called upon fork.
 * p is the newly forked task.
 *
 * Makes sure the child does not share LITMUS state with its parent:
 * a real-time parent yields a child with completely reset LITMUS state
 * that is not real-time; for any other parent the pointers that must not
 * be shared are cleared individually.
 */
void litmus_fork(struct task_struct* p)
{
	if (is_realtime(p)) {
		/* clean out any litmus related state, don't preserve anything */
		reinit_litmus_state(p, 0);
		/* Don't let the child be a real-time task. */
		p->sched_reset_on_fork = 1;
	} else {
		/* non-rt tasks might have ctrl_page set */
		tsk_rt(p)->ctrl_page = NULL;
#ifdef CONFIG_PLUGIN_MC
		/* Non-rt tasks might also have mc_data set, since
		 * sys_set_rt_task_mc_param() allocates it before the task
		 * becomes real-time. exit_litmus() frees mc_data for every
		 * task, so a child that kept the parent's pointer would
		 * cause it to be freed twice.
		 */
		tsk_rt(p)->mc_data = NULL;
#endif
		/* NOTE(review): color_page_info_list is not reset on this
		 * path; confirm the child cannot end up with a list head
		 * that still points into the parent's list.
		 */
	}

	/* od tables are never inherited across a fork */
	p->od_table = NULL;
}
/* Called upon execve().
 * current is doing the exec.
 * Don't let address space specific stuff leak.
 *
 * Only real-time tasks are affected: their deallocation is scheduled via
 * litmus_schedule_deallocation() and the ctrl_page pointer is dropped.
 */
void litmus_exec(void)
{
	struct task_struct *self = current;

	if (!is_realtime(self))
		return;

	/* a task doing exec should not be inheriting a priority */
	WARN_ON(self->rt_param.inh_task);
	litmus_schedule_deallocation(self);
	tsk_rt(self)->ctrl_page = NULL;
}
/* Release all LITMUS resources of an exiting task.
 *
 * @dead_tsk: the task that is going away; may or may not be real-time.
 */
void exit_litmus(struct task_struct *dead_tsk)
{
#ifdef CONFIG_PLUGIN_MC
	/* The MC-setup syscall might succeed and allocate mc_data, but the
	 * task may not exit in real-time mode, and that memory will leak.
	 * Check and free it here.
	 *
	 * NOTE(review): the pointer is not cleared after the free and the
	 * real-time cleanup below still runs afterwards; confirm that the
	 * plugin's task_exit() never looks at mc_data.
	 */
	if (tsk_rt(dead_tsk)->mc_data) {
		kmem_cache_free(mc_data_cache, tsk_rt(dead_tsk)->mc_data);
	}
#endif

	/* We also allow non-RT tasks to allocate control pages to allow
	 * measurements with non-RT tasks, so this runs for every task
	 * (presumably this is where such pages get released -- the callee
	 * is not visible here).
	 */
	litmus_schedule_deallocation(dead_tsk);

	/* main cleanup only for RT tasks */
	if (!is_realtime(dead_tsk))
		return;

	litmus_exit_task(dead_tsk);
}
#ifdef CONFIG_MAGIC_SYSRQ
int sys_kill(int pid, int sig);

/* Magic-sysrq handler: send SIGKILL to every real-time task.
 *
 * @key: the sysrq key that triggered the handler (unused).
 *
 * Walks the process list under tasklist_lock and kills each task for
 * which is_realtime() holds.
 */
static void sysrq_handle_kill_rt_tasks(int key)
{
	struct task_struct *task;

	read_lock(&tasklist_lock);
	for_each_process(task) {
		if (!is_realtime(task))
			continue;
		sys_kill(task->pid, SIGKILL);
	}
	read_unlock(&tasklist_lock);
}

/* Operation table registered for the 'x' sysrq key in _init_litmus(). */
static struct sysrq_key_op sysrq_kill_rt_tasks_op = {
	.handler	= sysrq_handle_kill_rt_tasks,
	.help_msg	= "quit-rt-tasks(X)",
	.action_msg	= "sent SIGKILL to all LITMUS^RT real-time tasks",
};
#endif
/* Plugin defined outside this file; registered in _init_litmus() and
 * used by switch_sched_plugin() as the fallback when activating another
 * plugin fails.
 */
extern struct sched_plugin linux_sched_plugin;
#if defined(CONFIG_CPU_V7) && !defined(CONFIG_HW_PERF_EVENTS)
/* Program the performance-monitor coprocessor registers (CP15, c9) of
 * the CPU this runs on so that the cycle counter is running and readable
 * from user mode. Only built for ARMv7 when the kernel's own perf-events
 * support is not configured.
 *
 * Register names in the comments below follow the ARMv7-A architecture
 * manual -- NOTE(review): verify against the manual before relying on
 * them.
 *
 * @_ignore: unused; present so the function can serve as an
 *           smp_call_function() callback.
 */
static void __init litmus_enable_perfcounters_v7(void *_ignore)
{
u32 enable_val = 0;
/* disable performance monitoring: the enable bit (bit 0) is written as
 * 0 while both reset bits (0x6) are set (control register, PMCR) */
asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (0x00000006));
/* disable all events (count-enable clear register, PMCNTENCLR) */
asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (0xffffffff));
/* write 1 to enable user-mode access to the performance counter
 * (PMUSERENR) */
asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r" (1));
/* disable counter overflow interrupts (just in case) for the cycle
 * counter (bit 31) and event counters 0-3 (PMINTENCLR) */
asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (0x8000000f));
/* select event zero (PMSELR) */
asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (0));
/* count cycles in the selected event zero (event type 0x11) */
asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (0x00000011));
enable_val |= 1; /* bit 0 (value 1) enables the counters */
enable_val |= 2; /* bit 1 (value 2) resets event counters to zero */
enable_val |= 4; /* bit 2 (value 4) resets cycle counter to zero */
/* performance monitor control register: enable all counters */
asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(enable_val));
/* enables counters: bit 31 is the cycle counter, bit 0 is event
 * counter 0 (PMCNTENSET) */
asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(0x80000001));
}
/* Enable the performance counters on this CPU directly and on all other
 * online CPUs via IPI. The IPI is sent with wait == 0, so this does not
 * wait for the other CPUs to finish.
 */
static void __init litmus_enable_perfcounters(void)
{
litmus_enable_perfcounters_v7(NULL);
smp_call_function(litmus_enable_perfcounters_v7, NULL, 0);
}
#else
/* Nothing to do on other configurations. */
#define litmus_enable_perfcounters() do { } while (0)
#endif
/* Module init: bring up the LITMUS^RT core.
 *
 * Registers the Linux fallback plugin, creates the slab caches used by
 * this file (and by the optional merge-timer / MC code), registers the
 * "kill all RT tasks" sysrq key when sysrq support is built in, sets up
 * the procfs interface and, depending on configuration, the CPU topology
 * information and the performance counters.
 *
 * Always returns 0; the caches are created with SLAB_PANIC, so a failed
 * cache creation does not return here.
 */
static int __init _init_litmus(void)
{
	printk("Starting LITMUS^RT kernel\n");

	/* union np_flag is expected to be exactly 32 bits wide */
	BUILD_BUG_ON(sizeof(union np_flag) != sizeof(uint32_t));

	register_sched_plugin(&linux_sched_plugin);

	bheap_node_cache = KMEM_CACHE(bheap_node, SLAB_PANIC);
	release_heap_cache = KMEM_CACHE(release_heap, SLAB_PANIC);
#ifdef CONFIG_MERGE_TIMERS
	event_list_cache = KMEM_CACHE(event_list, SLAB_PANIC);
#endif
#ifdef CONFIG_PLUGIN_MC
	mc_data_cache = KMEM_CACHE(mc_data, SLAB_PANIC);
#endif

#ifdef CONFIG_MAGIC_SYSRQ
	/* offer some debugging help */
	if (register_sysrq_key('x', &sysrq_kill_rt_tasks_op))
		printk("Could not register kill rt tasks magic sysrq.\n");
	else
		printk("Registered kill rt tasks magic sysrq.\n");
#endif

	init_litmus_proc();

#ifdef CONFIG_SCHED_CPU_AFFINITY
	init_topology();
#endif

	litmus_enable_perfcounters();

	return 0;
}
/* Module exit: undo _init_litmus().
 *
 * Removes the procfs interface first and then destroys every slab cache
 * that _init_litmus() created, under the same configuration guards.
 */
static void _exit_litmus(void)
{
exit_litmus_proc();
kmem_cache_destroy(bheap_node_cache);
kmem_cache_destroy(release_heap_cache);
#ifdef CONFIG_MERGE_TIMERS
kmem_cache_destroy(event_list_cache);
#endif
#ifdef CONFIG_PLUGIN_MC
kmem_cache_destroy(mc_data_cache);
#endif
}
/* Hook the LITMUS^RT core into the kernel's init/exit sequence. */
module_init(_init_litmus);
module_exit(_exit_litmus);