#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <litmus/litmus.h>
#include <litmus/jobs.h>
#include <litmus/preempt.h>
#include <litmus/sched_plugin.h>
#include <litmus/rm_common.h>
#include <litmus/sched_trace.h>
#include <litmus/color.h>
#include <litmus/fifo_common.h>
#include <litmus/budget.h>
#include <litmus/rt_server.h>
#include <litmus/dgl.h>
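/*
 * Two-level scheduler with cache-color management.
 *
 * Each CPU runs a top-level server that schedules a partitioned RM domain.
 * One entity in that domain is a per-CPU FIFO server: a budgeted task that,
 * when linked, runs best-effort (is_be) tasks from a single global FIFO
 * domain. Before a task may physically execute, its request for cache
 * colors (a dgl_group_req) must be acquired on its CPU; a task contending
 * for or holding its colors is marked non-preemptive (kernel_np). The
 * shared FIFO domain is protected by fifo_lock and the group lock by
 * dgl_lock.
 */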
/**
 * @server Common server functionality.
 * @task Task used to schedule the server.
 * @timer Budget enforcement for @task.
 * @start_time If set, time at which the server began running.
 */
struct fifo_server {
struct rt_server server;
struct task_struct* task;
struct enforcement_timer timer;
lt_t start_time;
};
/**
 * @server Common server functionality.
 * @rm_domain Partitioned RM domain for this CPU.
 * @scheduled Task physically running on this CPU.
 * @fifo_server FIFO server partitioned to this CPU.
 */
struct cpu_entry {
struct rt_server server;
rt_domain_t rm_domain;
struct task_struct* scheduled;
struct fifo_server fifo_server;
};
DEFINE_PER_CPU(struct cpu_entry, color_cpus);
static rt_domain_t fifo_domain;
static raw_spinlock_t fifo_lock;
static struct dgl group_lock;
static raw_spinlock_t dgl_lock;
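/*
 * Convenience accessors: local_entry/remote_entry/task_entry map to the
 * per-CPU state, task_fserver to a task's partitioned FIFO server, and
 * entry_lock to the per-CPU RM domain lock. has_resources(t, c) is true
 * iff @t's group-lock request is the one currently acquired on CPU @c.
 * task_dom/task_lock select the global FIFO domain (and lock) for
 * best-effort tasks and the per-CPU RM domain otherwise.
 */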
#define local_entry (&__get_cpu_var(color_cpus))
#define remote_entry(cpu) (&per_cpu(color_cpus, cpu))
#define task_entry(task) remote_entry(get_partition(task))
#define task_fserver(task) (&task_entry(task)->fifo_server.server)
#define entry_lock(entry) (&(entry)->rm_domain.ready_lock)
#define has_resources(t, c) (tsk_rt(t)->req == group_lock.acquired[c])
#define task_dom(entry, task) (is_be(task) ? &fifo_domain : &(entry)->rm_domain)
#define task_lock(entry, task) (is_be(task) ? &fifo_lock : entry_lock(entry))
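/*
 * Server IDs are assigned in init_color(): CPU servers use 1..N and FIFO
 * servers use N+1..2N, where N = num_online_cpus(). Any sid above N
 * therefore denotes a FIFO server.
 */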
#define is_fifo_server(s) ((s)->sid > num_online_cpus())
/*
* Requeue onto domain's release or ready queue based on task state.
*/
static void requeue(rt_domain_t *dom, struct task_struct* t)
{
	/* Remove stopped servers from the system */
	if (is_server(t) && !tsk_rt(t)->present)
		return;
TRACE_TASK(t, "Requeueing\n");
if (is_queued(t)) {
TRACE_TASK(t, "Already queued!\n");
return;
}
set_rt_flags(t, RT_F_RUNNING);
if (is_released(t, litmus_clock()))
__add_ready(dom, t);
else
add_release(dom, t);
}
/*
* Relinquish resources held by @t (or its children).
*/
static void release_resources(struct task_struct *t)
{
struct task_struct *sched;
TRACE_TASK(t, "Releasing resources\n");
if (is_server(t)) {
sched = task_fserver(t)->linked;
if (sched)
release_resources(sched);
} else if (is_kernel_np(t))
remove_group_req(&group_lock, tsk_rt(t)->req);
tsk_rt(t)->kernel_np = 0;
}
/*
* Put in requests for resources needed by @t. If @t is a server, this will
* set @t's np flag to reflect resources held by @t's children.
*/
static void acquire_resources(struct task_struct *t)
{
int cpu;
struct rt_server *server;
struct task_struct *sched;
/* Can't acquire resources if t is not running */
BUG_ON(!get_task_server(t));
if (is_kernel_np(t)) {
TRACE_TASK(t, "Already contending for resources\n");
return;
}
cpu = get_task_server(t)->cpu;
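	/* For a server, mirror the state of its linked child: the server
	 * becomes non-preemptive only while the child holds its resources.
	 * For an ordinary task, issue the group-lock request (if not
	 * already satisfied) and mark the task non-preemptive.
	 */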
if (is_server(t)) {
server = task_fserver(t);
sched = server->linked;
/* Happens when server is booted off on completion or
* has just completed executing a task.
*/
if (sched && !is_kernel_np(sched))
acquire_resources(sched);
		/* Become np if the linked task holds its resources */
if (sched && has_resources(sched, cpu)) {
TRACE_TASK(t, "Running task with resource\n");
tsk_rt(t)->kernel_np = 1;
} else {
TRACE_TASK(t, "Running no resources\n");
tsk_rt(t)->kernel_np = 0;
}
} else {
TRACE_TASK(t, "Acquiring resources\n");
if (!has_resources(t, cpu))
add_group_req(&group_lock, tsk_rt(t)->req, cpu);
tsk_rt(t)->kernel_np = 1;
}
}
/*
* Stop logically running the currently linked task.
*/
static void unlink(struct rt_server *server)
{
BUG_ON(!server->linked);
if (is_server(server->linked))
task_fserver(server->linked)->running = 0;
sched_trace_server_switch_away(server->sid, 0,
server->linked->pid,
get_rt_job(server->linked));
TRACE_TASK(server->linked, "No longer run by server %d\n", server->sid);
raw_spin_lock(&dgl_lock);
release_resources(server->linked);
raw_spin_unlock(&dgl_lock);
get_task_server(server->linked) = NULL;
server->linked = NULL;
}
static struct task_struct* schedule_server(struct rt_server *server);
/*
* Logically run @task.
*/
static void link(struct rt_server *server, struct task_struct *task)
{
struct rt_server *tserv;
BUG_ON(server->linked);
BUG_ON(!server->running);
BUG_ON(is_kernel_np(task));
TRACE_TASK(task, "Run by server %d\n", server->sid);
if (is_server(task)) {
tserv = task_fserver(task);
tserv->running = 1;
schedule_server(tserv);
}
server->linked = task;
get_task_server(task) = server;
sched_trace_server_switch_to(server->sid, 0,
task->pid, get_rt_job(task));
}
/*
* Complete job for task linked to @server.
*/
static void job_completion(struct rt_server *server)
{
struct task_struct *t = server->linked;
lt_t et;
TRACE_TASK(t, "Job completed\n");
if (is_server(t))
sched_trace_server_completion(t->pid, get_rt_job(t));
else
sched_trace_task_completion(t, 0);
if (1 < get_rt_job(t)) {
/* our releases happen at the second job */
et = get_exec_time(t);
if (et > tsk_rt(t)->max_exec_time)
tsk_rt(t)->max_exec_time = et;
}
unlink(server);
set_rt_flags(t, RT_F_SLEEP);
prepare_for_next_period(t);
if (is_server(t))
sched_trace_server_release(t->pid, get_rt_job(t),
get_release(t), get_deadline(t));
else
sched_trace_task_release(t);
if (is_running(t))
server->requeue(server, t);
}
/*
* Update @server state to reflect task's state.
*/
static void update_task(struct rt_server *server)
{
int oot, sleep, block, np;
struct task_struct *t = server->linked;
block = !is_running(t);
oot = budget_enforced(t) && budget_exhausted(t);
np = is_kernel_np(t);
sleep = get_rt_flags(t) == RT_F_SLEEP;
TRACE_TASK(t, "Updating task, block: %d, oot: %d, np: %d, sleep: %d\n",
block, oot, np, sleep);
if (block)
unlink(server);
else if (oot || sleep)
job_completion(server);
}
/*
* Link next task for @server.
*/
static struct task_struct* schedule_server(struct rt_server *server)
{
struct task_struct *next;
struct rt_server *lserver;
int is_fifo = is_fifo_server(server);
TRACE("Scheduling server %d\n", server->sid);
if (server->linked) {
if (is_server(server->linked)) {
lserver = task_fserver(server->linked);
lserver->update(lserver);
}
update_task(server);
}
next = server->linked;
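	/* Preempt the linked task only if it is not non-preemptive and a
	 * higher-priority task is ready in the server's domain. The shared
	 * FIFO domain additionally requires fifo_lock.
	 */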
if (is_fifo)
raw_spin_lock(&fifo_lock);
if ((!next || !is_np(next)) &&
server->need_preempt(server->domain, next)) {
if (next) {
TRACE_TASK(next, "Preempted\n");
unlink(server);
requeue(server->domain, next);
}
next = __take_ready(server->domain);
link(server, next);
}
if (is_fifo)
raw_spin_unlock(&fifo_lock);
return next;
}
/*
* Dumb requeue for PRM (CPU) servers.
*/
static void rm_requeue(struct rt_server *server, struct task_struct *t)
{
BUG_ON(is_be(t));
requeue(server->domain, t);
}
/*
* Locking requeue for FIFO servers.
*/
static void fifo_requeue(struct rt_server *server, struct task_struct *t)
{
BUG_ON(!is_be(t));
raw_spin_lock(&fifo_lock);
requeue(server->domain, t);
raw_spin_unlock(&fifo_lock);
}
/*
* Locking take for FIFO servers.
* TODO: no longer necessary.
*/
static struct task_struct* fifo_take(struct rt_server *server)
{
struct task_struct *ret;
raw_spin_lock(&fifo_lock);
ret = __take_ready(server->domain);
raw_spin_unlock(&fifo_lock);
return ret;
}
/*
 * Update server state: charge elapsed execution time to the server task,
 * pick the next task to run, and reserve the resources it needs.
 */
static void fifo_update(struct rt_server *server)
{
lt_t delta;
struct fifo_server *fserver;
fserver = container_of(server, struct fifo_server, server);
TRACE_TASK(fserver->task, "Updating FIFO server\n");
if (!server->linked || has_resources(server->linked, server->cpu)) {
/* Running here means linked to a parent server */
BUG_ON(!server->running);
/* Stop executing */
if (fserver->start_time) {
delta = litmus_clock() - fserver->start_time;
tsk_rt(fserver->task)->job_params.exec_time += delta;
fserver->start_time = 0;
cancel_enforcement_timer(&fserver->timer);
} else {
/* Server is linked, but not executing */
BUG_ON(fserver->timer.armed);
}
/* Calculate next task */
schedule_server(&fserver->server);
/* Reserve needed resources */
raw_spin_lock(&dgl_lock);
acquire_resources(fserver->task);
raw_spin_unlock(&dgl_lock);
}
}
/*
 * Release handler for a per-CPU RM domain: merge the released tasks and
 * trigger a preemption only if the linked task should be preempted and is
 * not non-preemptive.
 */
static void color_rm_release(rt_domain_t *rm, struct bheap *tasks)
{
unsigned long flags;
struct cpu_entry *entry;
TRACE_TASK(bheap2task(bheap_peek(rm->order, tasks)),
"Released set of RM tasks\n");
entry = container_of(rm, struct cpu_entry, rm_domain);
raw_spin_lock_irqsave(entry_lock(entry), flags);
__merge_ready(rm, tasks);
if (rm_preemption_needed(rm, entry->server.linked) &&
(!entry->server.linked || !is_kernel_np(entry->server.linked))) {
litmus_reschedule(entry->server.cpu);
}
raw_spin_unlock_irqrestore(entry_lock(entry), flags);
}
/*
 * Trigger a preemption on the first CPU whose FIFO server is linked at the
 * top level but has nothing linked itself (i.e. is running NULL).
 */
static void check_for_fifo_preempt(void)
{
int ret = 0, cpu;
struct cpu_entry *entry;
struct rt_server *cpu_server, *fifo_server;
TRACE("Checking for FIFO preempt\n");
for_each_online_cpu(cpu) {
entry = remote_entry(cpu);
cpu_server = &entry->server;
fifo_server = &entry->fifo_server.server;
raw_spin_lock(entry_lock(entry));
raw_spin_lock(&fifo_lock);
if (cpu_server->linked && is_server(cpu_server->linked) &&
!fifo_server->linked) {
litmus_reschedule(cpu);
ret = 1;
}
raw_spin_unlock(&fifo_lock);
raw_spin_unlock(entry_lock(entry));
if (ret)
break;
}
}
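/*
 * Release handler for the global FIFO domain: merge newly released
 * best-effort tasks and preempt a CPU whose FIFO server is idle.
 */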
static void color_fifo_release(rt_domain_t *dom, struct bheap *tasks)
{
unsigned long flags;
TRACE_TASK(bheap2task(bheap_peek(dom->order, tasks)),
"Released set of FIFO tasks\n");
local_irq_save(flags);
raw_spin_lock(&fifo_lock);
__merge_ready(dom, tasks);
raw_spin_unlock(&fifo_lock);
check_for_fifo_preempt();
local_irq_restore(flags);
}
#define cpu_empty(entry, run) \
(!(run) || (is_server(run) && !(entry)->fifo_server.server.linked))
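/*
 * Hierarchical scheduling decision for this CPU: pick the next top-level
 * RM entity, descend into the FIFO server if one is linked, then contend
 * for the group lock. A task that does not hold its colors is not run;
 * if the FIFO server ends up executing, its budget timer is armed.
 */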
static struct task_struct* color_schedule(struct task_struct *prev)
{
unsigned long flags;
int server_running;
struct cpu_entry *entry = local_entry;
struct task_struct *next, *plink = entry->server.linked;
TRACE("Reschedule on %d at %llu\n", entry->server.cpu, litmus_clock());
BUG_ON(entry->scheduled && entry->scheduled != prev);
BUG_ON(entry->scheduled && !is_realtime(prev));
raw_spin_lock_irqsave(entry_lock(entry), flags);
if (entry->scheduled && cpu_empty(entry, plink) && is_running(prev)) {
TRACE_TASK(prev, "Snuck in on new!\n");
raw_spin_lock(&fifo_lock);
requeue(task_dom(entry, prev), prev);
raw_spin_unlock(&fifo_lock);
}
/* Pick next top-level task */
next = schedule_server(&entry->server);
/* Schedule hierarchically */
server_running = next && is_server(next);
if (server_running)
next = task_fserver(next)->linked;
/* Selected tasks must contend for group lock */
if (next) {
raw_spin_lock(&dgl_lock);
acquire_resources(next);
if (has_resources(next, entry->server.cpu)) {
TRACE_TASK(next, "Has group lock\n");
sched_trace_task_resume(next, 1);
} else {
TRACE_TASK(next, "Does not have lock, 0x%p does\n",
group_lock.acquired[entry->server.cpu]);
if (next != prev)
sched_trace_task_block(next, 1);
next = NULL;
server_running = 0;
}
raw_spin_unlock(&dgl_lock);
}
/* Server is blocked if its running task is blocked. Note that if the
* server has no running task, the server will now execute NULL.
*/
if (server_running) {
TRACE_TASK(entry->server.linked, "Server running\n");
arm_enforcement_timer(&entry->fifo_server.timer,
entry->fifo_server.task);
entry->fifo_server.start_time = litmus_clock();
}
if (prev)
tsk_rt(prev)->scheduled_on = NO_CPU;
if (next)
tsk_rt(next)->scheduled_on = entry->server.cpu;
entry->scheduled = next;
sched_state_task_picked();
raw_spin_unlock_irqrestore(entry_lock(entry), flags);
return entry->scheduled;
}
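/*
 * Set up a newly admitted task: allocate and fill its group-lock request
 * from the color control page, then enqueue it (or record it as already
 * scheduled) and trigger any necessary preemption.
 */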
static void color_task_new(struct task_struct *t, int on_rq, int running)
{
unsigned long flags;
int i;
raw_spinlock_t *lock;
struct cpu_entry *entry;
struct dgl_group_req *req;
color_t *colors, *pages;
TRACE_TASK(t, "New colored task\n");
local_irq_save(flags);
entry = (is_be(t)) ? local_entry : task_entry(t);
lock = task_lock(entry, t);
release_at(t, litmus_clock());
req = kmalloc(sizeof(*req), GFP_ATOMIC);
dgl_group_req_init(&group_lock, req);
tsk_rt(t)->req = req;
tsk_rt(t)->max_exec_time = 0;
/* Fill request */
if (tsk_rt(t)->color_ctrl_page) {
colors = tsk_rt(t)->color_ctrl_page->colors;
pages = tsk_rt(t)->color_ctrl_page->pages;
for (i = 0; pages[i]; i++)
set_req(&group_lock, req, colors[i], pages[i]);
}
/* Join system */
raw_spin_lock(lock);
if (running) {
TRACE_TASK(t, "Already scheduled on %d\n", entry->server.cpu);
BUG_ON(entry->scheduled);
entry->scheduled = t;
tsk_rt(t)->scheduled_on = entry->server.cpu;
} else
requeue(task_dom(entry, t), t);
raw_spin_unlock(lock);
/* Trigger necessary preemptions */
if (is_be(t))
check_for_fifo_preempt();
else
litmus_reschedule(entry->server.cpu);
local_irq_restore(flags);
}
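/*
 * On wake-up, re-release a tardy task at the current time (sporadic
 * behaviour), requeue it unless it is still scheduled, and trigger
 * preemptions.
 */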
static void color_task_wake_up(struct task_struct *task)
{
unsigned long flags;
struct cpu_entry* entry = task_entry(task);
raw_spinlock_t *lock = task_lock(entry, task);
lt_t now = litmus_clock();
TRACE_TASK(task, "Wake up at %llu\n", now);
local_irq_save(flags);
/* Abuse sporadic model */
if (is_tardy(task, now)) {
release_at(task, now);
sched_trace_task_release(task);
}
/* Re-enter system */
if (entry->scheduled != task) {
raw_spin_lock(lock);
requeue(task_dom(entry, task), task);
raw_spin_unlock(lock);
} else {
TRACE_TASK(task, "Is already scheduled on %d!\n",
entry->scheduled);
}
/* Trigger preemptions */
if (is_be(task))
check_for_fifo_preempt();
else
litmus_reschedule(entry->server.cpu);
local_irq_restore(flags);
}
static void color_task_block(struct task_struct *t)
{
TRACE_TASK(t, "Block at %llu, state=%d\n", litmus_clock(), t->state);
BUG_ON(!is_realtime(t));
BUG_ON(is_queued(t));
}
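/*
 * Tear down an exiting task: dequeue it, unlink it from its server,
 * unschedule it if it is running, and release and free its group-lock
 * request.
 */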
static void color_task_exit(struct task_struct * t)
{
unsigned long flags;
struct cpu_entry *entry = task_entry(t);
raw_spinlock_t *lock = task_lock(entry, t);
TRACE_TASK(t, "RIP, now reschedule\n");
local_irq_save(flags);
sched_trace_task_exit(t);
/* Remove from scheduler consideration */
if (is_queued(t)) {
raw_spin_lock(lock);
remove(task_dom(entry, t), t);
raw_spin_unlock(lock);
}
/* Stop parent server */
if (get_task_server(t))
unlink(get_task_server(t));
/* Unschedule running task */
if (tsk_rt(t)->scheduled_on != NO_CPU) {
entry = remote_entry(tsk_rt(t)->scheduled_on);
raw_spin_lock(entry_lock(entry));
tsk_rt(t)->scheduled_on = NO_CPU;
entry->scheduled = NULL;
litmus_reschedule(entry->server.cpu);
raw_spin_unlock(entry_lock(entry));
}
/* Remove dgl request from system */
raw_spin_lock(&dgl_lock);
release_resources(t);
raw_spin_unlock(&dgl_lock);
dgl_group_req_free(tsk_rt(t)->req);
kfree(tsk_rt(t)->req);
local_irq_restore(flags);
}
/*
* Non-be tasks must have migrated to the right CPU.
*/
static long color_admit_task(struct task_struct* t)
{
	int ret = (is_be(t) || task_cpu(t) == get_partition(t)) ? 0 : -EINVAL;
	if (ret) {
		printk(KERN_WARNING "Task failed to migrate to CPU %d\n",
		       get_partition(t));
	}
return ret;
}
/*
* Load server parameters.
*/
static long color_activate_plugin(void)
{
int cpu, ret = 0;
struct rt_task tp;
struct task_struct *server_task;
struct cpu_entry *entry;
lt_t now = litmus_clock();
for_each_online_cpu(cpu) {
entry = remote_entry(cpu);
server_task = entry->fifo_server.task;
raw_spin_lock(entry_lock(entry));
ret = color_server_params(cpu, ((unsigned long*)&tp.exec_cost),
((unsigned long*)&tp.period));
if (ret) {
printk(KERN_WARNING "Uninitialized server for CPU %d\n",
entry->server.cpu);
goto loop_end;
}
/* Fill rt parameters */
tp.phase = 0;
tp.cpu = cpu;
tp.cls = RT_CLASS_SOFT;
tp.budget_policy = PRECISE_ENFORCEMENT;
tsk_rt(server_task)->task_params = tp;
tsk_rt(server_task)->present = 1;
/* Make runnable */
release_at(server_task, now);
entry->fifo_server.start_time = 0;
entry->scheduled = NULL;
cancel_enforcement_timer(&entry->fifo_server.timer);
if (!is_queued(server_task))
requeue(&entry->rm_domain, server_task);
TRACE_TASK(server_task, "Created server with wcet: %llu, "
"period: %llu\n", tp.exec_cost, tp.period);
loop_end:
raw_spin_unlock(entry_lock(entry));
}
return ret;
}
/*
 * Mark servers as not present so that future calls to requeue() silently
 * drop them.
 */
static long color_deactivate_plugin(void)
{
int cpu;
struct cpu_entry *entry;
for_each_online_cpu(cpu) {
entry = remote_entry(cpu);
if (entry->fifo_server.task) {
tsk_rt(entry->fifo_server.task)->present = 0;
}
}
return 0;
}
/*
* Dump container and server parameters for tracing.
*/
static void color_release_ts(lt_t time)
{
int cpu, fifo_cid;
char fifo_name[TASK_COMM_LEN], cpu_name[TASK_COMM_LEN];
struct cpu_entry *entry;
struct task_struct *stask;
strcpy(cpu_name, "CPU");
strcpy(fifo_name, "BE");
fifo_cid = num_online_cpus();
trace_litmus_container_param(fifo_cid, fifo_name);
for_each_online_cpu(cpu) {
entry = remote_entry(cpu);
trace_litmus_container_param(cpu, cpu_name);
trace_litmus_server_param(entry->server.sid, cpu, 0, 0);
stask = entry->fifo_server.task;
trace_litmus_server_param(stask->pid, fifo_cid,
get_exec_cost(stask),
get_rt_period(stask));
}
}
static struct sched_plugin color_plugin __cacheline_aligned_in_smp = {
.plugin_name = "COLOR",
.task_new = color_task_new,
.complete_job = complete_job,
.task_exit = color_task_exit,
.schedule = color_schedule,
.task_wake_up = color_task_wake_up,
.task_block = color_task_block,
.admit_task = color_admit_task,
.release_ts = color_release_ts,
.activate_plugin = color_activate_plugin,
.deactivate_plugin = color_deactivate_plugin,
};
static int __init init_color(void)
{
int cpu;
struct cpu_entry *entry;
struct task_struct *server_task;
struct fifo_server *fifo_server;
struct rt_server *cpu_server;
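	/* Per CPU: an RM domain, a FIFO server (sid cpu + N + 1) scheduling
	 * the shared FIFO domain, a bare task_struct representing that
	 * server, and the top-level CPU server (sid cpu + 1).
	 */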
for_each_online_cpu(cpu) {
entry = remote_entry(cpu);
rm_domain_init(&entry->rm_domain, NULL, color_rm_release);
entry->scheduled = NULL;
/* Create FIFO server */
fifo_server = &entry->fifo_server;
init_rt_server(&fifo_server->server,
cpu + num_online_cpus() + 1,
cpu,
&fifo_domain,
fifo_preemption_needed,
fifo_requeue, fifo_update, fifo_take);
/* Create task struct for FIFO server */
server_task = kmalloc(sizeof(struct task_struct), GFP_ATOMIC);
memset(server_task, 0, sizeof(*server_task));
server_task->policy = SCHED_LITMUS;
strcpy(server_task->comm, "server");
server_task->pid = fifo_server->server.sid;
fifo_server->task = server_task;
/* Create rt_params for FIFO server */
tsk_rt(server_task)->heap_node = bheap_node_alloc(GFP_ATOMIC);
tsk_rt(server_task)->rel_heap = release_heap_alloc(GFP_ATOMIC);
bheap_node_init(&tsk_rt(server_task)->heap_node, server_task);
tsk_rt(server_task)->is_server = 1;
/* Create CPU server */
cpu_server = &entry->server;
init_rt_server(cpu_server, cpu + 1, cpu,
&entry->rm_domain, rm_preemption_needed,
rm_requeue, NULL, NULL);
cpu_server->running = 1;
init_enforcement_timer(&fifo_server->timer);
}
fifo_domain_init(&fifo_domain, NULL, color_fifo_release);
raw_spin_lock_init(&fifo_lock);
dgl_init(&group_lock, color_cache_info.nr_colors,
color_cache_info.ways);
raw_spin_lock_init(&dgl_lock);
return register_sched_plugin(&color_plugin);
}
static void exit_color(void)
{
dgl_free(&group_lock);
}
module_init(init_color);
module_exit(exit_color);