/*
* litmus/sched_cedf.c
*
* Implementation of the C-EDF scheduling algorithm.
*
* This implementation is based on G-EDF:
* - CPUs are clustered around L2 or L3 caches.
* - Clusters topology is automatically detected (this is arch dependent
* and is working only on x86 at the moment --- and only with modern
* cpus that exports cpuid4 information)
* - The plugins _does not_ attempt to put tasks in the right cluster i.e.
* the programmer needs to be aware of the topology to place tasks
* in the desired cluster
* - default clustering is around L2 cache (cache index = 2)
* supported clusters are: L1 (private cache: pedf), L2, L3, ALL (all
* online_cpus are placed in a single cluster).
*
* For details on functions, take a look at sched_gsn_edf.c
*
* Currently, we do not support changes in the number of online cpus.
* If the num_online_cpus() dynamically changes, the plugin is broken.
*
* This version uses the simple approach and serializes all scheduling
* decisions by the use of a queue lock. This is probably not the
* best way to do it, but it should suffice for now.
*/
#include <linux/spinlock.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <litmus/litmus.h>
#include <litmus/jobs.h>
#include <litmus/preempt.h>
#include <litmus/sched_plugin.h>
#include <litmus/edf_common.h>
#include <litmus/sched_trace.h>
#include <litmus/clustered.h>
#include <litmus/bheap.h>
#ifdef CONFIG_SCHED_CPU_AFFINITY
#include <litmus/affinity.h>
#endif
/* to configure the cluster size */
#include <litmus/litmus_proc.h>
#ifdef CONFIG_SCHED_CPU_AFFINITY
#include <litmus/affinity.h>
#endif
#ifdef CONFIG_LITMUS_SOFTIRQD
#include <litmus/litmus_softirq.h>
#endif
#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
#include <linux/interrupt.h>
#include <litmus/trace.h>
#endif
#ifdef CONFIG_LITMUS_NVIDIA
#include <litmus/nvidia_info.h>
#endif
/* Reference configuration variable. Determines which cache level is used to
* group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that
* all CPUs form a single cluster (just like GSN-EDF).
*/
static enum cache_level cluster_config = GLOBAL_CLUSTER;
struct clusterdomain;
/* cpu_entry_t - maintain the linked and scheduled state
*
* A cpu also contains a pointer to the cedf_domain_t cluster
* that owns it (struct clusterdomain*)
*/
typedef struct {
int cpu;
struct clusterdomain* cluster; /* owning cluster */
struct task_struct* linked; /* only RT tasks */
struct task_struct* scheduled; /* only RT tasks */
atomic_t will_schedule; /* prevent unneeded IPIs */
struct bheap_node* hn;
} cpu_entry_t;
/* one cpu_entry_t per CPU */
DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries);
#define set_will_schedule() \
(atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 1))
#define clear_will_schedule() \
(atomic_set(&__get_cpu_var(cedf_cpu_entries).will_schedule, 0))
#define test_will_schedule(cpu) \
(atomic_read(&per_cpu(cedf_cpu_entries, cpu).will_schedule))
#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
struct tasklet_head
{
struct tasklet_struct *head;
struct tasklet_struct **tail;
};
#endif
/*
* In C-EDF there is a cedf domain _per_ cluster
* The number of clusters is dynamically determined accordingly to the
* total cpu number and the cluster size
*/
typedef struct clusterdomain {
/* rt_domain for this cluster */
rt_domain_t domain;
/* cpus in this cluster */
cpu_entry_t* *cpus;
/* map of this cluster cpus */
cpumask_var_t cpu_map;
/* the cpus queue themselves according to priority in here */
struct bheap_node *heap_node;
struct bheap cpu_heap;
/* lock for this cluster */
#define cluster_lock domain.ready_lock
#ifdef CONFIG_LITMUS_PAI_SOFTIRQD
struct tasklet_head pending_tasklets;
#endif
} cedf_domain_t;
/* a cedf_domain per cluster; allocation is done at init/activation time */
cedf_domain_t *cedf;
#define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster)
#define task_cpu_cluster(task) remote_cluster(get_partition(task))
/* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling
* decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose
* information during the initialization of the plugin (e.g., topology)
#define WANT_ALL_SCHED_EVENTS
*/
#define VERBOSE_INIT
static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b)
{
cpu_entry_t *a, *b;
a = _a->value;
b = _b->value;
/* Note that a and b are inverted: we want the lowest-priority CPU at
* the top of the heap.
*/
return edf_higher_prio(b->linked, a->linked);
}
/* update_cpu_position - Move the cpu entry to the correct place to maintain
* order in the cpu queue. Caller must hold cedf lock.
*/
static void update_cpu_position(cpu_entry_t *entry)
{
cedf_domain_t *cluster = entry->cluster;
if (likely(bheap_node_in_heap(entry->hn)))
bheap_delete(cpu_lower_prio,
&cluster->cpu_heap,
entry->hn);
bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn);
}
/* caller must hold cedf lock */
static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster)
{
struct bheap_node* hn;
hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap);
return hn->value;
}
/* link_task_to_cpu - Update the link of a CPU.
* Handles the case where the to-be-linked task is already
* scheduled on a different CPU.
*/
static noinline void link_task_to_cpu(struct task_struct* linked,
cpu_entry_t *entry)
{
cpu_entry_t *sched;
struct task_struct* tmp;
int on_cpu;
BUG_ON(linked && !is_realtime(linked));
/* Currently linked task is set to be unlinked. */
if (entry->linked) {
entry->linked->rt_param.linked_on = NO_CPU;
}
/* Link new task to CPU. */
if (linked) {
set_rt_flags(linked, RT_F_RUNNING);
/* handle task is already scheduled somewhere! */
on_cpu = linked->rt_param.scheduled_on;
if (on_cpu != NO_CPU) {
sched = &per_cpu(cedf_cpu_entries, on_cpu);
/* this should only happen if not linked already */
BUG_ON(sched->linked == linked);
/* If we are already scheduled on the CPU to which we
* wanted to link, we don't need to do the swap --
* we just link ourselves to the CPU and depend on
* the caller to get things right.
*/
if (entry != sched) {
TRACE_TASK(linked,
"already scheduled on %d, updating link.\n",
sched->cpu);
tmp = sched->linked;
linked->rt_param.linked_on = sched->cpu;
sched->linked = linked;
update_cpu_position(sched);
linked = tmp;
}
}
if (linked) /* might be NULL due to swap */
linked->rt_param.linked_on = entry->cpu;
}
entry->linked = linked;
#ifdef WANT_ALL_SCHED_EVENTS
if (linked)
TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
else
TRACE("NULL linked to %d.\n", entry->cpu);
#endif
update_cpu_position(entry);
}
/* unlink - Make sure a task is not linked any longer to an entry
* where it was linked before. Must hold cluster_lock.
*/
static noinline void unlink(struct task_struct* t)
{
cpu_entry_t *entry;
if (t->rt_param.linked_on != NO_CPU) {
/* unlink */
entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on);
t->rt_param.linked_on = NO_CPU;
link_task_to_cpu(NULL, entry);
} else if (is_queued(t)) {
/* This is an interesting situation: t is scheduled,
* but was just recently unlinked. It cannot be
* linked anywhere else (because then it would have
* been relinked to this CPU), thus it must be in some
* queue. We must remove it from the list in this
* case.
*
* in C-EDF case is should be somewhere in the queue for
* its domain, therefore and we can get the domain using
* task_cpu_cluster
*/
remove(&(task_cpu_cluster(t))->domain, t);
}
}
/* preempt - force a CPU to reschedule
*/
static void preempt(cpu_entry_t *entry)
{
preempt_if_preemptable(entry->scheduled, entry->cpu);
}
/* requeue - Put an unlinked task into gsn-edf domain.
* Caller must hold cluster_lock.
*/
static noinline void requeue(struct task_struct* task)
{
cedf_domain_t *cluster = task_cpu_cluster(task);
BUG_ON(!task);
/* sanity check before insertion */
BUG_ON(is_queued(task));
if (is_released(task, litmus_clock()))
__add_ready(&cluster->domain, task);
else {
/* it has got to wait */
add_release(&cluster->domain, task);
}
}
#ifdef CONFIG_SCHED_CPU_AFFINITY
static cpu_entry_t* cedf_get_nearest_available_cpu(
cedf_domain_t *cluster, cpu_entry_t *start)
{
cpu_entry_t *affinity;
get_nearest_available_cpu(affinity, start, cedf_cpu_entries,
#ifdef CONFIG_RELEASE_MASTER
cluster->domain.release_master
#else
NO_CPU
#endif
);
/* make sure CPU is in our cluster */
if (affinity && cpu_isset(affinity->cpu, *cluster->cpu_map))
return(affinity);
else
return(NULL);
}
#endif
/* check for any necessary preemptions */
static void check_for_preemptions(cedf_domain_t *cluster)
{
struct task_struct *task;
cpu_entry_t *last;
for(last = lowest_prio_cpu(cluster);
edf_preemption_needed(&cluster->domain, last->linked);
last = lowest_prio_cpu(cluster)) {
/* preemption necessary */
task = __take_ready(&cluster->domain);
TRACE("check_for_preemptions: attempting to link task %d to %d\n",
task->pid, last->cpu);
#ifdef CONFIG_SCHED_CPU_AFFINITY
{
cpu_entry_t *affinity =
cedf_get_nearest_available_cpu(cluster,
&per_cpu(cedf_cpu_entries, task_cpu(task)));
if(affinity)
last = affinity;
else if(last->linked)
requeue(last->linked);
}
#else
if (last->linked)
requeue(last->linked);
#endif
link_task_to_cpu(task, last);
preempt(last);
}
}
/* cedf_job_arrival: task is either resumed or released */
static noinline void cedf_job_arrival(struct task_struct* task)
{
cedf_domain_t *cluster = task_cpu_cluster(task);
BUG_ON(!task);
requeue(task);
check_for_preemptions(cluster);
}
|