Diffstat (limited to 'litmus/sched_pfair.c')
-rw-r--r--	litmus/sched_pfair.c	225
1 file changed, 127 insertions(+), 98 deletions(-)
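
The patch below replaces the per-cluster release wheel (an array of PFAIR_MAX_PERIOD bheaps indexed by pfair_time % PFAIR_MAX_PERIOD) with a single release_queue bheap guarded by a new release_lock: pfair_release_jobs() merges newly released jobs into that heap, and poll_releases() merges the whole heap into the ready queue at the next quantum boundary. As a rough illustration of that merge pattern only, here is a user-space sketch; the job struct, the sorted list standing in for the kernel's bheap, and the pthread mutex standing in for raw_spinlock_t are stand-ins invented for this example, not LITMUS^RT code.

/* Sketch only: user-space model of the single release queue.
 * Compile with: gcc -std=c99 -pthread sketch.c */
#include <stdio.h>
#include <pthread.h>

/* stand-in for a job's bheap node; ordered by release time only */
struct job {
	unsigned long release;
	struct job *next;
};

struct cluster {
	pthread_mutex_t release_lock;	/* models raw_spinlock_t release_lock */
	struct job *release_queue;	/* models struct bheap release_queue  */
	struct job *ready_queue;	/* models cluster->pfair.ready_queue  */
};

/* insert in release-time order (crude stand-in for pfair_ready_order) */
static void enqueue(struct job **q, struct job *j)
{
	while (*q && (*q)->release <= j->release)
		q = &(*q)->next;
	j->next = *q;
	*q = j;
}

/* models pfair_release_jobs(): merge newly released jobs into the
 * cluster-wide release queue while holding release_lock */
static void release_jobs(struct cluster *c, struct job *j)
{
	pthread_mutex_lock(&c->release_lock);
	enqueue(&c->release_queue, j);
	pthread_mutex_unlock(&c->release_lock);
}

/* models poll_releases(): at a quantum boundary, drain the release
 * queue into the ready queue (the kernel uses one bheap_union call) */
static void poll_releases(struct cluster *c)
{
	struct job *j;

	pthread_mutex_lock(&c->release_lock);
	while ((j = c->release_queue) != NULL) {
		c->release_queue = j->next;
		enqueue(&c->ready_queue, j);
	}
	pthread_mutex_unlock(&c->release_lock);
}

int main(void)
{
	struct cluster c = { PTHREAD_MUTEX_INITIALIZER, NULL, NULL };
	struct job a = { 3, NULL }, b = { 1, NULL };
	struct job *j;

	release_jobs(&c, &a);
	release_jobs(&c, &b);
	poll_releases(&c);	/* quantum boundary */

	for (j = c.ready_queue; j; j = j->next)
		printf("ready: release=%lu\n", j->release);
	return 0;
}

Dropping the wheel removes the PFAIR_MAX_PERIOD admission limit and the merge_time bookkeeping. The patch also hard-wires early releasing: cur_release() now always returns the job release rather than the per-subtask release, so a task whose next job is already released never leaves the ready queue (the work-conserving case). The second sketch models just the to_relq decision made by advance_subtask(); the field comments and the simplified control flow (no not-present handling, no tracing) are my reading of the code, not part of the patch.

/* Sketch only: models the to_relq decision in advance_subtask(). */
#include <stdio.h>

typedef unsigned long quanta_t;

struct pfair_param {
	quanta_t quanta;	/* execution quanta (subtasks) per job   */
	quanta_t cur;		/* current subtask index                 */
	quanta_t release;	/* release of the current job, in quanta */
	quanta_t period;	/* period, in quanta                     */
};

/* early releasing: only the job release matters; every subtask of the
 * current job is eligible as soon as the job itself is released */
static quanta_t cur_release(struct pfair_param *p)
{
	return p->release;
}

/* returns 1 if the task must wait in the release queue,
 * 0 if it may stay in the ready queue (the work-conserving case) */
static int advance_subtask(quanta_t time, struct pfair_param *p)
{
	p->cur = (p->cur + 1) % p->quanta;
	if (!p->cur)			/* job complete: set up the next one */
		p->release += p->period;
	return cur_release(p) > time;	/* time_after(cur_release(t), time) */
}

int main(void)
{
	/* e.g., a task that needs 3 quanta every 10 quanta */
	struct pfair_param p = { 3, 0, 0, 10 };
	quanta_t t;

	for (t = 0; t < 12; t++) {
		int to_relq = advance_subtask(t, &p);
		printf("t=%2lu cur=%lu release=%2lu to_relq=%d\n",
		       t, p.cur, p.release, to_relq);
	}
	return 0;
}
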
diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c
index 0a64273daa47..16f1065bbdca 100644
--- a/litmus/sched_pfair.c
+++ b/litmus/sched_pfair.c
@@ -1,7 +1,8 @@
 /*
  * kernel/sched_pfair.c
  *
- * Implementation of the (global) Pfair scheduling algorithm.
+ * Implementation of the PD^2 pfair scheduling algorithm. This
+ * implementation realizes "early releasing," i.e., it is work-conserving.
  *
  */
 
@@ -76,36 +77,29 @@ struct pfair_state {
 	struct task_struct* local;	/* the local copy of linked	*/
 	struct task_struct* scheduled;	/* what is actually scheduled	*/
 
-	unsigned long missed_quanta;
 	lt_t offset;			/* stagger offset */
+	unsigned int missed_updates;
+	unsigned int missed_quanta;
 };
 
-/* Currently, we limit the maximum period of any task to 2000 quanta.
- * The reason is that it makes the implementation easier since we do not
- * need to reallocate the release wheel on task arrivals.
- * In the future
- */
-#define PFAIR_MAX_PERIOD 2000
-
 struct pfair_cluster {
 	struct scheduling_cluster topology;
 
 	/* The "global" time in this cluster. */
 	quanta_t pfair_time; /* the "official" PFAIR clock */
-	quanta_t merge_time; /* Updated after the release queue has been
-			      * merged. Used by drop_all_references().
-			      */
 
 	/* The ready queue for this cluster. */
 	rt_domain_t pfair;
 
-	/* This is the release queue wheel for this cluster. It is indexed by
-	 * pfair_time % PFAIR_MAX_PERIOD. Each heap is ordered by PFAIR
-	 * priority, so that it can be merged with the ready queue.
+	/* The set of jobs that should have their release enacted at the next
+	 * quantum boundary.
 	 */
-	struct bheap release_queue[PFAIR_MAX_PERIOD];
+	struct bheap release_queue;
+	raw_spinlock_t release_lock;
 };
 
+#define RT_F_REQUEUE 0x2
+
 static inline struct pfair_cluster* cpu_cluster(struct pfair_state* state)
 {
 	return container_of(state->topology.cluster, struct pfair_cluster, topology);
@@ -121,6 +115,11 @@ static inline struct pfair_state* from_cluster_list(struct list_head* pos)
 	return list_entry(pos, struct pfair_state, topology.cluster_list);
 }
 
+static inline struct pfair_cluster* from_domain(rt_domain_t* rt)
+{
+	return container_of(rt, struct pfair_cluster, pfair);
+}
+
 static inline raw_spinlock_t* cluster_lock(struct pfair_cluster* cluster)
 {
 	/* The ready_lock is used to serialize all scheduling events. */
@@ -161,21 +160,11 @@ static quanta_t cur_deadline(struct task_struct* t)
 	return cur_subtask(t)->deadline + tsk_pfair(t)->release;
 }
 
-
-static quanta_t cur_sub_release(struct task_struct* t)
-{
-	return cur_subtask(t)->release + tsk_pfair(t)->release;
-}
-
 static quanta_t cur_release(struct task_struct* t)
 {
-#ifdef EARLY_RELEASE
-	/* only the release of the first subtask counts when we early
-	 * release */
+	/* This is early releasing: only the release of the first subtask
+	 * counts. */
 	return tsk_pfair(t)->release;
-#else
-	return cur_sub_release(t);
-#endif
 }
 
 static quanta_t cur_overlap(struct task_struct* t)
@@ -235,11 +224,16 @@ int pfair_ready_order(struct bheap_node* a, struct bheap_node* b)
 	return pfair_higher_prio(bheap2task(a), bheap2task(b));
 }
 
-/* return the proper release queue for time t */
-static struct bheap* relq(struct pfair_cluster* cluster, quanta_t t)
+static void pfair_release_jobs(rt_domain_t* rt, struct bheap* tasks)
 {
-	struct bheap* rq = cluster->release_queue + (t % PFAIR_MAX_PERIOD);
-	return rq;
+	struct pfair_cluster* cluster = from_domain(rt);
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&cluster->release_lock, flags);
+
+	bheap_union(pfair_ready_order, &cluster->release_queue, tasks);
+
+	raw_spin_unlock_irqrestore(&cluster->release_lock, flags);
 }
 
 static void prepare_release(struct task_struct* t, quanta_t at)
@@ -248,25 +242,12 @@ static void prepare_release(struct task_struct* t, quanta_t at)
 	tsk_pfair(t)->cur = 0;
 }
 
-static void __pfair_add_release(struct task_struct* t, struct bheap* queue)
-{
-	bheap_insert(pfair_ready_order, queue,
-		     tsk_rt(t)->heap_node);
-}
-
-static void pfair_add_release(struct pfair_cluster* cluster,
-			      struct task_struct* t)
-{
-	BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node));
-	__pfair_add_release(t, relq(cluster, cur_release(t)));
-}
-
 /* pull released tasks from the release queue */
-static void poll_releases(struct pfair_cluster* cluster,
-			  quanta_t time)
+static void poll_releases(struct pfair_cluster* cluster)
 {
-	__merge_ready(&cluster->pfair, relq(cluster, time));
-	cluster->merge_time = time;
+	raw_spin_lock(&cluster->release_lock);
+	__merge_ready(&cluster->pfair, &cluster->release_queue);
+	raw_spin_unlock(&cluster->release_lock);
 }
 
 static void check_preempt(struct task_struct* t)
@@ -292,16 +273,12 @@ static void drop_all_references(struct task_struct *t)
 {
 	int cpu;
 	struct pfair_state* s;
-	struct bheap* q;
 	struct pfair_cluster* cluster;
 	if (bheap_node_in_heap(tsk_rt(t)->heap_node)) {
-		/* figure out what queue the node is in */
+		/* It must be in the ready queue; drop references isn't called
+		 * when the job is in a release queue. */
 		cluster = tsk_pfair(t)->cluster;
-		if (time_before_eq(cur_release(t), cluster->merge_time))
-			q = &cluster->pfair.ready_queue;
-		else
-			q = relq(cluster, cur_release(t));
-		bheap_delete(pfair_ready_order, q,
+		bheap_delete(pfair_ready_order, &cluster->pfair.ready_queue,
 			     tsk_rt(t)->heap_node);
 	}
 	for (cpu = 0; cpu < num_online_cpus(); cpu++) {
@@ -313,6 +290,17 @@ static void drop_all_references(struct task_struct *t)
 		if (s->scheduled == t)
 			s->scheduled = NULL;
 	}
+	/* make sure we don't have a stale linked_on field */
+	tsk_rt(t)->linked_on = NO_CPU;
+}
+
+static void pfair_prepare_next_period(struct task_struct* t)
+{
+	struct pfair_param* p = tsk_pfair(t);
+
+	prepare_for_next_period(t);
+	get_rt_flags(t) = RT_F_RUNNING;
+	p->release += p->period;
 }
 
 /* returns 1 if the task needs to go the release queue */
@@ -322,30 +310,26 @@ static int advance_subtask(quanta_t time, struct task_struct* t, int cpu)
 	int to_relq;
 	p->cur = (p->cur + 1) % p->quanta;
 	if (!p->cur) {
-		sched_trace_task_completion(t, 1);
 		if (tsk_rt(t)->present) {
-			/* we start a new job */
-			prepare_for_next_period(t);
-			sched_trace_task_release(t);
-			get_rt_flags(t) = RT_F_RUNNING;
-			p->release += p->period;
+			/* The job overran; we start a new budget allocation. */
+			pfair_prepare_next_period(t);
 		} else {
 			/* remove task from system until it wakes */
 			drop_all_references(t);
+			tsk_rt(t)->flags = RT_F_REQUEUE;
 			TRACE_TASK(t, "on %d advanced to subtask %lu (not present)\n",
 				   cpu, p->cur);
 			return 0;
 		}
 	}
 	to_relq = time_after(cur_release(t), time);
-	TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d\n",
-		   cpu, p->cur, to_relq);
+	TRACE_TASK(t, "on %d advanced to subtask %lu -> to_relq=%d (cur_release:%lu time:%lu)\n",
+		   cpu, p->cur, to_relq, cur_release(t), time);
 	return to_relq;
 }
 
 static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time)
 {
-	int missed;
 	struct task_struct* l;
 	struct pfair_param* p;
 	struct list_head* pos;
@@ -354,14 +338,17 @@ static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time)
 	list_for_each(pos, &cluster->topology.cpus) {
 		cpu = from_cluster_list(pos);
 		l = cpu->linked;
-		missed = cpu->linked != cpu->local;
+		cpu->missed_updates += cpu->linked != cpu->local;
 		if (l) {
 			p = tsk_pfair(l);
 			p->last_quantum = time;
 			p->last_cpu = cpu_id(cpu);
 			if (advance_subtask(time, l, cpu_id(cpu))) {
-				cpu->linked = NULL;
-				pfair_add_release(cluster, l);
+				//cpu->linked = NULL;
+				PTRACE_TASK(l, "should go to release queue. "
+					    "scheduled_on=%d present=%d\n",
+					    tsk_rt(l)->scheduled_on,
+					    tsk_rt(l)->present);
 			}
 		}
 	}
@@ -445,6 +432,11 @@ static void schedule_subtasks(struct pfair_cluster *cluster, quanta_t time)
 	list_for_each(pos, &cluster->topology.cpus) {
 		cpu_state = from_cluster_list(pos);
 		retry = 1;
+#ifdef CONFIG_RELEASE_MASTER
+		/* skip release master */
+		if (cluster->pfair.release_master == cpu_id(cpu_state))
+			continue;
+#endif
 		while (retry) {
 			if (pfair_higher_prio(__peek_ready(&cluster->pfair),
 					      cpu_state->linked))
@@ -471,13 +463,13 @@ static void schedule_next_quantum(struct pfair_cluster *cluster, quanta_t time)
 	sched_trace_quantum_boundary();
 
 	advance_subtasks(cluster, time);
-	poll_releases(cluster, time);
+	poll_releases(cluster);
 	schedule_subtasks(cluster, time);
 
 	list_for_each(pos, &cluster->topology.cpus) {
 		cpu = from_cluster_list(pos);
 		if (cpu->linked)
-			PTRACE_TASK(pstate[cpu]->linked,
+			PTRACE_TASK(cpu->linked,
 				    " linked on %d.\n", cpu_id(cpu));
 		else
 			PTRACE("(null) linked on %d.\n", cpu_id(cpu));
@@ -612,12 +604,42 @@ static int safe_to_schedule(struct task_struct* t, int cpu)
 static struct task_struct* pfair_schedule(struct task_struct * prev)
 {
 	struct pfair_state* state = &__get_cpu_var(pfair_state);
-	int blocks;
+	struct pfair_cluster* cluster = cpu_cluster(state);
+	int blocks, completion, out_of_time;
 	struct task_struct* next = NULL;
 
+#ifdef CONFIG_RELEASE_MASTER
+	/* Bail out early if we are the release master.
+	 * The release master never schedules any real-time tasks.
+	 */
+	if (unlikely(cluster->pfair.release_master == cpu_id(state))) {
+		sched_state_task_picked();
+		return NULL;
+	}
+#endif
+
 	raw_spin_lock(cpu_lock(state));
 
 	blocks = is_realtime(prev) && !is_running(prev);
+	completion = is_realtime(prev) && get_rt_flags(prev) == RT_F_SLEEP;
+	out_of_time = is_realtime(prev) && time_after(cur_release(prev),
+						      state->local_tick);
+
+	if (is_realtime(prev))
+		PTRACE_TASK(prev, "blocks:%d completion:%d out_of_time:%d\n",
+			    blocks, completion, out_of_time);
+
+	if (completion) {
+		sched_trace_task_completion(prev, 0);
+		pfair_prepare_next_period(prev);
+		prepare_release(prev, cur_release(prev));
+	}
+
+	if (!blocks && (completion || out_of_time)) {
+		drop_all_references(prev);
+		sched_trace_task_release(prev);
+		add_release(&cluster->pfair, prev);
+	}
 
 	if (state->local && safe_to_schedule(state->local, cpu_id(state)))
 		next = state->local;
@@ -649,13 +671,19 @@ static void pfair_task_new(struct task_struct * t, int on_rq, int running)
 	cluster = tsk_pfair(t)->cluster;
 
 	raw_spin_lock_irqsave(cluster_lock(cluster), flags);
-	if (running)
-		t->rt_param.scheduled_on = task_cpu(t);
-	else
-		t->rt_param.scheduled_on = NO_CPU;
 
 	prepare_release(t, cluster->pfair_time + 1);
-	pfair_add_release(cluster, t);
+
+	t->rt_param.scheduled_on = NO_CPU;
+
+	if (running) {
+#ifdef CONFIG_RELEASE_MASTER
+		if (task_cpu(t) != cluster->pfair.release_master)
+#endif
+			t->rt_param.scheduled_on = task_cpu(t);
+		__add_ready(&cluster->pfair, t);
+	}
+
 	check_preempt(t);
 
 	raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
@@ -665,6 +693,7 @@ static void pfair_task_wake_up(struct task_struct *t)
 {
 	unsigned long flags;
 	lt_t now;
+	int requeue = 0;
 	struct pfair_cluster* cluster;
 
 	cluster = tsk_pfair(t)->cluster;
@@ -679,13 +708,20 @@ static void pfair_task_wake_up(struct task_struct *t)
 	 * (as if it never blocked at all). Otherwise, we have a
 	 * new sporadic job release.
 	 */
+	requeue = tsk_rt(t)->flags == RT_F_REQUEUE;
 	now = litmus_clock();
 	if (lt_before(get_deadline(t), now)) {
+		TRACE_TASK(t, "sporadic release!\n");
 		release_at(t, now);
 		prepare_release(t, time2quanta(now, CEIL));
 		sched_trace_task_release(t);
-		/* FIXME: race with pfair_time advancing */
-		pfair_add_release(cluster, t);
+	}
+
+	/* only add to ready queue if the task isn't still linked somewhere */
+	if (requeue) {
+		TRACE_TASK(t, "requeueing required\n");
+		tsk_rt(t)->flags = RT_F_RUNNING;
+		__add_ready(&cluster->pfair, t);
 	}
 
 	check_preempt(t);
@@ -744,15 +780,11 @@ static void pfair_release_at(struct task_struct* task, lt_t start)
 	release_at(task, start);
 	release = time2quanta(start, CEIL);
 
-	/* FIXME: support arbitrary offsets. */
-	if (release - cluster->pfair_time >= PFAIR_MAX_PERIOD)
-		release = cluster->pfair_time + PFAIR_MAX_PERIOD;
-
 	TRACE_TASK(task, "sys release at %lu\n", release);
 
 	drop_all_references(task);
 	prepare_release(task, release);
-	pfair_add_release(cluster, task);
+	add_release(&cluster->pfair, task);
 
 	raw_spin_unlock_irqrestore(cluster_lock(cluster), flags);
 }
@@ -834,13 +866,6 @@ static long pfair_admit_task(struct task_struct* t)
834 "The period of %s/%d is not a multiple of %llu.\n", 866 "The period of %s/%d is not a multiple of %llu.\n",
835 t->comm, t->pid, (unsigned long long) quantum_length); 867 t->comm, t->pid, (unsigned long long) quantum_length);
836 868
837 if (period >= PFAIR_MAX_PERIOD) {
838 printk(KERN_WARNING
839 "PFAIR: Rejecting task %s/%d; its period is too long.\n",
840 t->comm, t->pid);
841 return -EINVAL;
842 }
843
844 if (quanta == period) { 869 if (quanta == period) {
845 /* special case: task has weight 1.0 */ 870 /* special case: task has weight 1.0 */
846 printk(KERN_INFO 871 printk(KERN_INFO
@@ -880,12 +905,9 @@ static long pfair_admit_task(struct task_struct* t)
 
 static void pfair_init_cluster(struct pfair_cluster* cluster)
 {
-	int i;
-
-	/* initialize release queue */
-	for (i = 0; i < PFAIR_MAX_PERIOD; i++)
-		bheap_init(&cluster->release_queue[i]);
-	rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, NULL);
+	rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, pfair_release_jobs);
+	bheap_init(&cluster->release_queue);
+	raw_spin_lock_init(&cluster->release_lock);
 	INIT_LIST_HEAD(&cluster->topology.cpus);
 }
 
@@ -899,8 +921,11 @@ static void cleanup_clusters(void)
 	num_pfair_clusters = 0;
 
 	/* avoid stale pointers */
-	for (i = 0; i < NR_CPUS; i++)
+	for (i = 0; i < num_online_cpus(); i++) {
 		pstate[i]->topology.cluster = NULL;
+		printk("P%d missed %u updates and %u quanta.\n", cpu_id(pstate[i]),
+		       pstate[i]->missed_updates, pstate[i]->missed_quanta);
+	}
 }
 
 static long pfair_activate_plugin(void)
@@ -936,6 +961,9 @@ static long pfair_activate_plugin(void)
 		pfair_init_cluster(cluster);
 		cluster->pfair_time = now;
 		clust[i] = &cluster->topology;
+#ifdef CONFIG_RELEASE_MASTER
+		cluster->pfair.release_master = atomic_read(&release_master_cpu);
+#endif
 	}
 
 	for (i = 0; i < num_online_cpus(); i++) {
@@ -943,6 +971,7 @@ static long pfair_activate_plugin(void)
 		state->cur_tick = now;
 		state->local_tick = now;
 		state->missed_quanta = 0;
+		state->missed_updates = 0;
 		state->offset = cpu_stagger_offset(i);
 		printk(KERN_ERR "cpus[%d] set; %d\n", i, num_online_cpus());
 		cpus[i] = &state->topology;