#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/kthread.h>
#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/mutex.h>

#include <linux/sched.h>
#include <linux/cpuset.h>

#include <litmus/litmus.h>
#include <litmus/sched_trace.h>
#include <litmus/jobs.h>
#include <litmus/sched_plugin.h>
#include <litmus/litmus_softirq.h>

/* TODO: Remove unneeded mb() and other barriers. */


/* counts number of daemons ready to handle litmus irqs. */
static atomic_t num_ready_klitirqds = ATOMIC_INIT(0);

enum pending_flags
{
    LIT_TASKLET_LOW = 0x1,
    LIT_TASKLET_HI  = LIT_TASKLET_LOW<<1,
	LIT_WORK = LIT_TASKLET_HI<<1
};

/* only support tasklet processing for now. */
struct tasklet_head
{
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

struct klitirqd_info
{
	struct task_struct*		klitirqd;
    struct task_struct*     current_owner;
    int						terminating;


	raw_spinlock_t			lock;

	u32						pending;
	atomic_t				num_hi_pending;
	atomic_t				num_low_pending;
	atomic_t				num_work_pending;

	/* in order of priority */
	struct tasklet_head     pending_tasklets_hi;
	struct tasklet_head		pending_tasklets;
	struct list_head		worklist;
};

/* one list for each klitirqd */
static struct klitirqd_info klitirqds[NR_LITMUS_SOFTIRQD];





int proc_read_klitirqd_stats(char *page, char **start,
							 off_t off, int count,
							 int *eof, void *data)
{
	int len = snprintf(page, PAGE_SIZE,
				"num ready klitirqds: %d\n\n",
				atomic_read(&num_ready_klitirqds));

	if(klitirqd_is_ready())
	{
		int i;
		for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
		{
			len +=
				snprintf(page + len, PAGE_SIZE - len,
						 "klitirqd_th%d: %s/%d\n"
						 "\tcurrent_owner: %s/%d\n"
						 "\tpending: %x\n"
						 "\tnum hi: %d\n"
						 "\tnum low: %d\n"
						 "\tnum work: %d\n\n",
						 i,
						 klitirqds[i].klitirqd->comm, klitirqds[i].klitirqd->pid,
						 (klitirqds[i].current_owner != NULL) ?
						 	klitirqds[i].current_owner->comm : "(null)",
						 (klitirqds[i].current_owner != NULL) ?
							klitirqds[i].current_owner->pid : 0,
						 klitirqds[i].pending,
						 atomic_read(&klitirqds[i].num_hi_pending),
						 atomic_read(&klitirqds[i].num_low_pending),
						 atomic_read(&klitirqds[i].num_work_pending));
		}
	}

	return(len);
}





#if 0
static atomic_t dump_id = ATOMIC_INIT(0);

static void __dump_state(struct klitirqd_info* which, const char* caller)
{
	struct tasklet_struct* list;

	int id = atomic_inc_return(&dump_id);

	//if(in_interrupt())
	{
		if(which->current_owner)
		{
			TRACE("(id: %d  caller: %s)\n"
				"klitirqd: %s/%d\n"
				"current owner: %s/%d\n"
				"pending: %x\n",
				id, caller,
				which->klitirqd->comm, which->klitirqd->pid,
				which->current_owner->comm, which->current_owner->pid,
				which->pending);
		}
		else
		{
			TRACE("(id: %d  caller: %s)\n"
				"klitirqd: %s/%d\n"
				"current owner: %p\n"
				"pending: %x\n",
				id, caller,
				which->klitirqd->comm, which->klitirqd->pid,
				NULL,
				which->pending);
		}

		list = which->pending_tasklets.head;
		while(list)
		{
			struct tasklet_struct *t = list;
			list = list->next; /* advance */
			if(t->owner)
				TRACE("(id: %d  caller: %s) Tasklet: %x, Owner = %s/%d\n", id, caller, t, t->owner->comm, t->owner->pid);
			else
				TRACE("(id: %d  caller: %s) Tasklet: %x, Owner = %p\n", id, caller, t, NULL);
		}
	}
}

static void dump_state(struct klitirqd_info* which, const char* caller)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&which->lock, flags);
    __dump_state(which, caller);
    raw_spin_unlock_irqrestore(&which->lock, flags);
}
#endif


/* forward declarations */
static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
									   struct klitirqd_info *which,
									   int wakeup);
static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
										  struct klitirqd_info *which,
										  int wakeup);
static void ___litmus_schedule_work(struct work_struct *w,
									struct klitirqd_info *which,
									int wakeup);



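/* Map a klitirqd daemon thread back to its index in klitirqds[].
   BUG()s if tsk is not one of the daemon threads. */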
inline unsigned int klitirqd_id(struct task_struct* tsk)
{
    int i;
    for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
    {
        if(klitirqds[i].klitirqd == tsk)
        {
            return i;
        }
    }

    BUG();

    return 0;
}


inline static u32 litirq_pending_hi_irqoff(struct klitirqd_info* which)
{
    return (which->pending & LIT_TASKLET_HI);
}

inline static u32 litirq_pending_low_irqoff(struct klitirqd_info* which)
{
    return (which->pending & LIT_TASKLET_LOW);
}

inline static u32 litirq_pending_work_irqoff(struct klitirqd_info* which)
{
	return (which->pending & LIT_WORK);
}

inline static u32 litirq_pending_irqoff(struct klitirqd_info* which)
{
    return(which->pending);
}


inline static u32 litirq_pending(struct klitirqd_info* which)
{
    unsigned long flags;
    u32 pending;

    raw_spin_lock_irqsave(&which->lock, flags);
    pending = litirq_pending_irqoff(which);
    raw_spin_unlock_irqrestore(&which->lock, flags);

    return pending;
};

inline static u32 litirq_pending_with_owner(struct klitirqd_info* which, struct task_struct* owner)
{
	unsigned long flags;
	u32 pending;

	raw_spin_lock_irqsave(&which->lock, flags);
	pending = litirq_pending_irqoff(which);
	if(pending)
	{
		if(which->current_owner != owner)
		{
			pending = 0;  // owner switch!
		}
	}
	raw_spin_unlock_irqrestore(&which->lock, flags);

	return pending;
}


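/* If anything is pending, return the pending mask and report the current
   owner and a pointer to its klitirqd_sem through *t and *sem; returns 0
   (with *sem == NULL) if nothing is pending. */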
inline static u32 litirq_pending_and_sem_and_owner(struct klitirqd_info* which,
				struct mutex** sem,
				struct task_struct** t)
{
	unsigned long flags;
	u32 pending;

	/* init values */
	*sem = NULL;
	*t = NULL;

	raw_spin_lock_irqsave(&which->lock, flags);

	pending = litirq_pending_irqoff(which);
	if(pending)
	{
		if(which->current_owner != NULL)
		{
			*t = which->current_owner;
			*sem = &tsk_rt(which->current_owner)->klitirqd_sem;
		}
		else
		{
			BUG();
		}
	}
	raw_spin_unlock_irqrestore(&which->lock, flags);

	if(likely(*sem))
	{
		return pending;
	}
	else
	{
		return 0;
	}
}

/* returns true if the next piece of work to do is from a different owner.
 */
static int tasklet_ownership_change(
				struct klitirqd_info* which,
				enum pending_flags taskletQ)
{
	/* this function doesn't have to look at work objects since they have
	   priority below tasklets. */

    unsigned long flags;
    int ret = 0;

    raw_spin_lock_irqsave(&which->lock, flags);

	switch(taskletQ)
	{
	case LIT_TASKLET_HI:
		if(litirq_pending_hi_irqoff(which))
		{
			ret = (which->pending_tasklets_hi.head->owner !=
						which->current_owner);
		}
		break;
	case LIT_TASKLET_LOW:
		if(litirq_pending_low_irqoff(which))
		{
			ret = (which->pending_tasklets.head->owner !=
						which->current_owner);
		}
		break;
	default:
		break;
	}

    raw_spin_unlock_irqrestore(&which->lock, flags);

    TRACE_TASK(which->klitirqd, "ownership change needed: %d\n", ret);

    return ret;
}


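/* Re-evaluate which owner the daemon should inherit its priority from,
   checking the queues in priority order: HI tasklets, then LOW tasklets,
   then work objects.  Updates the inheritance through the active plugin
   if the owner changed.  Caller must hold which->lock. */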
static void __reeval_prio(struct klitirqd_info* which)
{
    struct task_struct* next_owner = NULL;
	struct task_struct* klitirqd = which->klitirqd;

	/* Check in prio-order */
	u32 pending = litirq_pending_irqoff(which);

	//__dump_state(which, "__reeval_prio: before");

	if(pending)
	{
		if(pending & LIT_TASKLET_HI)
		{
			next_owner = which->pending_tasklets_hi.head->owner;
		}
		else if(pending & LIT_TASKLET_LOW)
		{
			next_owner = which->pending_tasklets.head->owner;
		}
		else if(pending & LIT_WORK)
		{
			struct work_struct* work =
				list_first_entry(&which->worklist, struct work_struct, entry);
			next_owner = work->owner;
		}
	}

	if(next_owner != which->current_owner)
	{
		struct task_struct* old_owner = which->current_owner;

		/* bind the next owner. */
		which->current_owner = next_owner;
		mb();

        if(next_owner != NULL)
        {
			if(!in_interrupt())
			{
				TRACE_CUR("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
						((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm,
						((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid,
						next_owner->comm, next_owner->pid);
			}
			else
			{
				TRACE("%s: Ownership change: %s/%d to %s/%d\n", __FUNCTION__,
					((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->comm,
					((tsk_rt(klitirqd)->inh_task) ? tsk_rt(klitirqd)->inh_task : klitirqd)->pid,
					next_owner->comm, next_owner->pid);
			}

			litmus->increase_prio_inheritance_klitirqd(klitirqd, old_owner, next_owner);
        }
        else
        {
			if(likely(!in_interrupt()))
			{
				TRACE_CUR("%s: Ownership change: %s/%d to NULL (reverting)\n",
						__FUNCTION__, klitirqd->comm, klitirqd->pid);
			}
			else
			{
				// is this a bug?
				TRACE("%s: Ownership change: %s/%d to NULL (reverting)\n",
					__FUNCTION__, klitirqd->comm, klitirqd->pid);
			}

			BUG_ON(pending != 0);
			litmus->decrease_prio_inheritance_klitirqd(klitirqd, old_owner, NULL);
        }
    }

	//__dump_state(which, "__reeval_prio: after");
}

static void reeval_prio(struct klitirqd_info* which)
{
    unsigned long flags;

    raw_spin_lock_irqsave(&which->lock, flags);
    __reeval_prio(which);
    raw_spin_unlock_irqrestore(&which->lock, flags);
}


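/* Re-evaluate the daemon's priority and wake it if it is not already
   running.  Caller must hold which->lock with interrupts off. */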
static void wakeup_litirqd_locked(struct klitirqd_info* which)
{
	/* Interrupts are disabled: no need to stop preemption */
	if (which && which->klitirqd)
	{
        __reeval_prio(which); /* configure the proper priority */

		if(which->klitirqd->state != TASK_RUNNING)
		{
        	TRACE("%s: Waking up klitirqd: %s/%d\n", __FUNCTION__,
			  	which->klitirqd->comm, which->klitirqd->pid);

			wake_up_process(which->klitirqd);
		}
    }
}


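/* Drain one tasklet queue (HI or LOW): atomically steal the queue and
   clear its pending flag, then run each tasklet that still belongs to
   current_owner.  Tasklets of other owners, disabled tasklets, and
   tasklets that could not be locked are requeued at the tail. */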
static void do_lit_tasklet(struct klitirqd_info* which,
						   struct tasklet_head* pending_tasklets)
{
    unsigned long flags;
	struct tasklet_struct *list;
	atomic_t* count;

    raw_spin_lock_irqsave(&which->lock, flags);

	//__dump_state(which, "do_lit_tasklet: before steal");

	/* copy out the tasklets for our private use. */
	list = pending_tasklets->head;
	pending_tasklets->head = NULL;
	pending_tasklets->tail = &pending_tasklets->head;

	/* remove pending flag */
	which->pending &= (pending_tasklets == &which->pending_tasklets) ?
		~LIT_TASKLET_LOW :
		~LIT_TASKLET_HI;

	count = (pending_tasklets == &which->pending_tasklets) ?
		&which->num_low_pending:
		&which->num_hi_pending;

	//__dump_state(which, "do_lit_tasklet: after steal");

    raw_spin_unlock_irqrestore(&which->lock, flags);


    while(list)
    {
        struct tasklet_struct *t = list;

        /* advance, lest we forget */
		list = list->next;

        /* execute tasklet if it has my priority and is free */
		if ((t->owner == which->current_owner) && tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {

				sched_trace_tasklet_begin(t->owner);

				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                {
					BUG();
                }
                TRACE_CUR("%s: Invoking tasklet.\n", __FUNCTION__);
				t->func(t->data);
				tasklet_unlock(t);

				atomic_dec(count);

				sched_trace_tasklet_end(t->owner, 0ul);

				continue;  /* process more tasklets */
			}
			tasklet_unlock(t);
		}

        TRACE_CUR("%s: Could not invoke tasklet.  Requeuing.\n", __FUNCTION__);

		/* couldn't process tasklet.  put it back at the end of the queue. */
		if(pending_tasklets == &which->pending_tasklets)
			___litmus_tasklet_schedule(t, which, 0);
		else
			___litmus_tasklet_hi_schedule(t, which, 0);
    }
}


// returns 1 if priorities need to be changed to continue processing
// pending tasklets.
static int do_litirq(struct klitirqd_info* which)
{
    u32 pending;
    int resched = 0;

    if(in_interrupt())
    {
        TRACE("%s: exiting early: in interrupt context!\n", __FUNCTION__);
        return(0);
    }

	if(which->klitirqd != current)
	{
        TRACE_CUR("%s: exiting early: thread/info mismatch! Running %s/%d but given %s/%d.\n",
				  __FUNCTION__, current->comm, current->pid,
				  which->klitirqd->comm, which->klitirqd->pid);
        return(0);
	}

    if(!is_realtime(current))
    {
        TRACE_CUR("%s: exiting early: klitirqd is not real-time. Sched Policy = %d\n",
				  __FUNCTION__, current->policy);
        return(0);
    }


    /* We only handle tasklets & work objects, no need for RCU triggers? */

    pending = litirq_pending(which);
    if(pending)
    {
        /* extract the work to do and do it! */
        if(pending & LIT_TASKLET_HI)
        {
            TRACE_CUR("%s: Invoking HI tasklets.\n", __FUNCTION__);
            do_lit_tasklet(which, &which->pending_tasklets_hi);
            resched = tasklet_ownership_change(which, LIT_TASKLET_HI);

            if(resched)
            {
                TRACE_CUR("%s: HI tasklets of another owner remain. "
						  "Skipping any LOW tasklets.\n", __FUNCTION__);
            }
        }

        if(!resched && (pending & LIT_TASKLET_LOW))
        {
            TRACE_CUR("%s: Invoking LOW tasklets.\n", __FUNCTION__);
			do_lit_tasklet(which, &which->pending_tasklets);
			resched = tasklet_ownership_change(which, LIT_TASKLET_LOW);

            if(resched)
            {
                TRACE_CUR("%s: LOW tasklets of another owner remain. "
						  "Skipping any work objects.\n", __FUNCTION__);
            }
        }
    }

	return(resched);
}


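/* Dequeue a single work object; run it if it belongs to current_owner,
   otherwise requeue it.  Only one item is processed per call so that
   tasklets retain priority over work objects. */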
static void do_work(struct klitirqd_info* which)
{
	unsigned long flags;
	work_func_t f;
	struct work_struct* work;

	// only execute one work-queue item to yield to tasklets.
	// ...is this a good idea, or should we just batch them?
	raw_spin_lock_irqsave(&which->lock, flags);

	if(!litirq_pending_work_irqoff(which))
	{
		raw_spin_unlock_irqrestore(&which->lock, flags);
		goto no_work;
	}

	work = list_first_entry(&which->worklist, struct work_struct, entry);
	list_del_init(&work->entry);

	if(list_empty(&which->worklist))
	{
		which->pending &= ~LIT_WORK;
	}

	raw_spin_unlock_irqrestore(&which->lock, flags);



	/* safe to read current_owner outside of lock since only this thread
	 may write to the pointer. */
	if(work->owner == which->current_owner)
	{
		TRACE_CUR("%s: Invoking work object.\n", __FUNCTION__);
		// do the work!
		work_clear_pending(work);
		f = work->func;
		f(work);  /* can't touch 'work' after this point,
				   the user may have freed it. */

		atomic_dec(&which->num_work_pending);
	}
	else
	{
		TRACE_CUR("%s: Could not invoke work object.  Requeuing.\n",
				  __FUNCTION__);
		___litmus_schedule_work(work, which, 0);
	}

no_work:
	return;
}


static int set_litmus_daemon_sched(void)
{
    /* set up a daemon job that will never complete.
       it should only ever run on behalf of another
       real-time task.

       TODO: Transition to a new job whenever a
       new tasklet is handled */

    int ret = 0;

	struct rt_task tp = {
		.exec_cost = 0,
		.period = 1000000000, /* dummy 1 second period */
		.phase = 0,
		.cpu = task_cpu(current),
		.budget_policy = NO_ENFORCEMENT,
		.cls = RT_CLASS_BEST_EFFORT
	};

	struct sched_param param = { .sched_priority = 0};


	/* set task params, mark as proxy thread, and init other data */
	tsk_rt(current)->task_params = tp;
	tsk_rt(current)->is_proxy_thread = 1;
	tsk_rt(current)->cur_klitirqd = NULL;
	mutex_init(&tsk_rt(current)->klitirqd_sem);
	atomic_set(&tsk_rt(current)->klitirqd_sem_stat, NOT_HELD);

	/* inform the OS we're SCHED_LITMUS --
	   sched_setscheduler_nocheck() calls litmus_admit_task(). */
	sched_setscheduler_nocheck(current, SCHED_LITMUS, &param);

    return ret;
}

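/* The "execution phase" is bracketed by the owner's klitirqd_sem: the
   daemon acquires the owner's semaphore before running the owner's
   deferred work and releases it when done. */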
static void enter_execution_phase(struct klitirqd_info* which,
								  struct mutex* sem,
								  struct task_struct* t)
{
	TRACE_CUR("%s: Trying to enter execution phase. "
			  "Acquiring semaphore of %s/%d\n", __FUNCTION__,
			  t->comm, t->pid);
	down_and_set_stat(current, HELD, sem);
	TRACE_CUR("%s: Execution phase entered! "
			  "Acquired semaphore of %s/%d\n", __FUNCTION__,
			  t->comm, t->pid);
}

static void exit_execution_phase(struct klitirqd_info* which,
								 struct mutex* sem,
								 struct task_struct* t)
{
	TRACE_CUR("%s: Exiting execution phase. "
			  "Releasing semaphore of %s/%d\n", __FUNCTION__,
			  t->comm, t->pid);
	if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) == HELD)
	{
		up_and_set_stat(current, NOT_HELD, sem);
		TRACE_CUR("%s: Execution phase exited! "
				  "Released semaphore of %s/%d\n", __FUNCTION__,
				  t->comm, t->pid);
	}
	else
	{
		TRACE_CUR("%s: COULDN'T RELEASE SEMAPHORE BECAUSE ONE IS NOT HELD!\n", __FUNCTION__);
	}
}

/* main loop for the klitirqd daemon threads */
static int run_klitirqd(void* unused)
{
	struct klitirqd_info* which = &klitirqds[klitirqd_id(current)];
	struct mutex* sem;
	struct task_struct* owner;

    int rt_status = set_litmus_daemon_sched();

    if(rt_status != 0)
    {
        TRACE_CUR("%s: Failed to transition to rt-task.\n", __FUNCTION__);
        goto rt_failed;
    }

	atomic_inc(&num_ready_klitirqds);

	set_current_state(TASK_INTERRUPTIBLE);

	while (!kthread_should_stop())
	{
		preempt_disable();
		if (!litirq_pending(which))
		{
            /* sleep for work */
            TRACE_CUR("%s: No more tasklets or work objects. Going to sleep.\n",
					  __FUNCTION__);
			preempt_enable_no_resched();
            schedule();

			if(kthread_should_stop()) /* bail out */
			{
				TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
				continue;
			}

			preempt_disable();
		}

		__set_current_state(TASK_RUNNING);

		while (litirq_pending_and_sem_and_owner(which, &sem, &owner))
		{
			int needs_resched = 0;

			preempt_enable_no_resched();

			BUG_ON(sem == NULL);

			// wait to enter execution phase; wait for 'current_owner' to block.
			enter_execution_phase(which, sem, owner);

			if(kthread_should_stop())
			{
				TRACE_CUR("%s:%d: Signaled to terminate.\n", __FUNCTION__, __LINE__);
				break;
			}

			preempt_disable();

			/* Double check that there's still pending work and the owner hasn't
			 * changed. Pending items may have been flushed while we were sleeping.
			 */
			if(litirq_pending_with_owner(which, owner))
			{
				TRACE_CUR("%s: Executing tasklets and/or work objects.\n",
						  __FUNCTION__);

				needs_resched = do_litirq(which);

				preempt_enable_no_resched();

				// work objects are preemptible.
				if(!needs_resched)
				{
					do_work(which);
				}

				// exit execution phase.
				exit_execution_phase(which, sem, owner);

				TRACE_CUR("%s: Setting up next priority.\n", __FUNCTION__);
				reeval_prio(which); /* check if we need to change priority here */
			}
			else
			{
				TRACE_CUR("%s: Pending work was flushed!  Prev owner was %s/%d\n",
								__FUNCTION__,
								owner->comm, owner->pid);
				preempt_enable_no_resched();

				// exit execution phase.
				exit_execution_phase(which, sem, owner);
			}

			cond_resched();
			preempt_disable();
		}
		preempt_enable();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);

	atomic_dec(&num_ready_klitirqds);

rt_failed:
    litmus_exit_task(current);

	return rt_status;
}


struct klitirqd_launch_data
{
	int* cpu_affinity;
	struct work_struct work;
};

/* executed by a kworker from workqueues */
static void launch_klitirqd(struct work_struct *work)
{
    int i;

	struct klitirqd_launch_data* launch_data =
		container_of(work, struct klitirqd_launch_data, work);

    TRACE("%s: Creating %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);

    /* create the daemon threads */
    for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
    {
		if(launch_data->cpu_affinity)
		{
			klitirqds[i].klitirqd =
				kthread_create(
				   run_klitirqd,
				   /* treat the affinity as a pointer, we'll cast it back later */
				   (void*)(long long)launch_data->cpu_affinity[i],
				   "klitirqd_th%d/%d",
				   i,
				   launch_data->cpu_affinity[i]);

			/* litmus will put it in the right cluster. */
			kthread_bind(klitirqds[i].klitirqd, launch_data->cpu_affinity[i]);
		}
		else
		{
			klitirqds[i].klitirqd =
				kthread_create(
				   run_klitirqd,
				   /* treat the affinity as a pointer, we'll cast it back later */
				   (void*)(long long)(-1),
				   "klitirqd_th%d",
				   i);
		}
    }

    TRACE("%s: Launching %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);

    /* unleash the daemons */
    for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
    {
        wake_up_process(klitirqds[i].klitirqd);
    }

	if(launch_data->cpu_affinity)
		kfree(launch_data->cpu_affinity);
	kfree(launch_data);
}


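/* Initialize the per-daemon state and schedule a work item that creates
   and wakes the NR_LITMUS_SOFTIRQD daemon threads.  'affinity' may be
   NULL, or an array of NR_LITMUS_SOFTIRQD CPU ids to bind the threads to.
   May be called from atomic context; thread creation is deferred to a
   workqueue.  Does nothing if daemons are already running. */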
void spawn_klitirqd(int* affinity)
{
    int i;
    struct klitirqd_launch_data* delayed_launch;

	if(atomic_read(&num_ready_klitirqds) != 0)
	{
		TRACE("%s: At least one klitirqd is already running! Need to call kill_klitirqd()?\n", __FUNCTION__);
		return;
	}

    /* init the tasklet & work queues */
    for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
    {
		klitirqds[i].terminating = 0;
		klitirqds[i].pending = 0;

		klitirqds[i].num_hi_pending.counter = 0;
		klitirqds[i].num_low_pending.counter = 0;
		klitirqds[i].num_work_pending.counter = 0;

        klitirqds[i].pending_tasklets_hi.head = NULL;
        klitirqds[i].pending_tasklets_hi.tail = &klitirqds[i].pending_tasklets_hi.head;

        klitirqds[i].pending_tasklets.head = NULL;
        klitirqds[i].pending_tasklets.tail = &klitirqds[i].pending_tasklets.head;

		INIT_LIST_HEAD(&klitirqds[i].worklist);

		raw_spin_lock_init(&klitirqds[i].lock);
    }

    /* wait to flush the initializations to memory since other threads
       will access it. */
    mb();

    /* tell a work queue to launch the threads.  we can't make scheduling
       calls since we're in an atomic state. */
    TRACE("%s: Setting callback up to launch klitirqds\n", __FUNCTION__);
	delayed_launch = kmalloc(sizeof(struct klitirqd_launch_data), GFP_ATOMIC);
	if(affinity)
	{
		delayed_launch->cpu_affinity =
			kmalloc(sizeof(int)*NR_LITMUS_SOFTIRQD, GFP_ATOMIC);

		memcpy(delayed_launch->cpu_affinity, affinity,
			sizeof(int)*NR_LITMUS_SOFTIRQD);
	}
	else
	{
		delayed_launch->cpu_affinity = NULL;
	}
    INIT_WORK(&delayed_launch->work, launch_klitirqd);
    schedule_work(&delayed_launch->work);
}


void kill_klitirqd(void)
{
	if(!klitirqd_is_dead())
	{
    	int i;

    	TRACE("%s: Killing %d klitirqds\n", __FUNCTION__, NR_LITMUS_SOFTIRQD);

    	for(i = 0; i < NR_LITMUS_SOFTIRQD; ++i)
    	{
			if(klitirqds[i].terminating != 1)
			{
				klitirqds[i].terminating = 1;
				mb(); /* just to be sure? */
				flush_pending(klitirqds[i].klitirqd, NULL);

				/* signal termination */
       			kthread_stop(klitirqds[i].klitirqd);
			}
    	}
	}
}


int klitirqd_is_ready(void)
{
	return(atomic_read(&num_ready_klitirqds) == NR_LITMUS_SOFTIRQD);
}

int klitirqd_is_dead(void)
{
	return(atomic_read(&num_ready_klitirqds) == 0);
}


struct task_struct* get_klitirqd(unsigned int k_id)
{
	return(klitirqds[k_id].klitirqd);
}


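/* Hand all pending tasklets and work objects that belong to 'owner'
   (or all of them, if owner is NULL, as done during termination) back
   to Linux's normal softirq and workqueue machinery, then re-evaluate
   the daemon's priority. */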
void flush_pending(struct task_struct* klitirqd_thread,
				   struct task_struct* owner)
{
	unsigned int k_id = klitirqd_id(klitirqd_thread);
	struct klitirqd_info *which = &klitirqds[k_id];

	unsigned long flags;
	struct tasklet_struct *list;

	u32 work_flushed = 0;

	raw_spin_lock_irqsave(&which->lock, flags);

	//__dump_state(which, "flush_pending: before");

	// flush hi tasklets.
	if(litirq_pending_hi_irqoff(which))
	{
		which->pending &= ~LIT_TASKLET_HI;

		list = which->pending_tasklets_hi.head;
		which->pending_tasklets_hi.head = NULL;
		which->pending_tasklets_hi.tail = &which->pending_tasklets_hi.head;

		TRACE("%s: Handing HI tasklets back to Linux.\n", __FUNCTION__);

		while(list)
		{
			struct tasklet_struct *t = list;
			list = list->next;

			if(likely((t->owner == owner) || (owner == NULL)))
			{
				if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
				{
					BUG();
				}

				work_flushed |= LIT_TASKLET_HI;

				t->owner = NULL;

				// we just cleared TASKLET_STATE_SCHED above, so this should always succeed.
				if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
				{
					atomic_dec(&which->num_hi_pending);
					___tasklet_hi_schedule(t);
				}
				else
				{
					TRACE("%s: dropped hi tasklet??\n", __FUNCTION__);
					BUG();
				}
			}
			else
			{
				TRACE("%s: Could not flush a HI tasklet.\n", __FUNCTION__);
				// put back on queue.
				___litmus_tasklet_hi_schedule(t, which, 0);
			}
		}
	}

	// flush low tasklets.
	if(litirq_pending_low_irqoff(which))
	{
		which->pending &= ~LIT_TASKLET_LOW;

		list = which->pending_tasklets.head;
		which->pending_tasklets.head = NULL;
		which->pending_tasklets.tail = &which->pending_tasklets.head;

		TRACE("%s: Handing LOW tasklets back to Linux.\n", __FUNCTION__);

		while(list)
		{
			struct tasklet_struct *t = list;
			list = list->next;

			if(likely((t->owner == owner) || (owner == NULL)))
			{
				if(unlikely(!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)))
				{
					BUG();
				}

				work_flushed |= LIT_TASKLET_LOW;

				t->owner = NULL;
				sched_trace_tasklet_end(owner, 1ul);

				if(!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
				{
					atomic_dec(&which->num_low_pending);
					___tasklet_schedule(t);
				}
				else
				{
					TRACE("%s: dropped tasklet??\n", __FUNCTION__);
					BUG();
				}
			}
			else
			{
				TRACE("%s: Could not flush a LOW tasklet.\n", __FUNCTION__);
				// put back on queue
				___litmus_tasklet_schedule(t, which, 0);
			}
		}
	}

	// flush work objects
	if(litirq_pending_work_irqoff(which))
	{
		which->pending &= ~LIT_WORK;

		TRACE("%s: Handing work objects back to Linux.\n", __FUNCTION__);

		while(!list_empty(&which->worklist))
		{
			struct work_struct* work =
				list_first_entry(&which->worklist, struct work_struct, entry);
			list_del_init(&work->entry);

			if(likely((work->owner == owner) || (owner == NULL)))
			{
				work_flushed |= LIT_WORK;
				atomic_dec(&which->num_work_pending);

				work->owner = NULL;
				sched_trace_work_end(owner, current, 1ul);
				__schedule_work(work);
			}
			else
			{
				TRACE("%s: Could not flush a work object.\n", __FUNCTION__);
				// put back on queue
				___litmus_schedule_work(work, which, 0);
			}
		}
	}

	//__dump_state(which, "flush_pending: after (before reeval prio)");


	mb(); /* commit changes to pending flags */

	/* reset the scheduling priority */
	if(work_flushed)
	{
		__reeval_prio(which);

		/* Try to offload flushed tasklets to Linux's ksoftirqd. */
		if(work_flushed & (LIT_TASKLET_LOW | LIT_TASKLET_HI))
		{
			wakeup_softirqd();
		}
	}
	else
	{
		TRACE_CUR("%s: no work flushed, so __reeval_prio() skipped\n", __FUNCTION__);
	}

	raw_spin_unlock_irqrestore(&which->lock, flags);
}




static void ___litmus_tasklet_schedule(struct tasklet_struct *t,
									   struct klitirqd_info *which,
									   int wakeup)
{
	unsigned long flags;
	u32 old_pending;

	t->next = NULL;

    raw_spin_lock_irqsave(&which->lock, flags);

	//__dump_state(which, "___litmus_tasklet_schedule: before queuing");

    *(which->pending_tasklets.tail) = t;
    which->pending_tasklets.tail = &t->next;

	old_pending = which->pending;
	which->pending |= LIT_TASKLET_LOW;

	atomic_inc(&which->num_low_pending);

	mb();

	if(!old_pending && wakeup)
	{
		wakeup_litirqd_locked(which); /* wake up the klitirqd */
	}

	//__dump_state(which, "___litmus_tasklet_schedule: after queuing");

    raw_spin_unlock_irqrestore(&which->lock, flags);
}

int __litmus_tasklet_schedule(struct tasklet_struct *t, unsigned int k_id)
{
	int ret = 0; /* assume failure */
    if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
    {
        TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
        BUG();
    }

    if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
    {
        TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id);
        BUG();
    }

	if(likely(!klitirqds[k_id].terminating))
	{
		/* Can't accept tasklets while we're processing a workqueue
		   because they're handled by the same thread. This case is
		   very RARE.

		   TODO: Use a separate thread for work objects!!!!!!
         */
		if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
		{
			ret = 1;
			___litmus_tasklet_schedule(t, &klitirqds[k_id], 1);
		}
		else
		{
			TRACE("%s: rejected tasklet because of pending work.\n",
						__FUNCTION__);
		}
	}
	return(ret);
}

EXPORT_SYMBOL(__litmus_tasklet_schedule);
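
/* Illustrative only: a minimal sketch of how a client (e.g. a device
 * driver's interrupt handler) might hand a tasklet to a klitirqd daemon.
 * The driver-side names (my_tasklet, my_bottom_half,
 * example_defer_to_klitirqd, rt_client) are hypothetical and not part of
 * this file; the sketch assumes daemon 0 has been spawned and that
 * rt_client is a real-time task.  Kept in an #if 0 block like the other
 * non-built code in this file. */
#if 0
static struct tasklet_struct my_tasklet;

static void my_bottom_half(unsigned long data)
{
	/* runs in klitirqd_th0, which inherits rt_client's priority */
}

static void example_defer_to_klitirqd(struct task_struct* rt_client)
{
	tasklet_init(&my_tasklet, my_bottom_half, 0ul);
	my_tasklet.owner = rt_client; /* owner must be set and real-time */

	if(!__litmus_tasklet_schedule(&my_tasklet, 0 /* k_id */))
	{
		/* rejected (pending work or terminating daemon):
		   fall back to Linux's regular tasklet handling. */
		tasklet_schedule(&my_tasklet);
	}
}
#endif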


static void ___litmus_tasklet_hi_schedule(struct tasklet_struct *t,
									   struct klitirqd_info *which,
									   int wakeup)
{
	unsigned long flags;
	u32 old_pending;

	t->next = NULL;

    raw_spin_lock_irqsave(&which->lock, flags);

    *(which->pending_tasklets_hi.tail) = t;
    which->pending_tasklets_hi.tail = &t->next;

	old_pending = which->pending;
	which->pending |= LIT_TASKLET_HI;

	atomic_inc(&which->num_hi_pending);

	mb();

	if(!old_pending && wakeup)
	{
		wakeup_litirqd_locked(which); /* wake up the klitirqd */
	}

    raw_spin_unlock_irqrestore(&which->lock, flags);
}

int __litmus_tasklet_hi_schedule(struct tasklet_struct *t, unsigned int k_id)
{
	int ret = 0; /* assume failure */
    if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
    {
        TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
        BUG();
    }

    if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
    {
        TRACE("%s: No klitirqd_th%d!\n", __FUNCTION__, k_id);
        BUG();
    }

    if(unlikely(!klitirqd_is_ready()))
    {
        TRACE("%s: klitirqd is not ready!\n", __FUNCTION__);
        BUG();
    }

	if(likely(!klitirqds[k_id].terminating))
	{
		if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
		{
			ret = 1;
			___litmus_tasklet_hi_schedule(t, &klitirqds[k_id], 1);
		}
		else
		{
			TRACE("%s: rejected tasklet because of pending work.\n",
						__FUNCTION__);
		}
	}
	return(ret);
}

EXPORT_SYMBOL(__litmus_tasklet_hi_schedule);


int __litmus_tasklet_hi_schedule_first(struct tasklet_struct *t, unsigned int k_id)
{
	int ret = 0; /* assume failure */
	u32 old_pending;

	BUG_ON(!irqs_disabled());

    if(unlikely((t->owner == NULL) || !is_realtime(t->owner)))
    {
        TRACE("%s: No owner associated with this tasklet!\n", __FUNCTION__);
        BUG();
    }

    if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
    {
        TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id);
        BUG();
    }

    if(unlikely(!klitirqd_is_ready()))
    {
        TRACE("%s: klitirqd is not ready!\n", __FUNCTION__);
        BUG();
    }

	if(likely(!klitirqds[k_id].terminating))
	{
    	raw_spin_lock(&klitirqds[k_id].lock);

		if(likely(atomic_read(&klitirqds[k_id].num_work_pending) == 0))
		{
			ret = 1;  // success!

			t->next = klitirqds[k_id].pending_tasklets_hi.head;
    		klitirqds[k_id].pending_tasklets_hi.head = t;

			old_pending = klitirqds[k_id].pending;
			klitirqds[k_id].pending |= LIT_TASKLET_HI;

			atomic_inc(&klitirqds[k_id].num_hi_pending);

			mb();

			if(!old_pending)
    			wakeup_litirqd_locked(&klitirqds[k_id]); /* wake up the klitirqd */
		}
		else
		{
			TRACE("%s: rejected tasklet because of pending work.\n",
					__FUNCTION__);
		}

    	raw_spin_unlock(&klitirqds[k_id].lock);
	}
	return(ret);
}

EXPORT_SYMBOL(__litmus_tasklet_hi_schedule_first);



static void ___litmus_schedule_work(struct work_struct *w,
									struct klitirqd_info *which,
									int wakeup)
{
	unsigned long flags;
	u32 old_pending;

	raw_spin_lock_irqsave(&which->lock, flags);

	work_pending(w);
	list_add_tail(&w->entry, &which->worklist);

	old_pending = which->pending;
	which->pending |= LIT_WORK;

	atomic_inc(&which->num_work_pending);

	mb();

	if(!old_pending && wakeup)
	{
		wakeup_litirqd_locked(which); /* wakeup the klitirqd */
	}

	raw_spin_unlock_irqrestore(&which->lock, flags);
}

int __litmus_schedule_work(struct work_struct *w, unsigned int k_id)
{
	int ret = 1; /* assume success */
	if(unlikely((w->owner == NULL) || !is_realtime(w->owner)))
	{
		TRACE("%s: No owner associated with this work object!\n", __FUNCTION__);
		BUG();
	}

	if(unlikely(k_id >= NR_LITMUS_SOFTIRQD))
	{
		TRACE("%s: No klitirqd_th%u!\n", __FUNCTION__, k_id);
		BUG();
	}

    if(unlikely(!klitirqd_is_ready()))
    {
        TRACE("%s: klitirqd is not ready!\n", __FUNCTION__);
        BUG();
    }

	if(likely(!klitirqds[k_id].terminating))
		___litmus_schedule_work(w, &klitirqds[k_id], 1);
	else
		ret = 0;
	return(ret);
}
EXPORT_SYMBOL(__litmus_schedule_work);
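
/* Illustrative only: the analogous sketch for work objects.  The names
 * (my_work, my_work_func, example_defer_work) are hypothetical; the owner
 * field and k_id follow the same rules as for tasklets above.  Work objects
 * run below tasklet priority and are handled one at a time. */
#if 0
static void my_work_func(struct work_struct* w)
{
	/* runs in klitirqd_th0 at the owner's priority */
}

static DECLARE_WORK(my_work, my_work_func);

static void example_defer_work(struct task_struct* rt_client)
{
	my_work.owner = rt_client; /* owner must be set and real-time */

	if(!__litmus_schedule_work(&my_work, 0 /* k_id */))
	{
		schedule_work(&my_work); /* daemon terminating: use Linux */
	}
}
#endif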


static int set_klitirqd_sem_status(unsigned long stat)
{
	TRACE_CUR("SETTING STATUS FROM %d TO %d\n",
					atomic_read(&tsk_rt(current)->klitirqd_sem_stat),
					stat);
	atomic_set(&tsk_rt(current)->klitirqd_sem_stat, stat);
	//mb();

	return(0);
}

static int set_klitirqd_sem_status_if_not_held(unsigned long stat)
{
	if(atomic_read(&tsk_rt(current)->klitirqd_sem_stat) != HELD)
	{
		return(set_klitirqd_sem_status(stat));
	}
	return(-1);
}


void __down_and_reset_and_set_stat(struct task_struct* t,
					   enum klitirqd_sem_status to_reset,
					   enum klitirqd_sem_status to_set,
					   struct mutex* sem)
{
#if 0
	struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
	struct task_struct* task = container_of(param, struct task_struct, rt_param);

	TRACE_CUR("%s: entered.  Locking semaphore of %s/%d\n",
					__FUNCTION__, task->comm, task->pid);
#endif

	mutex_lock_sfx(sem,
				   set_klitirqd_sem_status_if_not_held, to_reset,
				   set_klitirqd_sem_status, to_set);
#if 0
	TRACE_CUR("%s: exiting.  Have semaphore of %s/%d\n",
					__FUNCTION__, task->comm, task->pid);
#endif
}

void down_and_set_stat(struct task_struct* t,
					   enum klitirqd_sem_status to_set,
					   struct mutex* sem)
{
#if 0
	struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
	struct task_struct* task = container_of(param, struct task_struct, rt_param);

	TRACE_CUR("%s: entered.  Locking semaphore of %s/%d\n",
					__FUNCTION__, task->comm, task->pid);
#endif

	mutex_lock_sfx(sem,
				   NULL, 0,
				   set_klitirqd_sem_status, to_set);

#if 0
	TRACE_CUR("%s: exiting.  Have semaphore of %s/%d\n",
					__FUNCTION__, task->comm, task->pid);
#endif
}


void up_and_set_stat(struct task_struct* t,
					 enum klitirqd_sem_status to_set,
					 struct mutex* sem)
{
#if 0
	struct rt_param* param = container_of(sem, struct rt_param, klitirqd_sem);
	struct task_struct* task = container_of(param, struct task_struct, rt_param);

	TRACE_CUR("%s: entered.  Unlocking semaphore of %s/%d\n",
					__FUNCTION__,
					task->comm, task->pid);
#endif

	mutex_unlock_sfx(sem, NULL, 0,
					 set_klitirqd_sem_status, to_set);

#if 0
	TRACE_CUR("%s: exiting.  Unlocked semaphore of %s/%d\n",
					__FUNCTION__,
					task->comm, task->pid);
#endif
}



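/* Release the klitirqd_sem held by t (or, if t is a proxy thread, by its
   current owner) and mark it NEED_TO_REACQUIRE so that the daemon is not
   stuck behind a sleeping task.  Does nothing if t is still TASK_RUNNING.
   reacquire_klitirqd_lock() below takes the semaphore back. */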
void release_klitirqd_lock(struct task_struct* t)
{
	if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == HELD))
	{
		struct mutex* sem;
		struct task_struct* owner = t;

		if(t->state == TASK_RUNNING)
		{
			TRACE_TASK(t, "NOT giving up klitirqd_sem because we're not blocked!\n");
			return;
		}

		if(likely(!tsk_rt(t)->is_proxy_thread))
		{
			sem = &tsk_rt(t)->klitirqd_sem;
		}
		else
		{
			unsigned int k_id = klitirqd_id(t);
			owner = klitirqds[k_id].current_owner;

			BUG_ON(t != klitirqds[k_id].klitirqd);

			if(likely(owner))
			{
				sem = &tsk_rt(owner)->klitirqd_sem;
			}
			else
			{
				BUG();

				// We had the rug pulled out from under us.  Abort attempt
				// to reacquire the lock since our client no longer needs us.
				TRACE_CUR("HUH?!  How did this happen?\n");
				atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD);
				return;
			}
		}

		//TRACE_CUR("Releasing semaphore of %s/%d...\n", owner->comm, owner->pid);
		up_and_set_stat(t, NEED_TO_REACQUIRE, sem);
		//TRACE_CUR("Semaphore of %s/%d released!\n", owner->comm, owner->pid);
	}
	/*
	else if(is_realtime(t))
	{
		TRACE_CUR("%s: Nothing to do.  Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat);
	}
	*/
}

int reacquire_klitirqd_lock(struct task_struct* t)
{
	int ret = 0;

	if(is_realtime(t) && (atomic_read(&tsk_rt(t)->klitirqd_sem_stat) == NEED_TO_REACQUIRE))
	{
		struct mutex* sem;
		struct task_struct* owner = t;

		if(likely(!tsk_rt(t)->is_proxy_thread))
		{
			sem = &tsk_rt(t)->klitirqd_sem;
		}
		else
		{
			unsigned int k_id = klitirqd_id(t);
			//struct task_struct* owner = klitirqds[k_id].current_owner;
			owner = klitirqds[k_id].current_owner;

			BUG_ON(t != klitirqds[k_id].klitirqd);

			if(likely(owner))
			{
				sem = &tsk_rt(owner)->klitirqd_sem;
			}
			else
			{
				// We had the rug pulled out from under us.  Abort attempt
				// to reacquire the lock since our client no longer needs us.
				TRACE_CUR("No longer needs to reacquire klitirqd_sem!\n");
				atomic_set(&tsk_rt(t)->klitirqd_sem_stat, NOT_HELD);
				return(0);
			}
		}

		//TRACE_CUR("Trying to reacquire semaphore of %s/%d\n", owner->comm, owner->pid);
		__down_and_reset_and_set_stat(t, REACQUIRING, HELD, sem);
		//TRACE_CUR("Reacquired semaphore %s/%d\n", owner->comm, owner->pid);
	}
	/*
	else if(is_realtime(t))
	{
		TRACE_CUR("%s: Nothing to do.  Stat = %d\n", __FUNCTION__, tsk_rt(t)->klitirqd_sem_stat);
	}
	*/

	return(ret);
}