/*
 * Implementation of the synchronized PFAIR PD2 scheduler.
 */

#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/list.h>

/* LITMUS-RT headers (paths assumed; the original include names were lost).
 * They provide the real-time task fields, the PFAIR domain and queue
 * operations, the queue locks, the plugin interface and the tracing hooks
 * used below. */
#include <linux/litmus.h>
#include <linux/sched_plugin.h>
#include <linux/pfair_common.h>
#include <linux/sched_trace.h>
#include <linux/queuelock.h>

struct cpu_state {
	struct task_struct *	t;
	volatile jiffie_t	jiffie_marker;
};

/* PFAIR scheduling domain, release and ready queues */
static pfair_domain_t pfair __cacheline_aligned_in_smp;

/* An indicator that the quantum boundary was crossed
 * and a scheduling decision has to be made.
 */
static int sync_go[NR_CPUS];

/* A collection of CPU states protected by the pfair lock */
DEFINE_PER_CPU(struct cpu_state, states);

/*
 * This function is called by the timer code, with HZ frequency and
 * with interrupts disabled.
 *
 * It merges the release queue with the ready queue and indicates
 * that the quantum boundary was crossed.
 *
 * It also requests that the currently running real-time task be
 * scheduled off if the mode is non-real-time.
 */
static reschedule_check_t pfair_scheduler_tick(void)
{
	int want_resched = NO_RESCHED;

	sync_go[smp_processor_id()] = 0;
	if (!cpu_isset(smp_processor_id(), pfair.domain_cpus))
		goto out;

	/* Now determine if we want the current task to be preempted. */
	if (get_rt_mode() == MODE_RT_RUN) {
		pfair_try_release_pending(&pfair);
		want_resched = FORCE_RESCHED;
		/* indicate that the interrupt fired */
		sync_go[smp_processor_id()] = 1;
		barrier();
	} else if (is_realtime(current) && is_running(current)) {
		/* In non-real-time mode we want to
		 * schedule off real-time tasks.
		 */
		want_resched = FORCE_RESCHED;
	} else if (is_realtime(current) && !is_running(current)) {
		TRACE("[%d] %d Timer interrupt on not running %d\n",
		      smp_processor_id(),
		      jiffies - rt_start_time, current->pid);
	}
out:
	return want_resched;
}

/*
 * This function is called by the processor that performs rescheduling.
 * It saves the timing parameters of the currently running jobs that have
 * not been rescheduled yet and releases the next subtask of these jobs,
 * placing them into the release and ready queues.
 */
static void pretend_release(cpumask_t p)
{
	int i = 0;
	struct task_struct * t = NULL;
	/* For all the tasks increment the number of used quanta
	 * and release the next subtask or the next job, depending
	 * on the number of used quanta.
	 */
	for_each_cpu_mask(i, p) {
		t = per_cpu(states, i).t;
		if (t != NULL) {
			backup_times(t);
			inc_passed_quanta(t);
			if (get_passed_quanta(t) == get_exec_cost(t))
				pfair_prepare_next_job(t);
			else
				pfair_prepare_next_subtask(t);

			/* TRACE("[%d] %d pretending release %d with (%d, %d)\n",
				smp_processor_id(),
				jiffies-rt_start_time, t->pid,
				get_release(t)-rt_start_time,
				get_deadline(t)-rt_start_time); */

			/* Detect whether the job or subtask has to be released now. */
			if (time_before_eq(get_release(t), jiffies))
				pfair_add_ready(&pfair, t);
			else
				pfair_add_release(&pfair, t);
		}
	}
}

/*
 * Roll back the pretended release of tasks.
 * Timing parameters are restored and tasks are removed from the queues,
 * as it was before calling the schedule() function.
 */
static void rollback_release(cpumask_t p)
{
	int i = -1;
	struct task_struct * t = NULL;
	/*
	 * Roll back the pretended changes.
	 */
	for_each_cpu_mask(i, p) {
		t = per_cpu(states, i).t;
		if (t != NULL) {
			restore_times(t);
			if (t->rt_list.next != LIST_POISON1 ||
			    t->rt_list.prev != LIST_POISON2) {
				/* Delete the task from a queue */
				list_del(&t->rt_list);
			}
		}
	}
}
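/*
 * Note: backup_times()/restore_times() and the queue insertions above are
 * intended to be exact inverses of each other.  Every task that
 * pretend_release() queues for the CPUs in the mask is dequeued again by
 * rollback_release(), so after a pretend/rollback pair the ready and
 * release queues are unchanged; only the scheduling decision derived in
 * between is kept.
 */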
/*
 * The procedure builds a list of CPUs whose tasks have not been
 * rescheduled yet.  These are the CPUs whose jiffie marker differs from
 * the current value of jiffies.
 */
static void find_participants(cpumask_t * target)
{
	cpumask_t res;
	int i;

	cpus_clear(res);
	for_each_online_cpu(i) {
		if (per_cpu(states, i).jiffie_marker != jiffies)
			cpu_set(i, res);
	}
	/* Examine only CPUs in the domain. */
	cpus_and(res, pfair.domain_cpus, res);
	(*target) = res;
}

/*
 * This is the main PFAIR schedule function.  Each processor pretends
 * that some currently running tasks are released in the next quantum
 * and determines whether it should keep the task that is currently
 * running (this is usually the case for heavy tasks).
 */
static int pfair_schedule(struct task_struct *prev,
			  struct task_struct **next,
			  runqueue_t * rq)
{
	int cpu = -1;
	int k = -1;
	int need_deactivate = 1;
	int keep = 0;
	unsigned long flags;
	cpumask_t participants;
	/* A temporary array */
	struct task_struct * rs_old_ptr[NR_CPUS];

	*next = NULL;
	cpu = smp_processor_id();

	/* CPUs that are not in the domain just bypass the plugin. */
	if (!cpu_isset(cpu, pfair.domain_cpus))
		goto out;

	queue_lock_irqsave(&pfair.pfair_lock, flags);

	/* If we happen to run in non-real-time mode,
	 * then we have to schedule off the currently running tasks.
	 */
	if (get_rt_mode() != MODE_RT_RUN) {
		if (is_realtime(prev)) {
			per_cpu(states, cpu).t = NULL;
			TRACE("[%d] %d Suspending %d\n",
			      cpu, jiffies - rt_start_time, prev->pid);
			/* Move the task to the release queue for future runs.
			 * FIXME: Do something smarter.  For example, create a
			 * set where prepared or inactive tasks are placed,
			 * and then released.
			 */
			set_release(prev, get_release(prev) + 1000);
			pfair_add_release(&pfair, prev);
		}
		goto out_deactivate;
	}

	/* If the current task stops or dies,
	 * remove it from the running set.
	 */
	if (is_realtime(prev) && !is_running(prev))
		per_cpu(states, cpu).t = NULL;

	/* Make pfair decisions at quantum boundaries only,
	 * but schedule off stopped or dead tasks.
	 */
	if ((sync_go[cpu]--) != 1)
		goto out_deactivate;

	/* TRACE("[%d] %d Scheduler activation", cpu, jiffies-rt_start_time);
	cpus_and(res, pfair.domain_cpus, cpu_online_map);
	for_each_cpu_mask(k, res) {
		TRACE("%d", (per_cpu(states, k).jiffie_marker != jiffies));
	}
	TRACE("\n"); */

	/* Find the processors that have not rescheduled yet. */
	find_participants(&participants);

	/* For each task on the remote CPUs pretend a release. */
	pretend_release(participants);

	/* Clear the temporary array. */
	for_each_possible_cpu(k) {
		rs_old_ptr[k] = NULL;
	}

	/* Select a new subset of eligible tasks. */
	for_each_cpu_mask(k, participants) {
		rs_old_ptr[k] = __pfair_take_ready(&pfair);
		/* Check whether our current task must be
		 * scheduled in the next quantum.
		 */
		if (rs_old_ptr[k] == per_cpu(states, cpu).t) {
			/* This is our current task, keep it. */
			*next = per_cpu(states, cpu).t;
			need_deactivate = 0;
			keep = 1;
			break;
		}
	}

	/* Put all the extracted tasks back into the ready queue. */
	for_each_cpu_mask(k, participants) {
		if (rs_old_ptr[k] != NULL) {
			pfair_add_ready(&pfair, rs_old_ptr[k]);
			rs_old_ptr[k] = NULL;
		}
	}

	/* Roll back the pretended release: task parameters are restored
	 * and running tasks are removed from the queues.
	 */
	rollback_release(participants);

	/*
	 * If the current task is not scheduled in the next quantum,
	 * then select a new pfair task.
	 */
	if (!keep) {
		*next = per_cpu(states, cpu).t = __pfair_take_ready(&pfair);
		if (*next != NULL) {
			/* TRACE("[%d] %d Scheduling %d with (%d, %d)\n",
				cpu, jiffies-rt_start_time,
				get_release(*next), get_deadline(*next)); */
			set_task_cpu(*next, cpu);
			__activate_task(*next, rq);
		}
	} else {
		if (is_realtime(prev)) {
			/* TRACE("[%d] %d prev==next %d\n",
				cpu, jiffies-rt_start_time, prev->pid); */

			/* The task will not be switched off, but we
			 * still need to track its execution time.
			 */
			inc_passed_quanta(prev);
		}
	}

	/* Show that our task does not participate in subsequent selections. */
	__get_cpu_var(states).jiffie_marker = jiffies;

out_deactivate:
	if (is_realtime(prev) && need_deactivate && prev->array) {
		/* Take prev out of the Linux run queue. */
		deactivate_task(prev, rq);
	}
	queue_unlock_irqrestore(&pfair.pfair_lock, flags);
out:
	return 0;
}
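/*
 * For reference, a minimal sketch of the PD^2 priority order that
 * __pfair_take_ready() is assumed to apply when it hands out the
 * highest-priority subtask above.  The helpers get_b_bit() and
 * get_group_deadline() are illustrative names only, not part of this file
 * or of the pfair_common interface; the actual comparison lives in the
 * pfair domain code.
 */
#if 0
static int pd2_higher_prio(struct task_struct *first,
			   struct task_struct *second)
{
	/* 1) the subtask with the earlier (pseudo-)deadline wins */
	if (get_deadline(first) != get_deadline(second))
		return time_before(get_deadline(first), get_deadline(second));
	/* 2) on a deadline tie, a subtask whose window overlaps the next
	 *    subtask's window (b-bit set) beats one whose window does not */
	if (get_b_bit(first) != get_b_bit(second))
		return get_b_bit(first) > get_b_bit(second);
	/* 3) if both b-bits are set, the later group deadline wins;
	 *    otherwise the tie may be broken arbitrarily */
	if (get_b_bit(first) && get_b_bit(second))
		return time_after(get_group_deadline(first),
				  get_group_deadline(second));
	return 1;
}
#endif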
static void pfair_finish_task_switch(struct task_struct *t)
{
	if (!is_realtime(t) || !is_running(t))
		return;

	queue_lock(&pfair.pfair_lock);
	/* Release in real-time mode only;
	 * if the mode is non-real-time, then
	 * the task is already in the release queue
	 * with a release time far in the future.
	 */
	if (get_rt_mode() == MODE_RT_RUN) {
		inc_passed_quanta(t);
		if (get_passed_quanta(t) == get_exec_cost(t)) {
			sched_trace_job_completion(t);
			pfair_prepare_next_job(t);
		} else {
			pfair_prepare_next_subtask(t);
		}
		/* TRACE("[%d] %d releasing %d with (%d, %d)\n",
			smp_processor_id(), jiffies-rt_start_time,
			t->pid,
			get_release(t)-rt_start_time,
			get_deadline(t)-rt_start_time); */
		if (time_before_eq(get_release(t), jiffies))
			pfair_add_ready(&pfair, t);
		else
			pfair_add_release(&pfair, t);
	}
	queue_unlock(&pfair.pfair_lock);
}

/* Prepare a task for running in RT mode.
 * Enqueues the task into the master queue data structure.
 * Returns -EPERM if the task is not TASK_STOPPED.
 */
static long pfair_prepare_task(struct task_struct * t)
{
	unsigned long flags;
	TRACE("pfair: prepare task %d\n", t->pid);
	if (t->state == TASK_STOPPED) {
		__setscheduler(t, SCHED_FIFO, MAX_RT_PRIO - 1);

		if (get_rt_mode() == MODE_RT_RUN)
			/* The action is already on.
			 * Prepare an immediate release.
			 */
			__pfair_prepare_new_release(t, jiffies);
		/* The task has to be in the TASK_RUNNING state, otherwise the
		 * signal code will try to wake it up, with fatal consequences.
		 */
		t->state = TASK_RUNNING;
		queue_lock_irqsave(&pfair.pfair_lock, flags);
		pfair_add_release(&pfair, t);
		queue_unlock_irqrestore(&pfair.pfair_lock, flags);
		return 0;
	} else
		return -EPERM;
}

static void pfair_wake_up_task(struct task_struct *task)
{
	unsigned long flags;

	/* We must determine whether the task should go into the release
	 * queue or into the ready queue.
	 * The task enters the ready queue if the previous deadline was missed,
	 * so we treat the invoked job as a new sporadic release.
	 *
	 * The job can also enter the ready queue if it was invoked before its
	 * global deadline, but then its budget must be clipped down to one
	 * quantum.
	 */
	task->state = TASK_RUNNING;

	if (time_after_eq(jiffies,
			  task->rt_param.times.last_release
			  + get_rt_period(task))) {
		/* new sporadic release */
		TRACE("[%d] %d Sporadic release of %d\n",
		      smp_processor_id(),
		      jiffies - rt_start_time, task->pid);
		__pfair_prepare_new_release(task, jiffies);
		queue_lock_irqsave(&pfair.pfair_lock, flags);
		sched_trace_job_release(task);
		pfair_add_ready(&pfair, task);
		queue_unlock_irqrestore(&pfair.pfair_lock, flags);
	} else if (task->time_slice) {
		/* The task came back in time, before its deadline.
		 * Clip the budget so that only the last subtask of the job,
		 * or the new job, remains.
		 */
		task->rt_param.times.exec_time = get_exec_cost(task) - 1;
		if (task->rt_param.times.exec_time == 0)
			pfair_prepare_next_job(task);
		else
			pfair_prepare_next_subtask(task);

		TRACE("[%d] %d Resume of %d with %d, %d, %d\n",
		      smp_processor_id(), jiffies - rt_start_time, task->pid,
		      get_release(task) - rt_start_time,
		      get_deadline(task) - rt_start_time,
		      get_passed_quanta(task));

		set_rt_flags(task, RT_F_RUNNING);
		queue_lock_irqsave(&pfair.pfair_lock, flags);
		sched_trace_job_release(task);
		if (time_after_eq(jiffies, get_release(task)))
			pfair_add_ready(&pfair, task);
		else
			pfair_add_release(&pfair, task);
		queue_unlock_irqrestore(&pfair.pfair_lock, flags);
	} else {
		TRACE("[%d] %d Strange release of %d with %d, %d, %d\n",
		      smp_processor_id(), jiffies - rt_start_time, task->pid,
		      get_release(task),
		      get_deadline(task),
		      get_passed_quanta(task));

		queue_lock_irqsave(&pfair.pfair_lock, flags);
		pfair_add_release(&pfair, task);
		queue_unlock_irqrestore(&pfair.pfair_lock, flags);
	}
}
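/*
 * Note on pfair_wake_up_task(): a task that wakes up after its period has
 * elapsed is treated as a new sporadic release and goes straight to the
 * ready queue; a task that wakes up early with budget left is clipped so
 * that only the last subtask of the current job remains (e.g. with an
 * execution cost of 4 quanta the passed-quanta count is set to 3, while a
 * cost of 1 skips directly to the next job); anything else is parked in
 * the release queue.
 */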
static void pfair_task_blocks(struct task_struct *t)
{
	unsigned long flags;
	int i;
	cpumask_t res;

	BUG_ON(!is_realtime(t));

	/* If the task blocks, then it must be removed from the running set. */
	queue_lock_irqsave(&pfair.pfair_lock, flags);
	cpus_and(res, pfair.domain_cpus, cpu_online_map);
	for_each_cpu_mask(i, res) {
		if (per_cpu(states, i).t == t)
			per_cpu(states, i).t = NULL;
	}
	/* If the task is running and on some list,
	 * it might have been released by another processor.
	 */
	if (t->rt_list.next != LIST_POISON1 ||
	    t->rt_list.prev != LIST_POISON2) {
		TRACE("[%d] %d task %d is deleted from the list\n",
		      smp_processor_id(),
		      jiffies - rt_start_time, t->pid);
		list_del(&t->rt_list);
	}
	queue_unlock_irqrestore(&pfair.pfair_lock, flags);

	TRACE("[%d] %d task %d blocks with budget=%d state=%d\n",
	      smp_processor_id(), jiffies - rt_start_time,
	      t->pid, t->time_slice, t->state);
}

static long pfair_tear_down(struct task_struct * t)
{
	BUG_ON(!is_realtime(t));
	TRACE("pfair: tear down called for %d\n", t->pid);
	BUG_ON(t->array);
	BUG_ON(t->rt_list.next != LIST_POISON1);
	BUG_ON(t->rt_list.prev != LIST_POISON2);
	return 0;
}

static int pfair_mode_change(int new_mode)
{
	printk(KERN_INFO "[%d] pfair mode change %d\n",
	       smp_processor_id(), new_mode);
	if (new_mode == MODE_RT_RUN)
		pfair_prepare_new_releases(&pfair, jiffies + 10);
	printk(KERN_INFO "[%d] pfair: mode change done\n", smp_processor_id());
	return 0;
}

/* Plugin object */
static sched_plugin_t s_plugin __cacheline_aligned_in_smp = {
	.ready_to_use = 0
};

/*
 * PFAIR plugin initialization macro.
 */
#define INIT_PFAIR_PLUGIN (struct sched_plugin){ \
	.plugin_name	= "PFAIR",		\
	.ready_to_use	= 1,			\
	.scheduler_tick	= pfair_scheduler_tick,	\
	.prepare_task	= pfair_prepare_task,	\
	.tear_down	= pfair_tear_down,	\
	.schedule	= pfair_schedule,	\
	.finish_switch	= pfair_finish_task_switch, \
	.mode_change	= pfair_mode_change,	\
	.wake_up_task	= pfair_wake_up_task,	\
	.task_blocks	= pfair_task_blocks	\
}

/* Set up the domain and the per-CPU state once; subsequent calls just
 * return the already initialized plugin object.
 */
sched_plugin_t* __init init_pfair_plugin(void)
{
	int i = 0;
	if (!s_plugin.ready_to_use) {
		pfair_domain_init(&pfair);
		/* Assumed per-CPU reset: clear the sync flag and the
		 * running-task slot before publishing the plugin. */
		for (i = 0; i < NR_CPUS; i++) {
			sync_go[i] = 0;
			per_cpu(states, i).t = NULL;
		}
		s_plugin = INIT_PFAIR_PLUGIN;
	}
	return &s_plugin;
}