From f1a438d813d416fa9f4be4e6dbd10b54c5938d89 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:45 +0200 Subject: sched: reorder update_cpu_load(rq) with the ->task_tick() call Peter Williams suggested to flip the order of update_cpu_load(rq) with the ->task_tick() call. This is a NOP for the current scheduler (the two functions are independent of each other), ->task_tick() might create some state for update_cpu_load() in the future (or in PlugSched). Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 72bb9483d949..4680f52974e3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3298,9 +3298,9 @@ void scheduler_tick(void) struct task_struct *curr = rq->curr; spin_lock(&rq->lock); + update_cpu_load(rq); if (curr != rq->idle) /* FIXME: needed? */ curr->sched_class->task_tick(rq, curr); - update_cpu_load(rq); spin_unlock(&rq->lock); #ifdef CONFIG_SMP -- cgit v1.2.2 From 4301065920b0cbde3986519582347e883b166f3e Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Thu, 9 Aug 2007 11:16:46 +0200 Subject: sched: simplify move_tasks() The move_tasks() function is currently multiplexed with two distinct capabilities: 1. attempt to move a specified amount of weighted load from one run queue to another; and 2. attempt to move a specified number of tasks from one run queue to another. The first of these capabilities is used in two places, load_balance() and load_balance_idle(), and in both of these cases the return value of move_tasks() is used purely to decide if tasks/load were moved and no notice of the actual number of tasks moved is taken. The second capability is used in exactly one place, active_load_balance(), to attempt to move exactly one task and, as before, the return value is only used as an indicator of success or failure. 
This multiplexing of move_tasks() was introduced, by me, as part of the smpnice patches and was motivated by the fact that the alternative, one function to move specified load and one to move a single task, would have led to two functions of roughly the same complexity as the old move_tasks() (or the new balance_tasks()). However, the new modular design of the new CFS scheduler allows a simpler solution to be adopted and this patch implements that solution by: 1. adding a new function, move_one_task(), to be used by active_load_balance(); and 2. making move_tasks() a single-purpose function that tries to move a specified weighted load and returns 1 for success and 0 for failure. One of the consequences of these changes is that neither move_one_task() nor the new move_tasks() cares how many tasks sched_class.load_balance() moves and this enables its interface to be simplified by returning the amount of load moved as its result and removing the load_moved pointer from the argument list. This helps simplify the new move_tasks() and slightly reduces the amount of work done in each of sched_class.load_balance()'s implementations. Further simplifications, e.g. changes to balance_tasks(), are possible but (slightly) complicated by the special needs of load_balance_fair() so I've left them to a later patch (if this one gets accepted). NB Since move_tasks() gets called with two run queue locks held, even small reductions in overhead are worthwhile. 
[ mingo@elte.hu ] this change also reduces code size nicely: text data bss dec hex filename 39216 3618 24 42858 a76a sched.o.before 39173 3618 24 42815 a73f sched.o.after Signed-off-by: Peter Williams Signed-off-by: Ingo Molnar --- kernel/sched.c | 82 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 46 insertions(+), 36 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 4680f52974e3..42029634ef5a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2231,32 +2231,49 @@ out: } /* - * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted - * load from busiest to this_rq, as part of a balancing operation within - * "domain". Returns the number of tasks moved. + * move_tasks tries to move up to max_load_move weighted load from busiest to + * this_rq, as part of a balancing operation within domain "sd". + * Returns 1 if successful and 0 otherwise. * * Called with both runqueues locked. */ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, - unsigned long max_nr_move, unsigned long max_load_move, + unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned) { struct sched_class *class = sched_class_highest; - unsigned long load_moved, total_nr_moved = 0, nr_moved; - long rem_load_move = max_load_move; + unsigned long total_load_moved = 0; do { - nr_moved = class->load_balance(this_rq, this_cpu, busiest, - max_nr_move, (unsigned long)rem_load_move, - sd, idle, all_pinned, &load_moved); - total_nr_moved += nr_moved; - max_nr_move -= nr_moved; - rem_load_move -= load_moved; + total_load_moved += + class->load_balance(this_rq, this_cpu, busiest, + ULONG_MAX, max_load_move - total_load_moved, + sd, idle, all_pinned); class = class->next; - } while (class && max_nr_move && rem_load_move > 0); + } while (class && max_load_move > total_load_moved); - return total_nr_moved; + return total_load_moved > 0; +} + +/* + * move_one_task 
tries to move exactly one task from busiest to this_rq, as + * part of active balancing operations within "domain". + * Returns 1 if successful and 0 otherwise. + * + * Called with both runqueues locked. + */ +static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, + struct sched_domain *sd, enum cpu_idle_type idle) +{ + struct sched_class *class; + + for (class = sched_class_highest; class; class = class->next) + if (class->load_balance(this_rq, this_cpu, busiest, + 1, ULONG_MAX, sd, idle, NULL)) + return 1; + + return 0; } /* @@ -2588,11 +2605,6 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, */ #define MAX_PINNED_INTERVAL 512 -static inline unsigned long minus_1_or_zero(unsigned long n) -{ - return n > 0 ? n - 1 : 0; -} - /* * Check this_cpu to ensure it is balanced within domain. Attempt to move * tasks if there is an imbalance. @@ -2601,7 +2613,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_domain *sd, enum cpu_idle_type idle, int *balance) { - int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; + int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; struct sched_group *group; unsigned long imbalance; struct rq *busiest; @@ -2642,18 +2654,17 @@ redo: schedstat_add(sd, lb_imbalance[idle], imbalance); - nr_moved = 0; + ld_moved = 0; if (busiest->nr_running > 1) { /* * Attempt to move tasks. If find_busiest_group has found * an imbalance but busiest->nr_running <= 1, the group is - * still unbalanced. nr_moved simply stays zero, so it is + * still unbalanced. ld_moved simply stays zero, so it is * correctly treated as an imbalance. 
*/ local_irq_save(flags); double_rq_lock(this_rq, busiest); - nr_moved = move_tasks(this_rq, this_cpu, busiest, - minus_1_or_zero(busiest->nr_running), + ld_moved = move_tasks(this_rq, this_cpu, busiest, imbalance, sd, idle, &all_pinned); double_rq_unlock(this_rq, busiest); local_irq_restore(flags); @@ -2661,7 +2672,7 @@ redo: /* * some other cpu did the load balance for us. */ - if (nr_moved && this_cpu != smp_processor_id()) + if (ld_moved && this_cpu != smp_processor_id()) resched_cpu(this_cpu); /* All tasks on this runqueue were pinned by CPU affinity */ @@ -2673,7 +2684,7 @@ redo: } } - if (!nr_moved) { + if (!ld_moved) { schedstat_inc(sd, lb_failed[idle]); sd->nr_balance_failed++; @@ -2722,10 +2733,10 @@ redo: sd->balance_interval *= 2; } - if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && + if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) return -1; - return nr_moved; + return ld_moved; out_balanced: schedstat_inc(sd, lb_balanced[idle]); @@ -2757,7 +2768,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) struct sched_group *group; struct rq *busiest = NULL; unsigned long imbalance; - int nr_moved = 0; + int ld_moved = 0; int sd_idle = 0; int all_pinned = 0; cpumask_t cpus = CPU_MASK_ALL; @@ -2792,12 +2803,11 @@ redo: schedstat_add(sd, lb_imbalance[CPU_NEWLY_IDLE], imbalance); - nr_moved = 0; + ld_moved = 0; if (busiest->nr_running > 1) { /* Attempt to move tasks */ double_lock_balance(this_rq, busiest); - nr_moved = move_tasks(this_rq, this_cpu, busiest, - minus_1_or_zero(busiest->nr_running), + ld_moved = move_tasks(this_rq, this_cpu, busiest, imbalance, sd, CPU_NEWLY_IDLE, &all_pinned); spin_unlock(&busiest->lock); @@ -2809,7 +2819,7 @@ redo: } } - if (!nr_moved) { + if (!ld_moved) { schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]); if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) @@ -2817,7 +2827,7 @@ redo: } 
else sd->nr_balance_failed = 0; - return nr_moved; + return ld_moved; out_balanced: schedstat_inc(sd, lb_balanced[CPU_NEWLY_IDLE]); @@ -2905,8 +2915,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) if (likely(sd)) { schedstat_inc(sd, alb_cnt); - if (move_tasks(target_rq, target_cpu, busiest_rq, 1, - ULONG_MAX, sd, CPU_IDLE, NULL)) + if (move_one_task(target_rq, target_cpu, busiest_rq, + sd, CPU_IDLE)) schedstat_inc(sd, alb_pushed); else schedstat_inc(sd, alb_failed); -- cgit v1.2.2 From 9531b62f5ebf2b693bf85129d20328188f685c44 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Thu, 9 Aug 2007 11:16:46 +0200 Subject: sched: clean up sched_getaffinity() here's another tiny cleanup. The generated code is not affected (gcc is smart enough) but for people looking over the code it is just irritating to have the extra conditional. Signed-off-by: Ulrich Drepper Signed-off-by: Ingo Molnar --- kernel/sched.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 42029634ef5a..50c3587b06cb 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4473,10 +4473,8 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask) out_unlock: read_unlock(&tasklist_lock); mutex_unlock(&sched_hotcpu_mutex); - if (retval) - return retval; - return 0; + return retval; } /** -- cgit v1.2.2 From 7bfd0485871df01764ca89d5679f128d870aef1a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:46 +0200 Subject: sched: uninline rq_clock() uninline rq_clock() to save 263 bytes of code: text data bss dec hex filename 39561 3642 24 43227 a8db sched.o.before 39298 3642 24 42964 a7d4 sched.o.after Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 50c3587b06cb..0112f63ad376 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -353,7 +353,7 @@ static unsigned 
long long __rq_clock(struct rq *rq) return clock; } -static inline unsigned long long rq_clock(struct rq *rq) +static unsigned long long rq_clock(struct rq *rq) { int this_cpu = smp_processor_id(); -- cgit v1.2.2 From 8e717b194ce3f3ac9e6acc63f66fe274cdf9cde1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:46 +0200 Subject: sched: schedule() speedup speed up schedule(): share the 'now' parameter that deactivate_task() was calculating internally. ( this also fixes the small accounting window between the deactivate call and the pick_next_task() call. ) Signed-off-by: Ingo Molnar --- kernel/sched.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 0112f63ad376..49f5b281c561 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -940,10 +940,9 @@ static inline void activate_idle_task(struct task_struct *p, struct rq *rq) /* * deactivate_task - remove a task from the runqueue. 
*/ -static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) +static void +deactivate_task(struct rq *rq, struct task_struct *p, int sleep, u64 now) { - u64 now = rq_clock(rq); - if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible++; @@ -2122,7 +2121,7 @@ void sched_exec(void) static void pull_task(struct rq *src_rq, struct task_struct *p, struct rq *this_rq, int this_cpu) { - deactivate_task(src_rq, p, 0); + deactivate_task(src_rq, p, 0, rq_clock(src_rq)); set_task_cpu(p, this_cpu); activate_task(this_rq, p, 0); /* @@ -3446,13 +3445,14 @@ need_resched_nonpreemptible: spin_lock_irq(&rq->lock); clear_tsk_need_resched(prev); + now = __rq_clock(rq); if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { if (unlikely((prev->state & TASK_INTERRUPTIBLE) && unlikely(signal_pending(prev)))) { prev->state = TASK_RUNNING; } else { - deactivate_task(rq, prev, 1); + deactivate_task(rq, prev, 1, now); } switch_count = &prev->nvcsw; } @@ -3460,7 +3460,6 @@ need_resched_nonpreemptible: if (unlikely(!rq->nr_running)) idle_balance(cpu, rq); - now = __rq_clock(rq); prev->sched_class->put_prev_task(rq, prev, now); next = pick_next_task(rq, prev, now); @@ -4220,7 +4219,7 @@ recheck: } on_rq = p->se.on_rq; if (on_rq) - deactivate_task(rq, p, 0); + deactivate_task(rq, p, 0, rq_clock(rq)); oldprio = p->prio; __setscheduler(rq, p, policy, param->sched_priority); if (on_rq) { @@ -4973,7 +4972,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) on_rq = p->se.on_rq; if (on_rq) - deactivate_task(rq_src, p, 0); + deactivate_task(rq_src, p, 0, rq_clock(rq_src)); set_task_cpu(p, dest_cpu); if (on_rq) { activate_task(rq_dest, p, 0); @@ -5387,7 +5386,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) rq->migration_thread = NULL; /* Idle task back to normal (off runqueue, low prio) */ rq = task_rq_lock(rq->idle, &flags); - deactivate_task(rq, rq->idle, 0); + deactivate_task(rq, rq->idle, 0, rq_clock(rq)); 
rq->idle->static_prio = MAX_PRIO; __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); rq->idle->sched_class = &idle_sched_class; @@ -6626,7 +6625,7 @@ void normalize_rt_tasks(void) on_rq = p->se.on_rq; if (on_rq) - deactivate_task(task_rq(p), p, 0); + deactivate_task(task_rq(p), p, 0, rq_clock(task_rq(p))); __setscheduler(rq, p, SCHED_NORMAL, 0); if (on_rq) { activate_task(task_rq(p), p, 0); -- cgit v1.2.2 From e0361851e5647cdd62fd5c367df5d7e145769d04 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 9 Aug 2007 11:16:46 +0200 Subject: sched: remove binary sysctls from kernel.sched_domain kernel.sched_domain hierarchy is under CTL_UNNUMBERED and thus unreachable to sysctl(2). Generating .ctl_number's in such situation is not useful. Signed-off-by: Alexey Dobriyan Signed-off-by: Ingo Molnar --- kernel/sched.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 49f5b281c561..85b93118d244 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5217,12 +5217,19 @@ static void migrate_dead_tasks(unsigned int dead_cpu) #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) static struct ctl_table sd_ctl_dir[] = { - {CTL_UNNUMBERED, "sched_domain", NULL, 0, 0755, NULL, }, + { + .procname = "sched_domain", + .mode = 0755, + }, {0,}, }; static struct ctl_table sd_ctl_root[] = { - {CTL_UNNUMBERED, "kernel", NULL, 0, 0755, sd_ctl_dir, }, + { + .procname = "kernel", + .mode = 0755, + .child = sd_ctl_dir, + }, {0,}, }; @@ -5238,11 +5245,10 @@ static struct ctl_table *sd_alloc_ctl_entry(int n) } static void -set_table_entry(struct ctl_table *entry, int ctl_name, +set_table_entry(struct ctl_table *entry, const char *procname, void *data, int maxlen, mode_t mode, proc_handler *proc_handler) { - entry->ctl_name = ctl_name; entry->procname = procname; entry->data = data; entry->maxlen = maxlen; @@ -5255,28 +5261,28 @@ sd_alloc_ctl_domain_table(struct 
sched_domain *sd) { struct ctl_table *table = sd_alloc_ctl_entry(14); - set_table_entry(&table[0], 1, "min_interval", &sd->min_interval, + set_table_entry(&table[0], "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax); - set_table_entry(&table[1], 2, "max_interval", &sd->max_interval, + set_table_entry(&table[1], "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax); - set_table_entry(&table[2], 3, "busy_idx", &sd->busy_idx, + set_table_entry(&table[2], "busy_idx", &sd->busy_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[3], 4, "idle_idx", &sd->idle_idx, + set_table_entry(&table[3], "idle_idx", &sd->idle_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[4], 5, "newidle_idx", &sd->newidle_idx, + set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[5], 6, "wake_idx", &sd->wake_idx, + set_table_entry(&table[5], "wake_idx", &sd->wake_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[6], 7, "forkexec_idx", &sd->forkexec_idx, + set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[7], 8, "busy_factor", &sd->busy_factor, + set_table_entry(&table[7], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[8], 9, "imbalance_pct", &sd->imbalance_pct, + set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[10], 11, "cache_nice_tries", + set_table_entry(&table[10], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[12], 13, "flags", &sd->flags, + set_table_entry(&table[12], "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax); return table; @@ -5296,7 +5302,6 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu) i = 0; 
for_each_domain(cpu, sd) { snprintf(buf, 32, "domain%d", i); - entry->ctl_name = i + 1; entry->procname = kstrdup(buf, GFP_KERNEL); entry->mode = 0755; entry->child = sd_alloc_ctl_domain_table(sd); @@ -5317,7 +5322,6 @@ static void init_sched_domain_sysctl(void) for (i = 0; i < cpu_num; i++, entry++) { snprintf(buf, 32, "cpu%d", i); - entry->ctl_name = i + 1; entry->procname = kstrdup(buf, GFP_KERNEL); entry->mode = 0755; entry->child = sd_alloc_ctl_cpu_table(i); -- cgit v1.2.2 From a4ac01c36e286dd1b9a1d5cd7422c5af51dc55f8 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Thu, 9 Aug 2007 11:16:46 +0200 Subject: sched: fix bug in balance_tasks() There are two problems with balance_tasks() and how it used: 1. The variables best_prio and best_prio_seen (inherited from the old move_tasks()) were only required to handle problems caused by the active/expired arrays, the order in which they were processed and the possibility that the task with the highest priority could be on either. These issues are no longer present and the extra overhead associated with their use is unnecessary (and possibly wrong). 2. In the absence of CONFIG_FAIR_GROUP_SCHED being set, the same this_best_prio variable needs to be used by all scheduling classes or there is a risk of moving too much load. E.g. if the highest priority task on this at the beginning is a fairly low priority task and the rt class migrates a task (during its turn) then that moved task becomes the new highest priority task on this_rq but when the sched_fair class initializes its copy of this_best_prio it will get the priority of the original highest priority task as, due to the run queue locks being held, the reschedule triggered by pull_task() will not have taken place. This could result in inappropriate overriding of skip_for_load and excessive load being moved. 
The attached patch addresses these problems by deleting all reference to best_prio and best_prio_seen and making this_best_prio a reference parameter to the various functions involved. load_balance_fair() has also been modified so that this_best_prio is only reset (in the loop) if CONFIG_FAIR_GROUP_SCHED is set. This should preserve the effect of helping spread groups' higher priority tasks around the available CPUs while improving system performance when CONFIG_FAIR_GROUP_SCHED isn't set. Signed-off-by: Peter Williams Signed-off-by: Ingo Molnar --- kernel/sched.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 85b93118d244..1fa07c14624e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -745,8 +745,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned, unsigned long *load_moved, - int this_best_prio, int best_prio, int best_prio_seen, - struct rq_iterator *iterator); + int *this_best_prio, struct rq_iterator *iterator); #include "sched_stats.h" #include "sched_rt.c" @@ -2165,8 +2164,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned, unsigned long *load_moved, - int this_best_prio, int best_prio, int best_prio_seen, - struct rq_iterator *iterator) + int *this_best_prio, struct rq_iterator *iterator) { int pulled = 0, pinned = 0, skip_for_load; struct task_struct *p; @@ -2191,12 +2189,8 @@ next: */ skip_for_load = (p->se.load.weight >> 1) > rem_load_move + SCHED_LOAD_SCALE_FUZZ; - if (skip_for_load && p->prio < this_best_prio) - skip_for_load = !best_prio_seen && p->prio == best_prio; - if (skip_for_load || + if ((skip_for_load && p->prio >= 
*this_best_prio) || !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) { - - best_prio_seen |= p->prio == best_prio; p = iterator->next(iterator->arg); goto next; } @@ -2210,8 +2204,8 @@ next: * and the prescribed amount of weighted load. */ if (pulled < max_nr_move && rem_load_move > 0) { - if (p->prio < this_best_prio) - this_best_prio = p->prio; + if (p->prio < *this_best_prio) + *this_best_prio = p->prio; p = iterator->next(iterator->arg); goto next; } @@ -2243,12 +2237,13 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, { struct sched_class *class = sched_class_highest; unsigned long total_load_moved = 0; + int this_best_prio = this_rq->curr->prio; do { total_load_moved += class->load_balance(this_rq, this_cpu, busiest, ULONG_MAX, max_load_move - total_load_moved, - sd, idle, all_pinned); + sd, idle, all_pinned, &this_best_prio); class = class->next; } while (class && max_load_move > total_load_moved); @@ -2266,10 +2261,12 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, struct sched_domain *sd, enum cpu_idle_type idle) { struct sched_class *class; + int this_best_prio = MAX_PRIO; for (class = sched_class_highest; class; class = class->next) if (class->load_balance(this_rq, this_cpu, busiest, - 1, ULONG_MAX, sd, idle, NULL)) + 1, ULONG_MAX, sd, idle, NULL, + &this_best_prio)) return 1; return 0; @@ -3184,8 +3181,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned, unsigned long *load_moved, - int this_best_prio, int best_prio, int best_prio_seen, - struct rq_iterator *iterator) + int *this_best_prio, struct rq_iterator *iterator) { *load_moved = 0; -- cgit v1.2.2 From b04a0f4c1651a553ee1a03dc70297d66ec74db5c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:46 +0200 Subject: sched: add [__]update_rq_clock(rq) add the 
[__]update_rq_clock(rq) functions. (No change in functionality, just reorganization to prepare for elimination of the heavy 64-bit timestamp-passing in the scheduler.) Signed-off-by: Ingo Molnar --- kernel/sched.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 1fa07c14624e..d613723f324f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -318,15 +318,19 @@ static inline int cpu_of(struct rq *rq) } /* - * Per-runqueue clock, as finegrained as the platform can give us: + * Update the per-runqueue clock, as finegrained as the platform can give + * us, but without assuming monotonicity, etc.: */ -static unsigned long long __rq_clock(struct rq *rq) +static void __update_rq_clock(struct rq *rq) { u64 prev_raw = rq->prev_clock_raw; u64 now = sched_clock(); s64 delta = now - prev_raw; u64 clock = rq->clock; +#ifdef CONFIG_SCHED_DEBUG + WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); +#endif /* * Protect against sched_clock() occasionally going backwards: */ @@ -349,17 +353,24 @@ static unsigned long long __rq_clock(struct rq *rq) rq->prev_clock_raw = now; rq->clock = clock; +} - return clock; +static void update_rq_clock(struct rq *rq) +{ + if (likely(smp_processor_id() == cpu_of(rq))) + __update_rq_clock(rq); } -static unsigned long long rq_clock(struct rq *rq) +static u64 __rq_clock(struct rq *rq) { - int this_cpu = smp_processor_id(); + __update_rq_clock(rq); - if (this_cpu == cpu_of(rq)) - return __rq_clock(rq); + return rq->clock; +} +static u64 rq_clock(struct rq *rq) +{ + update_rq_clock(rq); return rq->clock; } @@ -386,9 +397,12 @@ unsigned long long cpu_clock(int cpu) { unsigned long long now; unsigned long flags; + struct rq *rq; local_irq_save(flags); - now = rq_clock(cpu_rq(cpu)); + rq = cpu_rq(cpu); + update_rq_clock(rq); + now = rq->clock; local_irq_restore(flags); return now; -- cgit v1.2.2 From a8e504d2a57ecd3f905b402072cdd1903f963bef Mon 
Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:47 +0200 Subject: sched: eliminate rq_clock() use eliminate rq_clock() use by changing it to: update_rq_clock(rq) now = rq->clock; identity transformation - no change in behavior. Signed-off-by: Ingo Molnar --- kernel/sched.c | 49 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 15 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index d613723f324f..fe3c152d0c68 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -927,7 +927,10 @@ static int effective_prio(struct task_struct *p) */ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) { - u64 now = rq_clock(rq); + u64 now; + + update_rq_clock(rq); + now = rq->clock; if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible--; @@ -941,7 +944,10 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) */ static inline void activate_idle_task(struct task_struct *p, struct rq *rq) { - u64 now = rq_clock(rq); + u64 now; + + update_rq_clock(rq); + now = rq->clock; if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible--; @@ -1664,7 +1670,8 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) rq = task_rq_lock(p, &flags); BUG_ON(p->state != TASK_RUNNING); this_cpu = smp_processor_id(); /* parent's CPU */ - now = rq_clock(rq); + update_rq_clock(rq); + now = rq->clock; p->prio = effective_prio(p); @@ -2134,7 +2141,8 @@ void sched_exec(void) static void pull_task(struct rq *src_rq, struct task_struct *p, struct rq *this_rq, int this_cpu) { - deactivate_task(src_rq, p, 0, rq_clock(src_rq)); + update_rq_clock(src_rq); + deactivate_task(src_rq, p, 0, src_rq->clock); set_task_cpu(p, this_cpu); activate_task(this_rq, p, 0); /* @@ -3221,7 +3229,8 @@ unsigned long long task_sched_runtime(struct task_struct *p) rq = task_rq_lock(p, &flags); ns = p->se.sum_exec_runtime; if (rq->curr == p) { - delta_exec = 
rq_clock(rq) - p->se.exec_start; + update_rq_clock(rq); + delta_exec = rq->clock - p->se.exec_start; if ((s64)delta_exec > 0) ns += delta_exec; } @@ -3919,7 +3928,8 @@ void rt_mutex_setprio(struct task_struct *p, int prio) BUG_ON(prio < 0 || prio > MAX_PRIO); rq = task_rq_lock(p, &flags); - now = rq_clock(rq); + update_rq_clock(rq); + now = rq->clock; oldprio = p->prio; on_rq = p->se.on_rq; @@ -3966,7 +3976,8 @@ void set_user_nice(struct task_struct *p, long nice) * the task might be in the middle of scheduling on another CPU. */ rq = task_rq_lock(p, &flags); - now = rq_clock(rq); + update_rq_clock(rq); + now = rq->clock; /* * The RT priorities are set via sched_setscheduler(), but we still * allow the 'normal' nice value to be set - but as expected @@ -4228,8 +4239,10 @@ recheck: goto recheck; } on_rq = p->se.on_rq; - if (on_rq) - deactivate_task(rq, p, 0, rq_clock(rq)); + if (on_rq) { + update_rq_clock(rq); + deactivate_task(rq, p, 0, rq->clock); + } oldprio = p->prio; __setscheduler(rq, p, policy, param->sched_priority); if (on_rq) { @@ -4981,8 +4994,10 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) goto out; on_rq = p->se.on_rq; - if (on_rq) - deactivate_task(rq_src, p, 0, rq_clock(rq_src)); + if (on_rq) { + update_rq_clock(rq_src); + deactivate_task(rq_src, p, 0, rq_src->clock); + } set_task_cpu(p, dest_cpu); if (on_rq) { activate_task(rq_dest, p, 0); @@ -5215,7 +5230,8 @@ static void migrate_dead_tasks(unsigned int dead_cpu) for ( ; ; ) { if (!rq->nr_running) break; - next = pick_next_task(rq, rq->curr, rq_clock(rq)); + update_rq_clock(rq); + next = pick_next_task(rq, rq->curr, rq->clock); if (!next) break; migrate_dead(dead_cpu, next); @@ -5400,7 +5416,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) rq->migration_thread = NULL; /* Idle task back to normal (off runqueue, low prio) */ rq = task_rq_lock(rq->idle, &flags); - deactivate_task(rq, rq->idle, 0, rq_clock(rq)); + update_rq_clock(rq); + 
deactivate_task(rq, rq->idle, 0, rq->clock); rq->idle->static_prio = MAX_PRIO; __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); rq->idle->sched_class = &idle_sched_class; @@ -6638,8 +6655,10 @@ void normalize_rt_tasks(void) #endif on_rq = p->se.on_rq; - if (on_rq) - deactivate_task(task_rq(p), p, 0, rq_clock(task_rq(p))); + if (on_rq) { + update_rq_clock(task_rq(p)); + deactivate_task(task_rq(p), p, 0, task_rq(p)->clock); + } __setscheduler(rq, p, SCHED_NORMAL, 0); if (on_rq) { activate_task(task_rq(p), p, 0); -- cgit v1.2.2 From 2ab81159fa426bd09c21faf7c25fba13bc9d2902 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:47 +0200 Subject: sched: remove rq_clock() remove the now unused rq_clock() function. Signed-off-by: Ingo Molnar --- kernel/sched.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index fe3c152d0c68..893211054790 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -368,12 +368,6 @@ static u64 __rq_clock(struct rq *rq) return rq->clock; } -static u64 rq_clock(struct rq *rq) -{ - update_rq_clock(rq); - return rq->clock; -} - /* * The domain tree (rq->sd) is protected by RCU's quiescent state transition. * See detach_destroy_domains: synchronize_sched for details. -- cgit v1.2.2 From c1b3da3ecdbf9e9f377474c11ba988b8821f86c8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:47 +0200 Subject: sched: eliminate __rq_clock() use eliminate __rq_clock() use by changing it to: __update_rq_clock(rq) now = rq->clock; identity transformation - no change in behavior. 
Signed-off-by: Ingo Molnar --- kernel/sched.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 893211054790..d67345175179 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1967,9 +1967,12 @@ static void update_cpu_load(struct rq *this_rq) unsigned long total_load = this_rq->ls.load.weight; unsigned long this_load = total_load; struct load_stat *ls = &this_rq->ls; - u64 now = __rq_clock(this_rq); + u64 now; int i, scale; + __update_rq_clock(this_rq); + now = this_rq->clock; + this_rq->nr_load_updates++; if (unlikely(!(sysctl_sched_features & SCHED_FEAT_PRECISE_CPU_LOAD))) goto do_avg; @@ -3458,7 +3461,8 @@ need_resched_nonpreemptible: spin_lock_irq(&rq->lock); clear_tsk_need_resched(prev); - now = __rq_clock(rq); + __update_rq_clock(rq); + now = rq->clock; if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { if (unlikely((prev->state & TASK_INTERRUPTIBLE) && -- cgit v1.2.2 From eb59449400f1e5984509e502711141302a2867ab Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:47 +0200 Subject: sched: remove __rq_clock() remove the (now unused) __rq_clock() function. Signed-off-by: Ingo Molnar --- kernel/sched.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index d67345175179..65eb484dc268 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -361,13 +361,6 @@ static void update_rq_clock(struct rq *rq) __update_rq_clock(rq); } -static u64 __rq_clock(struct rq *rq) -{ - __update_rq_clock(rq); - - return rq->clock; -} - /* * The domain tree (rq->sd) is protected by RCU's quiescent state transition. * See detach_destroy_domains: synchronize_sched for details. 
-- cgit v1.2.2 From d281918d7c135c555d9cebcf73d4320efa8177dc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:47 +0200 Subject: sched: remove 'now' use from assignments change all 'now' timestamp uses in assignments to rq->clock. ( this is an identity transformation that causes no functionality change: every such new rq->clock use is necessarily preceded by an update_rq_clock() call. ) Signed-off-by: Ingo Molnar --- kernel/sched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 65eb484dc268..49a5fb0cdea0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -788,8 +788,8 @@ static void update_curr_load(struct rq *rq, u64 now) u64 start; start = ls->load_update_start; - ls->load_update_start = now; - ls->delta_stat += now - start; + ls->load_update_start = rq->clock; + ls->delta_stat += rq->clock - start; /* * Stagger updates to ls->delta_fair. Very frequent updates * can be expensive. @@ -1979,8 +1979,8 @@ static void update_cpu_load(struct rq *this_rq) exec_delta64 = ls->delta_exec + 1; ls->delta_exec = 0; - sample_interval64 = now - ls->load_update_last; - ls->load_update_last = now; + sample_interval64 = this_rq->clock - ls->load_update_last; + ls->load_update_last = this_rq->clock; if ((s64)sample_interval64 < (s64)TICK_NSEC) sample_interval64 = TICK_NSEC; -- cgit v1.2.2 From fd390f6a04f22fb457d6fd1855964f79536525de Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:48 +0200 Subject: sched: remove the 'u64 now' parameter from ->enqueue_task() remove the 'u64 now' parameter from ->enqueue_task(). ( identity transformation that causes no change in functionality. 
) Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 49a5fb0cdea0..43ae1566b8fc 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -852,7 +852,7 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, u64 now) { sched_info_queued(p); - p->sched_class->enqueue_task(rq, p, wakeup, now); + p->sched_class->enqueue_task(rq, p, wakeup); p->se.on_rq = 1; } -- cgit v1.2.2 From f02231e51a280f1a0fee4d03ad8f50048e06cced Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:48 +0200 Subject: sched: remove the 'u64 now' parameter from ->dequeue_task() remove the 'u64 now' parameter from ->dequeue_task(). ( identity transformation that causes no change in functionality. ) Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 43ae1566b8fc..e51d75f4b4d7 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -859,7 +859,7 @@ enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, u64 now) static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep, u64 now) { - p->sched_class->dequeue_task(rq, p, sleep, now); + p->sched_class->dequeue_task(rq, p, sleep); p->se.on_rq = 0; } -- cgit v1.2.2 From fb8d47240246e20f864f0724a23a7220cd1c59ac Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:48 +0200 Subject: sched: remove the 'u64 now' parameter from ->pick_next_task() remove the 'u64 now' parameter from ->pick_next_task(). ( identity transformation that causes no change in functionality. 
) Signed-off-by: Ingo Molnar --- kernel/sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index e51d75f4b4d7..b67a288a0f1f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3410,14 +3410,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, u64 now) * the fair class we can call that function directly: */ if (likely(rq->nr_running == rq->cfs.nr_running)) { - p = fair_sched_class.pick_next_task(rq, now); + p = fair_sched_class.pick_next_task(rq); if (likely(p)) return p; } class = sched_class_highest; for ( ; ; ) { - p = class->pick_next_task(rq, now); + p = class->pick_next_task(rq); if (p) return p; /* -- cgit v1.2.2 From ff95f3df54609d9d4b9572f8a67d09922a645043 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:49 +0200 Subject: sched: remove the 'u64 now' parameter from pick_next_task() remove the 'u64 now' parameter from pick_next_task(). ( identity transformation that causes no change in functionality. 
) Signed-off-by: Ingo Molnar --- kernel/sched.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index b67a288a0f1f..4f9f9e9d7265 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3400,7 +3400,7 @@ static inline void schedule_debug(struct task_struct *prev) * Pick up the highest-prio task: */ static inline struct task_struct * -pick_next_task(struct rq *rq, struct task_struct *prev, u64 now) +pick_next_task(struct rq *rq, struct task_struct *prev) { struct sched_class *class; struct task_struct *p; @@ -3471,7 +3471,7 @@ need_resched_nonpreemptible: idle_balance(cpu, rq); prev->sched_class->put_prev_task(rq, prev, now); - next = pick_next_task(rq, prev, now); + next = pick_next_task(rq, prev); sched_info_switch(prev, next); @@ -5222,7 +5222,7 @@ static void migrate_dead_tasks(unsigned int dead_cpu) if (!rq->nr_running) break; update_rq_clock(rq); - next = pick_next_task(rq, rq->curr, rq->clock); + next = pick_next_task(rq, rq->curr); if (!next) break; migrate_dead(dead_cpu, next); -- cgit v1.2.2 From 31ee529cc2254e8b62880535ec8f21a4c5e1c091 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:49 +0200 Subject: sched: remove the 'u64 now' parameter from ->put_prev_task() remove the 'u64 now' parameter from ->put_prev_task(). ( identity transformation that causes no change in functionality. 
) Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 4f9f9e9d7265..664440160485 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3470,7 +3470,7 @@ need_resched_nonpreemptible: if (unlikely(!rq->nr_running)) idle_balance(cpu, rq); - prev->sched_class->put_prev_task(rq, prev, now); + prev->sched_class->put_prev_task(rq, prev); next = pick_next_task(rq, prev); sched_info_switch(prev, next); -- cgit v1.2.2 From ee0827d8b5271094380410cf21d8c48c109a773a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:49 +0200 Subject: sched: remove the 'u64 now' parameter from ->task_new() remove the 'u64 now' parameter from ->task_new(). ( identity transformation that causes no change in functionality. ) Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 664440160485..0619178efa01 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1672,7 +1672,7 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) * Let the scheduling class do new task startup * management (if any): */ - p->sched_class->task_new(rq, p, now); + p->sched_class->task_new(rq, p); inc_nr_running(p, rq, now); } check_preempt_curr(rq, p); -- cgit v1.2.2 From 84a1d7a2f91d2f26d21026973dbf3023d17c701f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:49 +0200 Subject: sched: remove the 'u64 now' parameter from update_curr_load() remove the 'u64 now' parameter from update_curr_load(). ( identity transformation that causes no change in functionality. 
) Signed-off-by: Ingo Molnar --- kernel/sched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 0619178efa01..5d5859c2e019 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -782,7 +782,7 @@ static void __update_curr_load(struct rq *rq, struct load_stat *ls) * This function is called /before/ updating rq->ls.load * and when switching tasks. */ -static void update_curr_load(struct rq *rq, u64 now) +static void update_curr_load(struct rq *rq) { struct load_stat *ls = &rq->ls; u64 start; @@ -801,14 +801,14 @@ static void update_curr_load(struct rq *rq, u64 now) static inline void inc_load(struct rq *rq, const struct task_struct *p, u64 now) { - update_curr_load(rq, now); + update_curr_load(rq); update_load_add(&rq->ls.load, p->se.load.weight); } static inline void dec_load(struct rq *rq, const struct task_struct *p, u64 now) { - update_curr_load(rq, now); + update_curr_load(rq); update_load_sub(&rq->ls.load, p->se.load.weight); } @@ -1971,7 +1971,7 @@ static void update_cpu_load(struct rq *this_rq) goto do_avg; /* Update delta_fair/delta_exec fields first */ - update_curr_load(this_rq, now); + update_curr_load(this_rq); fair_delta64 = ls->delta_fair + 1; ls->delta_fair = 0; -- cgit v1.2.2 From 29b4b623fe8163ca3c1da125da81234d41c8a3db Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:49 +0200 Subject: sched: remove the 'u64 now' parameter from inc_load() remove the 'u64 now' parameter from inc_load(). ( identity transformation that causes no change in functionality. 
) Signed-off-by: Ingo Molnar --- kernel/sched.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 5d5859c2e019..aa8cac4ae547 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -798,8 +798,7 @@ static void update_curr_load(struct rq *rq) __update_curr_load(rq, ls); } -static inline void -inc_load(struct rq *rq, const struct task_struct *p, u64 now) +static inline void inc_load(struct rq *rq, const struct task_struct *p) { update_curr_load(rq); update_load_add(&rq->ls.load, p->se.load.weight); @@ -815,7 +814,7 @@ dec_load(struct rq *rq, const struct task_struct *p, u64 now) static void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now) { rq->nr_running++; - inc_load(rq, p, now); + inc_load(rq, p); } static void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now) @@ -3993,7 +3992,7 @@ void set_user_nice(struct task_struct *p, long nice) if (on_rq) { enqueue_task(rq, p, 0, now); - inc_load(rq, p, now); + inc_load(rq, p); /* * If the task increased its priority or is running and * lowered its priority, then reschedule its CPU: -- cgit v1.2.2 From 79b5dddf831b4719b7ec8dfcfb9bf9c619805b9c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:49 +0200 Subject: sched: remove the 'u64 now' parameter from dec_load() remove the 'u64 now' parameter from dec_load(). ( identity transformation that causes no change in functionality. 
) Signed-off-by: Ingo Molnar --- kernel/sched.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index aa8cac4ae547..23583bb93273 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -804,8 +804,7 @@ static inline void inc_load(struct rq *rq, const struct task_struct *p) update_load_add(&rq->ls.load, p->se.load.weight); } -static inline void -dec_load(struct rq *rq, const struct task_struct *p, u64 now) +static inline void dec_load(struct rq *rq, const struct task_struct *p) { update_curr_load(rq); update_load_sub(&rq->ls.load, p->se.load.weight); @@ -820,7 +819,7 @@ static void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now) static void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now) { rq->nr_running--; - dec_load(rq, p, now); + dec_load(rq, p); } static void set_load_weight(struct task_struct *p) @@ -3981,7 +3980,7 @@ void set_user_nice(struct task_struct *p, long nice) on_rq = p->se.on_rq; if (on_rq) { dequeue_task(rq, p, 0, now); - dec_load(rq, p, now); + dec_load(rq, p); } p->static_prio = NICE_TO_PRIO(nice); -- cgit v1.2.2 From e5fa2237b53d751c59f773a68e1b12c411f0b19b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:49 +0200 Subject: sched: remove the 'u64 now' parameter from inc_nr_running() remove the 'u64 now' parameter from inc_nr_running(). ( identity transformation that causes no change in functionality. 
) Signed-off-by: Ingo Molnar --- kernel/sched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 23583bb93273..bdb683464c00 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -810,7 +810,7 @@ static inline void dec_load(struct rq *rq, const struct task_struct *p) update_load_sub(&rq->ls.load, p->se.load.weight); } -static void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now) +static void inc_nr_running(struct task_struct *p, struct rq *rq) { rq->nr_running++; inc_load(rq, p); @@ -921,7 +921,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) rq->nr_uninterruptible--; enqueue_task(rq, p, wakeup, now); - inc_nr_running(p, rq, now); + inc_nr_running(p, rq); } /* @@ -938,7 +938,7 @@ static inline void activate_idle_task(struct task_struct *p, struct rq *rq) rq->nr_uninterruptible--; enqueue_task(rq, p, 0, now); - inc_nr_running(p, rq, now); + inc_nr_running(p, rq); } /* @@ -1671,7 +1671,7 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) * management (if any): */ p->sched_class->task_new(rq, p); - inc_nr_running(p, rq, now); + inc_nr_running(p, rq); } check_preempt_curr(rq, p); task_rq_unlock(rq, &flags); -- cgit v1.2.2 From db53181e41728cfd58336925422dc17f1d2c655c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:49 +0200 Subject: sched: remove the 'u64 now' parameter from dec_nr_running() remove the 'u64 now' parameter from dec_nr_running(). ( identity transformation that causes no change in functionality. 
) Signed-off-by: Ingo Molnar --- kernel/sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index bdb683464c00..86e751a19d6b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -816,7 +816,7 @@ static void inc_nr_running(struct task_struct *p, struct rq *rq) inc_load(rq, p); } -static void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now) +static void dec_nr_running(struct task_struct *p, struct rq *rq) { rq->nr_running--; dec_load(rq, p); @@ -951,7 +951,7 @@ deactivate_task(struct rq *rq, struct task_struct *p, int sleep, u64 now) rq->nr_uninterruptible++; dequeue_task(rq, p, sleep, now); - dec_nr_running(p, rq, now); + dec_nr_running(p, rq); } /** -- cgit v1.2.2 From 8159f87e2bfeeba8887b8ef34f7b523958910132 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:49 +0200 Subject: sched: remove the 'u64 now' parameter from enqueue_task() remove the 'u64 now' parameter from enqueue_task(). ( identity transformation that causes no change in functionality. 
) Signed-off-by: Ingo Molnar --- kernel/sched.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 86e751a19d6b..0ecfdd134f77 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -846,8 +846,7 @@ static void set_load_weight(struct task_struct *p) p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO]; } -static void -enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, u64 now) +static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup) { sched_info_queued(p); p->sched_class->enqueue_task(rq, p, wakeup); @@ -920,7 +919,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible--; - enqueue_task(rq, p, wakeup, now); + enqueue_task(rq, p, wakeup); inc_nr_running(p, rq); } @@ -937,7 +936,7 @@ static inline void activate_idle_task(struct task_struct *p, struct rq *rq) if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible--; - enqueue_task(rq, p, 0, now); + enqueue_task(rq, p, 0); inc_nr_running(p, rq); } @@ -3933,7 +3932,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) p->prio = prio; if (on_rq) { - enqueue_task(rq, p, 0, now); + enqueue_task(rq, p, 0); /* * Reschedule if we are currently running on this runqueue and * our priority decreased, or if we are not currently running on @@ -3990,7 +3989,7 @@ void set_user_nice(struct task_struct *p, long nice) delta = p->prio - old_prio; if (on_rq) { - enqueue_task(rq, p, 0, now); + enqueue_task(rq, p, 0); inc_load(rq, p); /* * If the task increased its priority or is running and -- cgit v1.2.2 From 69be72c13db0e9165796422b544f989033146171 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:49 +0200 Subject: sched: remove the 'u64 now' parameter from dequeue_task() remove the 'u64 now' parameter from dequeue_task(). 
( identity transformation that causes no change in functionality. ) Signed-off-by: Ingo Molnar --- kernel/sched.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 0ecfdd134f77..05ce3f54e815 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -853,8 +853,7 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup) p->se.on_rq = 1; } -static void -dequeue_task(struct rq *rq, struct task_struct *p, int sleep, u64 now) +static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep) { p->sched_class->dequeue_task(rq, p, sleep); p->se.on_rq = 0; @@ -949,7 +948,7 @@ deactivate_task(struct rq *rq, struct task_struct *p, int sleep, u64 now) if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible++; - dequeue_task(rq, p, sleep, now); + dequeue_task(rq, p, sleep); dec_nr_running(p, rq); } @@ -3922,7 +3921,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) oldprio = p->prio; on_rq = p->se.on_rq; if (on_rq) - dequeue_task(rq, p, 0, now); + dequeue_task(rq, p, 0); if (rt_prio(prio)) p->sched_class = &rt_sched_class; @@ -3978,7 +3977,7 @@ void set_user_nice(struct task_struct *p, long nice) } on_rq = p->se.on_rq; if (on_rq) { - dequeue_task(rq, p, 0, now); + dequeue_task(rq, p, 0); dec_load(rq, p); } -- cgit v1.2.2 From 2e1cb74a501c4b1bca5e55dabff24f267349193c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:49 +0200 Subject: sched: remove the 'u64 now' parameter from deactivate_task() remove the 'u64 now' parameter from deactivate_task(). ( identity transformation that causes no change in functionality. 
) Signed-off-by: Ingo Molnar --- kernel/sched.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 05ce3f54e815..2dc5d2f7b392 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -942,8 +942,7 @@ static inline void activate_idle_task(struct task_struct *p, struct rq *rq) /* * deactivate_task - remove a task from the runqueue. */ -static void -deactivate_task(struct rq *rq, struct task_struct *p, int sleep, u64 now) +static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) { if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible++; @@ -2128,7 +2127,7 @@ static void pull_task(struct rq *src_rq, struct task_struct *p, struct rq *this_rq, int this_cpu) { update_rq_clock(src_rq); - deactivate_task(src_rq, p, 0, src_rq->clock); + deactivate_task(src_rq, p, 0); set_task_cpu(p, this_cpu); activate_task(this_rq, p, 0); /* @@ -3458,7 +3457,7 @@ need_resched_nonpreemptible: unlikely(signal_pending(prev)))) { prev->state = TASK_RUNNING; } else { - deactivate_task(rq, prev, 1, now); + deactivate_task(rq, prev, 1); } switch_count = &prev->nvcsw; } @@ -4228,7 +4227,7 @@ recheck: on_rq = p->se.on_rq; if (on_rq) { update_rq_clock(rq); - deactivate_task(rq, p, 0, rq->clock); + deactivate_task(rq, p, 0); } oldprio = p->prio; __setscheduler(rq, p, policy, param->sched_priority); @@ -4983,7 +4982,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) on_rq = p->se.on_rq; if (on_rq) { update_rq_clock(rq_src); - deactivate_task(rq_src, p, 0, rq_src->clock); + deactivate_task(rq_src, p, 0); } set_task_cpu(p, dest_cpu); if (on_rq) { @@ -5404,7 +5403,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) /* Idle task back to normal (off runqueue, low prio) */ rq = task_rq_lock(rq->idle, &flags); update_rq_clock(rq); - deactivate_task(rq, rq->idle, 0, rq->clock); + deactivate_task(rq, rq->idle, 0); 
rq->idle->static_prio = MAX_PRIO; __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); rq->idle->sched_class = &idle_sched_class; @@ -6644,7 +6643,7 @@ void normalize_rt_tasks(void) on_rq = p->se.on_rq; if (on_rq) { update_rq_clock(task_rq(p)); - deactivate_task(task_rq(p), p, 0, task_rq(p)->clock); + deactivate_task(task_rq(p), p, 0); } __setscheduler(rq, p, SCHED_NORMAL, 0); if (on_rq) { -- cgit v1.2.2 From bdd4dfa89c1e3e1379729b9edec1526b3ecc25ec Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:51 +0200 Subject: sched: remove the 'u64 now' local variables final step: remove all (now superfluous) 'u64 now' variables. ( identity transformation that causes no change in functionality. ) Signed-off-by: Ingo Molnar --- kernel/sched.c | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 2dc5d2f7b392..b78b9d9ffd1c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -910,10 +910,7 @@ static int effective_prio(struct task_struct *p) */ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) { - u64 now; - update_rq_clock(rq); - now = rq->clock; if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible--; @@ -927,10 +924,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) */ static inline void activate_idle_task(struct task_struct *p, struct rq *rq) { - u64 now; - update_rq_clock(rq); - now = rq->clock; if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible--; @@ -1647,13 +1641,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) unsigned long flags; struct rq *rq; int this_cpu; - u64 now; rq = task_rq_lock(p, &flags); BUG_ON(p->state != TASK_RUNNING); this_cpu = smp_processor_id(); /* parent's CPU */ update_rq_clock(rq); - now = rq->clock; p->prio = effective_prio(p); @@ -1955,11 +1947,9 @@ static void update_cpu_load(struct rq *this_rq) unsigned long total_load = 
this_rq->ls.load.weight; unsigned long this_load = total_load; struct load_stat *ls = &this_rq->ls; - u64 now; int i, scale; __update_rq_clock(this_rq); - now = this_rq->clock; this_rq->nr_load_updates++; if (unlikely(!(sysctl_sched_features & SCHED_FEAT_PRECISE_CPU_LOAD))) @@ -3431,7 +3421,6 @@ asmlinkage void __sched schedule(void) struct task_struct *prev, *next; long *switch_count; struct rq *rq; - u64 now; int cpu; need_resched: @@ -3450,7 +3439,6 @@ need_resched_nonpreemptible: spin_lock_irq(&rq->lock); clear_tsk_need_resched(prev); __update_rq_clock(rq); - now = rq->clock; if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { if (unlikely((prev->state & TASK_INTERRUPTIBLE) && @@ -3909,13 +3897,11 @@ void rt_mutex_setprio(struct task_struct *p, int prio) unsigned long flags; int oldprio, on_rq; struct rq *rq; - u64 now; BUG_ON(prio < 0 || prio > MAX_PRIO); rq = task_rq_lock(p, &flags); update_rq_clock(rq); - now = rq->clock; oldprio = p->prio; on_rq = p->se.on_rq; @@ -3953,7 +3939,6 @@ void set_user_nice(struct task_struct *p, long nice) int old_prio, delta, on_rq; unsigned long flags; struct rq *rq; - u64 now; if (TASK_NICE(p) == nice || nice < -20 || nice > 19) return; @@ -3963,7 +3948,6 @@ void set_user_nice(struct task_struct *p, long nice) */ rq = task_rq_lock(p, &flags); update_rq_clock(rq); - now = rq->clock; /* * The RT priorities are set via sched_setscheduler(), but we still * allow the 'normal' nice value to be set - but as expected -- cgit v1.2.2 From 546fe3c909b0a4235c7237c210da483eaaac1edc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:51 +0200 Subject: sched: move the __update_rq_clock() call to scheduler_tick() move the __update_rq_clock() call from update_cpu_load() to scheduler_tick(). ( identity transformation that causes no change in functionality. ) this allows the direct use of rq->clock in ->task_tick() functions. 
Signed-off-by: Ingo Molnar --- kernel/sched.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index b78b9d9ffd1c..3f5d52949990 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1949,8 +1949,6 @@ static void update_cpu_load(struct rq *this_rq) struct load_stat *ls = &this_rq->ls; int i, scale; - __update_rq_clock(this_rq); - this_rq->nr_load_updates++; if (unlikely(!(sysctl_sched_features & SCHED_FEAT_PRECISE_CPU_LOAD))) goto do_avg; @@ -3301,6 +3299,7 @@ void scheduler_tick(void) struct task_struct *curr = rq->curr; spin_lock(&rq->lock); + __update_rq_clock(rq); update_cpu_load(rq); if (curr != rq->idle) /* FIXME: needed? */ curr->sched_class->task_tick(rq, curr); -- cgit v1.2.2 From 2daa357705bfe68788132cf9079930ca948a90af Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:51 +0200 Subject: sched: optimize activate_task() optimize activate_task() by removing update_rq_clock() from it. (and add update_rq_clock() to all callsites of activate_task() that did not have it before.) Signed-off-by: Ingo Molnar --- kernel/sched.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 3f5d52949990..9ccd91e5b65b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -910,8 +910,6 @@ static int effective_prio(struct task_struct *p) */ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) { - update_rq_clock(rq); - if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible--; @@ -1510,6 +1508,7 @@ out_set_cpu: out_activate: #endif /* CONFIG_SMP */ + update_rq_clock(rq); activate_task(rq, p, 1); /* * Sync wakeups (i.e. 
those types of wakeups where the waker @@ -2117,6 +2116,7 @@ static void pull_task(struct rq *src_rq, struct task_struct *p, update_rq_clock(src_rq); deactivate_task(src_rq, p, 0); set_task_cpu(p, this_cpu); + __update_rq_clock(this_rq); activate_task(this_rq, p, 0); /* * Note that idle threads have a prio of MAX_PRIO, for this test @@ -4207,11 +4207,10 @@ recheck: spin_unlock_irqrestore(&p->pi_lock, flags); goto recheck; } + update_rq_clock(rq); on_rq = p->se.on_rq; - if (on_rq) { - update_rq_clock(rq); + if (on_rq) deactivate_task(rq, p, 0); - } oldprio = p->prio; __setscheduler(rq, p, policy, param->sched_priority); if (on_rq) { @@ -4969,6 +4968,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) } set_task_cpu(p, dest_cpu); if (on_rq) { + update_rq_clock(rq_dest); activate_task(rq_dest, p, 0); check_preempt_curr(rq_dest, p); } @@ -6623,14 +6623,13 @@ void normalize_rt_tasks(void) goto out_unlock; #endif + update_rq_clock(rq); on_rq = p->se.on_rq; - if (on_rq) { - update_rq_clock(task_rq(p)); - deactivate_task(task_rq(p), p, 0); - } + if (on_rq) + deactivate_task(rq, p, 0); __setscheduler(rq, p, SCHED_NORMAL, 0); if (on_rq) { - activate_task(task_rq(p), p, 0); + activate_task(rq, p, 0); resched_task(rq->curr); } #ifdef CONFIG_SMP -- cgit v1.2.2 From 6e82a3befe91423e501c2124312bd805be0048eb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:51 +0200 Subject: sched: optimize update_rq_clock() calls in the load-balancer optimize update_rq_clock() calls in the load-balancer: update them right after locking the runqueue(s) so that the pull functions do not have to call it. 
Signed-off-by: Ingo Molnar --- kernel/sched.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 9ccd91e5b65b..afc59f274e58 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2017,6 +2017,8 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2) spin_lock(&rq1->lock); } } + update_rq_clock(rq1); + update_rq_clock(rq2); } /* @@ -2113,10 +2115,8 @@ void sched_exec(void) static void pull_task(struct rq *src_rq, struct task_struct *p, struct rq *this_rq, int this_cpu) { - update_rq_clock(src_rq); deactivate_task(src_rq, p, 0); set_task_cpu(p, this_cpu); - __update_rq_clock(this_rq); activate_task(this_rq, p, 0); /* * Note that idle threads have a prio of MAX_PRIO, for this test @@ -2798,6 +2798,8 @@ redo: if (busiest->nr_running > 1) { /* Attempt to move tasks */ double_lock_balance(this_rq, busiest); + /* this_rq->clock is already updated */ + update_rq_clock(busiest); ld_moved = move_tasks(this_rq, this_cpu, busiest, imbalance, sd, CPU_NEWLY_IDLE, &all_pinned); @@ -2895,6 +2897,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) /* move a task from busiest_rq to target_rq */ double_lock_balance(busiest_rq, target_rq); + update_rq_clock(busiest_rq); + update_rq_clock(target_rq); /* Search for an sd spanning us and the target CPU. 
*/ for_each_domain(target_cpu, sd) { @@ -4962,13 +4966,11 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) goto out; on_rq = p->se.on_rq; - if (on_rq) { - update_rq_clock(rq_src); + if (on_rq) deactivate_task(rq_src, p, 0); - } + set_task_cpu(p, dest_cpu); if (on_rq) { - update_rq_clock(rq_dest); activate_task(rq_dest, p, 0); check_preempt_curr(rq_dest, p); } -- cgit v1.2.2 From 254753dc321ea2b753ca9bc58ac329557a20efac Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:51 +0200 Subject: sched: make the multiplication table more accurate do small deltas in the weight and multiplication constant table so that the worst-case numeric error is better than 1:100000000. (8 digits) the current error table is: nice mult * inv_mult error ------------------------------------------ -20: 88761 * 48388 -0.0000000065 -19: 71755 * 59856 -0.0000000037 -18: 56483 * 76040 0.0000000056 -17: 46273 * 92818 0.0000000042 -16: 36291 * 118348 -0.0000000065 -15: 29154 * 147320 -0.0000000037 -14: 23254 * 184698 -0.0000000009 -13: 18705 * 229616 -0.0000000037 -12: 14949 * 287308 -0.0000000009 -11: 11916 * 360437 -0.0000000009 -10: 9548 * 449829 -0.0000000009 -9: 7620 * 563644 -0.0000000037 -8: 6100 * 704093 0.0000000009 -7: 4904 * 875809 0.0000000093 -6: 3906 * 1099582 -0.0000000009 -5: 3121 * 1376151 -0.0000000058 -4: 2501 * 1717300 0.0000000009 -3: 1991 * 2157191 -0.0000000035 -2: 1586 * 2708050 0.0000000009 -1: 1277 * 3363326 0.0000000014 0: 1024 * 4194304 0.0000000000 1: 820 * 5237765 0.0000000009 2: 655 * 6557202 0.0000000033 3: 526 * 8165337 -0.0000000079 4: 423 * 10153587 0.0000000012 5: 335 * 12820798 0.0000000079 6: 272 * 15790321 0.0000000037 7: 215 * 19976592 -0.0000000037 8: 172 * 24970740 -0.0000000037 9: 137 * 31350126 -0.0000000079 10: 110 * 39045157 -0.0000000061 11: 87 * 49367440 -0.0000000037 12: 70 * 61356676 0.0000000056 13: 56 * 76695844 -0.0000000075 14: 45 * 95443717 -0.0000000072 15: 36 * 119304647 -0.0000000009 16: 29 * 
148102320 -0.0000000037 17: 23 * 186737708 -0.0000000028 18: 18 * 238609294 -0.0000000009 19: 15 * 286331153 -0.0000000002 Signed-off-by: Ingo Molnar --- kernel/sched.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index afc59f274e58..5470ab0258a8 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -704,11 +704,14 @@ static void update_load_sub(struct load_weight *lw, unsigned long dec) * the relative distance between them is ~25%.) */ static const int prio_to_weight[40] = { -/* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921, -/* -10 */ 9537, 7629, 6103, 4883, 3906, 3125, 2500, 2000, 1600, 1280, -/* 0 */ NICE_0_LOAD /* 1024 */, -/* 1 */ 819, 655, 524, 419, 336, 268, 215, 172, 137, -/* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15, + /* -20 */ 88761, 71755, 56483, 46273, 36291, + /* -15 */ 29154, 23254, 18705, 14949, 11916, + /* -10 */ 9548, 7620, 6100, 4904, 3906, + /* -5 */ 3121, 2501, 1991, 1586, 1277, + /* 0 */ 1024, 820, 655, 526, 423, + /* 5 */ 335, 272, 215, 172, 137, + /* 10 */ 110, 87, 70, 56, 45, + /* 15 */ 36, 29, 23, 18, 15, }; /* @@ -719,14 +722,14 @@ static const int prio_to_weight[40] = { * into multiplications: */ static const u32 prio_to_wmult[40] = { -/* -20 */ 48356, 60446, 75558, 94446, 118058, -/* -15 */ 147573, 184467, 230589, 288233, 360285, -/* -10 */ 450347, 562979, 703746, 879575, 1099582, -/* -5 */ 1374389, 1717986, 2147483, 2684354, 3355443, -/* 0 */ 4194304, 5244160, 6557201, 8196502, 10250518, -/* 5 */ 12782640, 16025997, 19976592, 24970740, 31350126, -/* 10 */ 39045157, 49367440, 61356675, 76695844, 95443717, -/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, + /* -20 */ 48388, 59856, 76040, 92818, 118348, + /* -15 */ 147320, 184698, 229616, 287308, 360437, + /* -10 */ 449829, 563644, 704093, 875809, 1099582, + /* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326, + /* 0 */ 
4194304, 5237765, 6557202, 8165337, 10153587, + /* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126, + /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717, + /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, }; static void activate_task(struct rq *rq, struct task_struct *p, int wakeup); -- cgit v1.2.2 From 194081ebfaa8c7d16133e08dd79254910c20c6ff Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:51 +0200 Subject: sched: round a bit better round a tiny bit better in high-frequency rescheduling scenarios, by rounding around zero instead of rounding down. (this is pretty theoretical though) Signed-off-by: Ingo Molnar --- kernel/sched.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 5470ab0258a8..b0afd8db1396 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -638,6 +638,11 @@ static u64 div64_likely32(u64 divident, unsigned long divisor) #define WMULT_SHIFT 32 +/* + * Shift right and round: + */ +#define RSR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) + static unsigned long calc_delta_mine(unsigned long delta_exec, unsigned long weight, struct load_weight *lw) @@ -645,18 +650,17 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, u64 tmp; if (unlikely(!lw->inv_weight)) - lw->inv_weight = WMULT_CONST / lw->weight; + lw->inv_weight = (WMULT_CONST - lw->weight/2) / lw->weight + 1; tmp = (u64)delta_exec * weight; /* * Check whether we'd overflow the 64-bit multiplication: */ - if (unlikely(tmp > WMULT_CONST)) { - tmp = ((tmp >> WMULT_SHIFT/2) * lw->inv_weight) - >> (WMULT_SHIFT/2); - } else { - tmp = (tmp * lw->inv_weight) >> WMULT_SHIFT; - } + if (unlikely(tmp > WMULT_CONST)) + tmp = RSR(RSR(tmp, WMULT_SHIFT/2) * lw->inv_weight, + WMULT_SHIFT/2); + else + tmp = RSR(tmp * lw->inv_weight, WMULT_SHIFT); return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); } -- cgit v1.2.2 From 
529c77261bccd9d37f110f58b0753d95beaa9fa2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 10 Aug 2007 23:05:11 +0200 Subject: sched: improve rq-clock overflow logic improve the rq-clock overflow logic: limit the absolute rq->clock delta since the last scheduler tick, instead of limiting the delta itself. tested by Arjan van de Ven - whole laptop was misbehaving due to an incorrectly calibrated cpu_khz confusing sched_clock(). Signed-off-by: Ingo Molnar Signed-off-by: Arjan van de Ven --- kernel/sched.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index b0afd8db1396..6247e4a8350f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -263,6 +263,7 @@ struct rq { unsigned int clock_warps, clock_overflows; unsigned int clock_unstable_events; + u64 tick_timestamp; atomic_t nr_iowait; @@ -341,8 +342,11 @@ static void __update_rq_clock(struct rq *rq) /* * Catch too large forward jumps too: */ - if (unlikely(delta > 2*TICK_NSEC)) { - clock++; + if (unlikely(clock + delta > rq->tick_timestamp + TICK_NSEC)) { + if (clock < rq->tick_timestamp + TICK_NSEC) + clock = rq->tick_timestamp + TICK_NSEC; + else + clock++; rq->clock_overflows++; } else { if (unlikely(delta > rq->clock_max_delta)) @@ -3308,9 +3312,16 @@ void scheduler_tick(void) int cpu = smp_processor_id(); struct rq *rq = cpu_rq(cpu); struct task_struct *curr = rq->curr; + u64 next_tick = rq->tick_timestamp + TICK_NSEC; spin_lock(&rq->lock); __update_rq_clock(rq); + /* + * Let rq->clock advance by at least TICK_NSEC: + */ + if (unlikely(rq->clock < next_tick)) + rq->clock = next_tick; + rq->tick_timestamp = rq->clock; update_cpu_load(rq); if (curr != rq->idle) /* FIXME: needed? 
*/ curr->sched_class->task_tick(rq, curr); -- cgit v1.2.2 From 6707de00fdec3e3225192fe3dcd21323a8936b1f Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sun, 12 Aug 2007 18:08:19 +0200 Subject: sched: make global code static This patch makes the following needlessly global code static: - arch_reinit_sched_domains() - struct attr_sched_mc_power_savings - struct attr_sched_smt_power_savings Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- kernel/sched.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index 6247e4a8350f..c02659f1bd09 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6328,7 +6328,7 @@ int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2) } #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) -int arch_reinit_sched_domains(void) +static int arch_reinit_sched_domains(void) { int err; @@ -6357,24 +6357,6 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) return ret ? 
ret : count; } -int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) -{ - int err = 0; - -#ifdef CONFIG_SCHED_SMT - if (smt_capable()) - err = sysfs_create_file(&cls->kset.kobj, - &attr_sched_smt_power_savings.attr); -#endif -#ifdef CONFIG_SCHED_MC - if (!err && mc_capable()) - err = sysfs_create_file(&cls->kset.kobj, - &attr_sched_mc_power_savings.attr); -#endif - return err; -} -#endif - #ifdef CONFIG_SCHED_MC static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page) { @@ -6385,8 +6367,8 @@ static ssize_t sched_mc_power_savings_store(struct sys_device *dev, { return sched_power_savings_store(buf, count, 0); } -SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show, - sched_mc_power_savings_store); +static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show, + sched_mc_power_savings_store); #endif #ifdef CONFIG_SCHED_SMT @@ -6399,8 +6381,26 @@ static ssize_t sched_smt_power_savings_store(struct sys_device *dev, { return sched_power_savings_store(buf, count, 1); } -SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show, - sched_smt_power_savings_store); +static SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show, + sched_smt_power_savings_store); +#endif + +int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) +{ + int err = 0; + +#ifdef CONFIG_SCHED_SMT + if (smt_capable()) + err = sysfs_create_file(&cls->kset.kobj, + &attr_sched_smt_power_savings.attr); +#endif +#ifdef CONFIG_SCHED_MC + if (!err && mc_capable()) + err = sysfs_create_file(&cls->kset.kobj, + &attr_sched_mc_power_savings.attr); +#endif + return err; +} #endif /* -- cgit v1.2.2 From de0cf899bbf06b6f64a5dce9c59d74c41b6b4232 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 12 Aug 2007 18:08:19 +0200 Subject: sched: run_rebalance_domains: s/SCHED_IDLE/CPU_IDLE/ rebalance_domains(SCHED_IDLE) looks strange (typo), change it to CPU_IDLE. 
the effect of this bug was slightly more aggressive idle-balancing on SMP than intended. Signed-off-by: Oleg Nesterov Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched.c') diff --git a/kernel/sched.c b/kernel/sched.c index c02659f1bd09..45e17b83b7f1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3106,7 +3106,7 @@ static void run_rebalance_domains(struct softirq_action *h) if (need_resched()) break; - rebalance_domains(balance_cpu, SCHED_IDLE); + rebalance_domains(balance_cpu, CPU_IDLE); rq = cpu_rq(balance_cpu); if (time_after(this_rq->next_balance, rq->next_balance)) -- cgit v1.2.2