aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2010-12-22 05:53:20 -0500
committerIngo Molnar <mingo@elte.hu>2010-12-22 05:53:23 -0500
commit6c529a266bdc590a870ee2d2092ff6527eff427b (patch)
tree7be65fa2578820a1258b5a1e8e063a509a5d6176 /kernel
parent7639dae0ca11038286bbbcda05f2bef601c1eb8d (diff)
parent90a8a73c06cc32b609a880d48449d7083327e11a (diff)
Merge commit 'v2.6.37-rc7' into perf/core
Merge reason: Pick up the latest -rc. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/power/swap.c2
-rw-r--r--kernel/power/user.c2
-rw-r--r--kernel/resource.c104
-rw-r--r--kernel/sched.c287
-rw-r--r--kernel/timer.c8
-rw-r--r--kernel/workqueue.c7
7 files changed, 260 insertions, 151 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 3b159c5991b7..5447dc7defa9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -273,6 +273,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
273 273
274 setup_thread_stack(tsk, orig); 274 setup_thread_stack(tsk, orig);
275 clear_user_return_notifier(tsk); 275 clear_user_return_notifier(tsk);
276 clear_tsk_need_resched(tsk);
276 stackend = end_of_stack(tsk); 277 stackend = end_of_stack(tsk);
277 *stackend = STACK_END_MAGIC; /* for overflow detection */ 278 *stackend = STACK_END_MAGIC; /* for overflow detection */
278 279
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index baf667bb2794..8c7e4832b9be 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -30,7 +30,7 @@
30 30
31#include "power.h" 31#include "power.h"
32 32
33#define HIBERNATE_SIG "LINHIB0001" 33#define HIBERNATE_SIG "S1SUSPEND"
34 34
35/* 35/*
36 * The swap map is a data structure used for keeping track of each page 36 * The swap map is a data structure used for keeping track of each page
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 1b2ea31e6bd8..c36c3b9e8a84 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -137,7 +137,7 @@ static int snapshot_release(struct inode *inode, struct file *filp)
137 free_all_swap_pages(data->swap); 137 free_all_swap_pages(data->swap);
138 if (data->frozen) 138 if (data->frozen)
139 thaw_processes(); 139 thaw_processes();
140 pm_notifier_call_chain(data->mode == O_WRONLY ? 140 pm_notifier_call_chain(data->mode == O_RDONLY ?
141 PM_POST_HIBERNATION : PM_POST_RESTORE); 141 PM_POST_HIBERNATION : PM_POST_RESTORE);
142 atomic_inc(&snapshot_device_available); 142 atomic_inc(&snapshot_device_available);
143 143
diff --git a/kernel/resource.c b/kernel/resource.c
index 9fad33efd0db..798e2fae2a06 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -40,23 +40,6 @@ EXPORT_SYMBOL(iomem_resource);
40 40
41static DEFINE_RWLOCK(resource_lock); 41static DEFINE_RWLOCK(resource_lock);
42 42
43/*
44 * By default, we allocate free space bottom-up. The architecture can request
45 * top-down by clearing this flag. The user can override the architecture's
46 * choice with the "resource_alloc_from_bottom" kernel boot option, but that
47 * should only be a debugging tool.
48 */
49int resource_alloc_from_bottom = 1;
50
51static __init int setup_alloc_from_bottom(char *s)
52{
53 printk(KERN_INFO
54 "resource: allocating from bottom-up; please report a bug\n");
55 resource_alloc_from_bottom = 1;
56 return 0;
57}
58early_param("resource_alloc_from_bottom", setup_alloc_from_bottom);
59
60static void *r_next(struct seq_file *m, void *v, loff_t *pos) 43static void *r_next(struct seq_file *m, void *v, loff_t *pos)
61{ 44{
62 struct resource *p = v; 45 struct resource *p = v;
@@ -374,6 +357,10 @@ int __weak page_is_ram(unsigned long pfn)
374 return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1; 357 return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1;
375} 358}
376 359
360void __weak arch_remove_reservations(struct resource *avail)
361{
362}
363
377static resource_size_t simple_align_resource(void *data, 364static resource_size_t simple_align_resource(void *data,
378 const struct resource *avail, 365 const struct resource *avail,
379 resource_size_t size, 366 resource_size_t size,
@@ -397,74 +384,7 @@ static bool resource_contains(struct resource *res1, struct resource *res2)
397} 384}
398 385
399/* 386/*
400 * Find the resource before "child" in the sibling list of "root" children.
401 */
402static struct resource *find_sibling_prev(struct resource *root, struct resource *child)
403{
404 struct resource *this;
405
406 for (this = root->child; this; this = this->sibling)
407 if (this->sibling == child)
408 return this;
409
410 return NULL;
411}
412
413/*
414 * Find empty slot in the resource tree given range and alignment. 387 * Find empty slot in the resource tree given range and alignment.
415 * This version allocates from the end of the root resource first.
416 */
417static int find_resource_from_top(struct resource *root, struct resource *new,
418 resource_size_t size, resource_size_t min,
419 resource_size_t max, resource_size_t align,
420 resource_size_t (*alignf)(void *,
421 const struct resource *,
422 resource_size_t,
423 resource_size_t),
424 void *alignf_data)
425{
426 struct resource *this;
427 struct resource tmp, avail, alloc;
428
429 tmp.start = root->end;
430 tmp.end = root->end;
431
432 this = find_sibling_prev(root, NULL);
433 for (;;) {
434 if (this) {
435 if (this->end < root->end)
436 tmp.start = this->end + 1;
437 } else
438 tmp.start = root->start;
439
440 resource_clip(&tmp, min, max);
441
442 /* Check for overflow after ALIGN() */
443 avail = *new;
444 avail.start = ALIGN(tmp.start, align);
445 avail.end = tmp.end;
446 if (avail.start >= tmp.start) {
447 alloc.start = alignf(alignf_data, &avail, size, align);
448 alloc.end = alloc.start + size - 1;
449 if (resource_contains(&avail, &alloc)) {
450 new->start = alloc.start;
451 new->end = alloc.end;
452 return 0;
453 }
454 }
455
456 if (!this || this->start == root->start)
457 break;
458
459 tmp.end = this->start - 1;
460 this = find_sibling_prev(root, this);
461 }
462 return -EBUSY;
463}
464
465/*
466 * Find empty slot in the resource tree given range and alignment.
467 * This version allocates from the beginning of the root resource first.
468 */ 388 */
469static int find_resource(struct resource *root, struct resource *new, 389static int find_resource(struct resource *root, struct resource *new,
470 resource_size_t size, resource_size_t min, 390 resource_size_t size, resource_size_t min,
@@ -478,23 +398,24 @@ static int find_resource(struct resource *root, struct resource *new,
478 struct resource *this = root->child; 398 struct resource *this = root->child;
479 struct resource tmp = *new, avail, alloc; 399 struct resource tmp = *new, avail, alloc;
480 400
401 tmp.flags = new->flags;
481 tmp.start = root->start; 402 tmp.start = root->start;
482 /* 403 /*
483 * Skip past an allocated resource that starts at 0, since the 404 * Skip past an allocated resource that starts at 0, since the assignment
484 * assignment of this->start - 1 to tmp->end below would cause an 405 * of this->start - 1 to tmp->end below would cause an underflow.
485 * underflow.
486 */ 406 */
487 if (this && this->start == 0) { 407 if (this && this->start == 0) {
488 tmp.start = this->end + 1; 408 tmp.start = this->end + 1;
489 this = this->sibling; 409 this = this->sibling;
490 } 410 }
491 for (;;) { 411 for(;;) {
492 if (this) 412 if (this)
493 tmp.end = this->start - 1; 413 tmp.end = this->start - 1;
494 else 414 else
495 tmp.end = root->end; 415 tmp.end = root->end;
496 416
497 resource_clip(&tmp, min, max); 417 resource_clip(&tmp, min, max);
418 arch_remove_reservations(&tmp);
498 419
499 /* Check for overflow after ALIGN() */ 420 /* Check for overflow after ALIGN() */
500 avail = *new; 421 avail = *new;
@@ -509,10 +430,8 @@ static int find_resource(struct resource *root, struct resource *new,
509 return 0; 430 return 0;
510 } 431 }
511 } 432 }
512
513 if (!this) 433 if (!this)
514 break; 434 break;
515
516 tmp.start = this->end + 1; 435 tmp.start = this->end + 1;
517 this = this->sibling; 436 this = this->sibling;
518 } 437 }
@@ -545,10 +464,7 @@ int allocate_resource(struct resource *root, struct resource *new,
545 alignf = simple_align_resource; 464 alignf = simple_align_resource;
546 465
547 write_lock(&resource_lock); 466 write_lock(&resource_lock);
548 if (resource_alloc_from_bottom) 467 err = find_resource(root, new, size, min, max, align, alignf, alignf_data);
549 err = find_resource(root, new, size, min, max, align, alignf, alignf_data);
550 else
551 err = find_resource_from_top(root, new, size, min, max, align, alignf, alignf_data);
552 if (err >= 0 && __request_resource(root, new)) 468 if (err >= 0 && __request_resource(root, new))
553 err = -EBUSY; 469 err = -EBUSY;
554 write_unlock(&resource_lock); 470 write_unlock(&resource_lock);
diff --git a/kernel/sched.c b/kernel/sched.c
index 605ab1b24d81..c68cead94dd7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -636,22 +636,18 @@ static inline struct task_group *task_group(struct task_struct *p)
636 636
637#endif /* CONFIG_CGROUP_SCHED */ 637#endif /* CONFIG_CGROUP_SCHED */
638 638
639static u64 irq_time_cpu(int cpu); 639static void update_rq_clock_task(struct rq *rq, s64 delta);
640static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time);
641 640
642inline void update_rq_clock(struct rq *rq) 641static void update_rq_clock(struct rq *rq)
643{ 642{
644 if (!rq->skip_clock_update) { 643 s64 delta;
645 int cpu = cpu_of(rq);
646 u64 irq_time;
647 644
648 rq->clock = sched_clock_cpu(cpu); 645 if (rq->skip_clock_update)
649 irq_time = irq_time_cpu(cpu); 646 return;
650 if (rq->clock - irq_time > rq->clock_task)
651 rq->clock_task = rq->clock - irq_time;
652 647
653 sched_irq_time_avg_update(rq, irq_time); 648 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
654 } 649 rq->clock += delta;
650 update_rq_clock_task(rq, delta);
655} 651}
656 652
657/* 653/*
@@ -1924,10 +1920,9 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
1924 * They are read and saved off onto struct rq in update_rq_clock(). 1920 * They are read and saved off onto struct rq in update_rq_clock().
1925 * This may result in other CPU reading this CPU's irq time and can 1921 * This may result in other CPU reading this CPU's irq time and can
1926 * race with irq/account_system_vtime on this CPU. We would either get old 1922 * race with irq/account_system_vtime on this CPU. We would either get old
1927 * or new value (or semi updated value on 32 bit) with a side effect of 1923 * or new value with a side effect of accounting a slice of irq time to wrong
1928 * accounting a slice of irq time to wrong task when irq is in progress 1924 * task when irq is in progress while we read rq->clock. That is a worthy
1929 * while we read rq->clock. That is a worthy compromise in place of having 1925 * compromise in place of having locks on each irq in account_system_time.
1930 * locks on each irq in account_system_time.
1931 */ 1926 */
1932static DEFINE_PER_CPU(u64, cpu_hardirq_time); 1927static DEFINE_PER_CPU(u64, cpu_hardirq_time);
1933static DEFINE_PER_CPU(u64, cpu_softirq_time); 1928static DEFINE_PER_CPU(u64, cpu_softirq_time);
@@ -1945,19 +1940,58 @@ void disable_sched_clock_irqtime(void)
1945 sched_clock_irqtime = 0; 1940 sched_clock_irqtime = 0;
1946} 1941}
1947 1942
1948static u64 irq_time_cpu(int cpu) 1943#ifndef CONFIG_64BIT
1944static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
1945
1946static inline void irq_time_write_begin(void)
1949{ 1947{
1950 if (!sched_clock_irqtime) 1948 __this_cpu_inc(irq_time_seq.sequence);
1951 return 0; 1949 smp_wmb();
1950}
1951
1952static inline void irq_time_write_end(void)
1953{
1954 smp_wmb();
1955 __this_cpu_inc(irq_time_seq.sequence);
1956}
1957
1958static inline u64 irq_time_read(int cpu)
1959{
1960 u64 irq_time;
1961 unsigned seq;
1952 1962
1963 do {
1964 seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
1965 irq_time = per_cpu(cpu_softirq_time, cpu) +
1966 per_cpu(cpu_hardirq_time, cpu);
1967 } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
1968
1969 return irq_time;
1970}
1971#else /* CONFIG_64BIT */
1972static inline void irq_time_write_begin(void)
1973{
1974}
1975
1976static inline void irq_time_write_end(void)
1977{
1978}
1979
1980static inline u64 irq_time_read(int cpu)
1981{
1953 return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); 1982 return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
1954} 1983}
1984#endif /* CONFIG_64BIT */
1955 1985
1986/*
1987 * Called before incrementing preempt_count on {soft,}irq_enter
1988 * and before decrementing preempt_count on {soft,}irq_exit.
1989 */
1956void account_system_vtime(struct task_struct *curr) 1990void account_system_vtime(struct task_struct *curr)
1957{ 1991{
1958 unsigned long flags; 1992 unsigned long flags;
1993 s64 delta;
1959 int cpu; 1994 int cpu;
1960 u64 now, delta;
1961 1995
1962 if (!sched_clock_irqtime) 1996 if (!sched_clock_irqtime)
1963 return; 1997 return;
@@ -1965,9 +1999,10 @@ void account_system_vtime(struct task_struct *curr)
1965 local_irq_save(flags); 1999 local_irq_save(flags);
1966 2000
1967 cpu = smp_processor_id(); 2001 cpu = smp_processor_id();
1968 now = sched_clock_cpu(cpu); 2002 delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
1969 delta = now - per_cpu(irq_start_time, cpu); 2003 __this_cpu_add(irq_start_time, delta);
1970 per_cpu(irq_start_time, cpu) = now; 2004
2005 irq_time_write_begin();
1971 /* 2006 /*
1972 * We do not account for softirq time from ksoftirqd here. 2007 * We do not account for softirq time from ksoftirqd here.
1973 * We want to continue accounting softirq time to ksoftirqd thread 2008 * We want to continue accounting softirq time to ksoftirqd thread
@@ -1975,33 +2010,55 @@ void account_system_vtime(struct task_struct *curr)
1975 * that do not consume any time, but still wants to run. 2010 * that do not consume any time, but still wants to run.
1976 */ 2011 */
1977 if (hardirq_count()) 2012 if (hardirq_count())
1978 per_cpu(cpu_hardirq_time, cpu) += delta; 2013 __this_cpu_add(cpu_hardirq_time, delta);
1979 else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) 2014 else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
1980 per_cpu(cpu_softirq_time, cpu) += delta; 2015 __this_cpu_add(cpu_softirq_time, delta);
1981 2016
2017 irq_time_write_end();
1982 local_irq_restore(flags); 2018 local_irq_restore(flags);
1983} 2019}
1984EXPORT_SYMBOL_GPL(account_system_vtime); 2020EXPORT_SYMBOL_GPL(account_system_vtime);
1985 2021
1986static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) 2022static void update_rq_clock_task(struct rq *rq, s64 delta)
1987{ 2023{
1988 if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) { 2024 s64 irq_delta;
1989 u64 delta_irq = curr_irq_time - rq->prev_irq_time; 2025
1990 rq->prev_irq_time = curr_irq_time; 2026 irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
1991 sched_rt_avg_update(rq, delta_irq); 2027
1992 } 2028 /*
2029 * Since irq_time is only updated on {soft,}irq_exit, we might run into
2030 * this case when a previous update_rq_clock() happened inside a
2031 * {soft,}irq region.
2032 *
2033 * When this happens, we stop ->clock_task and only update the
2034 * prev_irq_time stamp to account for the part that fit, so that a next
2035 * update will consume the rest. This ensures ->clock_task is
2036 * monotonic.
2037 *
2038 * It does however cause some slight miss-attribution of {soft,}irq
2039 * time, a more accurate solution would be to update the irq_time using
2040 * the current rq->clock timestamp, except that would require using
2041 * atomic ops.
2042 */
2043 if (irq_delta > delta)
2044 irq_delta = delta;
2045
2046 rq->prev_irq_time += irq_delta;
2047 delta -= irq_delta;
2048 rq->clock_task += delta;
2049
2050 if (irq_delta && sched_feat(NONIRQ_POWER))
2051 sched_rt_avg_update(rq, irq_delta);
1993} 2052}
1994 2053
1995#else 2054#else /* CONFIG_IRQ_TIME_ACCOUNTING */
1996 2055
1997static u64 irq_time_cpu(int cpu) 2056static void update_rq_clock_task(struct rq *rq, s64 delta)
1998{ 2057{
1999 return 0; 2058 rq->clock_task += delta;
2000} 2059}
2001 2060
2002static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { } 2061#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
2003
2004#endif
2005 2062
2006#include "sched_idletask.c" 2063#include "sched_idletask.c"
2007#include "sched_fair.c" 2064#include "sched_fair.c"
@@ -2129,7 +2186,7 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
2129 * A queue event has occurred, and we're going to schedule. In 2186 * A queue event has occurred, and we're going to schedule. In
2130 * this case, we can save a useless back to back clock update. 2187 * this case, we can save a useless back to back clock update.
2131 */ 2188 */
2132 if (test_tsk_need_resched(rq->curr)) 2189 if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr))
2133 rq->skip_clock_update = 1; 2190 rq->skip_clock_update = 1;
2134} 2191}
2135 2192
@@ -3119,6 +3176,15 @@ static long calc_load_fold_active(struct rq *this_rq)
3119 return delta; 3176 return delta;
3120} 3177}
3121 3178
3179static unsigned long
3180calc_load(unsigned long load, unsigned long exp, unsigned long active)
3181{
3182 load *= exp;
3183 load += active * (FIXED_1 - exp);
3184 load += 1UL << (FSHIFT - 1);
3185 return load >> FSHIFT;
3186}
3187
3122#ifdef CONFIG_NO_HZ 3188#ifdef CONFIG_NO_HZ
3123/* 3189/*
3124 * For NO_HZ we delay the active fold to the next LOAD_FREQ update. 3190 * For NO_HZ we delay the active fold to the next LOAD_FREQ update.
@@ -3148,6 +3214,128 @@ static long calc_load_fold_idle(void)
3148 3214
3149 return delta; 3215 return delta;
3150} 3216}
3217
3218/**
3219 * fixed_power_int - compute: x^n, in O(log n) time
3220 *
3221 * @x: base of the power
3222 * @frac_bits: fractional bits of @x
3223 * @n: power to raise @x to.
3224 *
3225 * By exploiting the relation between the definition of the natural power
3226 * function: x^n := x*x*...*x (x multiplied by itself for n times), and
3227 * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
3228 * (where: n_i \elem {0, 1}, the binary vector representing n),
3229 * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
3230 * of course trivially computable in O(log_2 n), the length of our binary
3231 * vector.
3232 */
3233static unsigned long
3234fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
3235{
3236 unsigned long result = 1UL << frac_bits;
3237
3238 if (n) for (;;) {
3239 if (n & 1) {
3240 result *= x;
3241 result += 1UL << (frac_bits - 1);
3242 result >>= frac_bits;
3243 }
3244 n >>= 1;
3245 if (!n)
3246 break;
3247 x *= x;
3248 x += 1UL << (frac_bits - 1);
3249 x >>= frac_bits;
3250 }
3251
3252 return result;
3253}
3254
3255/*
3256 * a1 = a0 * e + a * (1 - e)
3257 *
3258 * a2 = a1 * e + a * (1 - e)
3259 * = (a0 * e + a * (1 - e)) * e + a * (1 - e)
3260 * = a0 * e^2 + a * (1 - e) * (1 + e)
3261 *
3262 * a3 = a2 * e + a * (1 - e)
3263 * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
3264 * = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
3265 *
3266 * ...
3267 *
3268 * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1]
3269 * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
3270 * = a0 * e^n + a * (1 - e^n)
3271 *
3272 * [1] application of the geometric series:
3273 *
3274 * n 1 - x^(n+1)
3275 * S_n := \Sum x^i = -------------
3276 * i=0 1 - x
3277 */
3278static unsigned long
3279calc_load_n(unsigned long load, unsigned long exp,
3280 unsigned long active, unsigned int n)
3281{
3282
3283 return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
3284}
3285
3286/*
3287 * NO_HZ can leave us missing all per-cpu ticks calling
3288 * calc_load_account_active(), but since an idle CPU folds its delta into
3289 * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold
3290 * in the pending idle delta if our idle period crossed a load cycle boundary.
3291 *
3292 * Once we've updated the global active value, we need to apply the exponential
3293 * weights adjusted to the number of cycles missed.
3294 */
3295static void calc_global_nohz(unsigned long ticks)
3296{
3297 long delta, active, n;
3298
3299 if (time_before(jiffies, calc_load_update))
3300 return;
3301
3302 /*
3303 * If we crossed a calc_load_update boundary, make sure to fold
3304 * any pending idle changes, the respective CPUs might have
3305 * missed the tick driven calc_load_account_active() update
3306 * due to NO_HZ.
3307 */
3308 delta = calc_load_fold_idle();
3309 if (delta)
3310 atomic_long_add(delta, &calc_load_tasks);
3311
3312 /*
3313 * If we were idle for multiple load cycles, apply them.
3314 */
3315 if (ticks >= LOAD_FREQ) {
3316 n = ticks / LOAD_FREQ;
3317
3318 active = atomic_long_read(&calc_load_tasks);
3319 active = active > 0 ? active * FIXED_1 : 0;
3320
3321 avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
3322 avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
3323 avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
3324
3325 calc_load_update += n * LOAD_FREQ;
3326 }
3327
3328 /*
3329 * Its possible the remainder of the above division also crosses
3330 * a LOAD_FREQ period, the regular check in calc_global_load()
3331 * which comes after this will take care of that.
3332 *
3333 * Consider us being 11 ticks before a cycle completion, and us
3334 * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will
3335 * age us 4 cycles, and the test in calc_global_load() will
3336 * pick up the final one.
3337 */
3338}
3151#else 3339#else
3152static void calc_load_account_idle(struct rq *this_rq) 3340static void calc_load_account_idle(struct rq *this_rq)
3153{ 3341{
@@ -3157,6 +3345,10 @@ static inline long calc_load_fold_idle(void)
3157{ 3345{
3158 return 0; 3346 return 0;
3159} 3347}
3348
3349static void calc_global_nohz(unsigned long ticks)
3350{
3351}
3160#endif 3352#endif
3161 3353
3162/** 3354/**
@@ -3174,24 +3366,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
3174 loads[2] = (avenrun[2] + offset) << shift; 3366 loads[2] = (avenrun[2] + offset) << shift;
3175} 3367}
3176 3368
3177static unsigned long
3178calc_load(unsigned long load, unsigned long exp, unsigned long active)
3179{
3180 load *= exp;
3181 load += active * (FIXED_1 - exp);
3182 return load >> FSHIFT;
3183}
3184
3185/* 3369/*
3186 * calc_load - update the avenrun load estimates 10 ticks after the 3370 * calc_load - update the avenrun load estimates 10 ticks after the
3187 * CPUs have updated calc_load_tasks. 3371 * CPUs have updated calc_load_tasks.
3188 */ 3372 */
3189void calc_global_load(void) 3373void calc_global_load(unsigned long ticks)
3190{ 3374{
3191 unsigned long upd = calc_load_update + 10;
3192 long active; 3375 long active;
3193 3376
3194 if (time_before(jiffies, upd)) 3377 calc_global_nohz(ticks);
3378
3379 if (time_before(jiffies, calc_load_update + 10))
3195 return; 3380 return;
3196 3381
3197 active = atomic_long_read(&calc_load_tasks); 3382 active = atomic_long_read(&calc_load_tasks);
@@ -3845,7 +4030,6 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev)
3845{ 4030{
3846 if (prev->se.on_rq) 4031 if (prev->se.on_rq)
3847 update_rq_clock(rq); 4032 update_rq_clock(rq);
3848 rq->skip_clock_update = 0;
3849 prev->sched_class->put_prev_task(rq, prev); 4033 prev->sched_class->put_prev_task(rq, prev);
3850} 4034}
3851 4035
@@ -3903,7 +4087,6 @@ need_resched_nonpreemptible:
3903 hrtick_clear(rq); 4087 hrtick_clear(rq);
3904 4088
3905 raw_spin_lock_irq(&rq->lock); 4089 raw_spin_lock_irq(&rq->lock);
3906 clear_tsk_need_resched(prev);
3907 4090
3908 switch_count = &prev->nivcsw; 4091 switch_count = &prev->nivcsw;
3909 if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { 4092 if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
@@ -3935,6 +4118,8 @@ need_resched_nonpreemptible:
3935 4118
3936 put_prev_task(rq, prev); 4119 put_prev_task(rq, prev);
3937 next = pick_next_task(rq); 4120 next = pick_next_task(rq);
4121 clear_tsk_need_resched(prev);
4122 rq->skip_clock_update = 0;
3938 4123
3939 if (likely(prev != next)) { 4124 if (likely(prev != next)) {
3940 sched_info_switch(prev, next); 4125 sched_info_switch(prev, next);
diff --git a/kernel/timer.c b/kernel/timer.c
index 68a9ae7679b7..353b9227c2ec 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1252,6 +1252,12 @@ unsigned long get_next_timer_interrupt(unsigned long now)
1252 struct tvec_base *base = __get_cpu_var(tvec_bases); 1252 struct tvec_base *base = __get_cpu_var(tvec_bases);
1253 unsigned long expires; 1253 unsigned long expires;
1254 1254
1255 /*
1256 * Pretend that there is no timer pending if the cpu is offline.
1257 * Possible pending timers will be migrated later to an active cpu.
1258 */
1259 if (cpu_is_offline(smp_processor_id()))
1260 return now + NEXT_TIMER_MAX_DELTA;
1255 spin_lock(&base->lock); 1261 spin_lock(&base->lock);
1256 if (time_before_eq(base->next_timer, base->timer_jiffies)) 1262 if (time_before_eq(base->next_timer, base->timer_jiffies))
1257 base->next_timer = __next_timer_interrupt(base); 1263 base->next_timer = __next_timer_interrupt(base);
@@ -1319,7 +1325,7 @@ void do_timer(unsigned long ticks)
1319{ 1325{
1320 jiffies_64 += ticks; 1326 jiffies_64 += ticks;
1321 update_wall_time(); 1327 update_wall_time();
1322 calc_global_load(); 1328 calc_global_load(ticks);
1323} 1329}
1324 1330
1325#ifdef __ARCH_WANT_SYS_ALARM 1331#ifdef __ARCH_WANT_SYS_ALARM
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 90db1bd1a978..e785b0f2aea5 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -661,7 +661,7 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)
661{ 661{
662 struct worker *worker = kthread_data(task); 662 struct worker *worker = kthread_data(task);
663 663
664 if (likely(!(worker->flags & WORKER_NOT_RUNNING))) 664 if (!(worker->flags & WORKER_NOT_RUNNING))
665 atomic_inc(get_gcwq_nr_running(cpu)); 665 atomic_inc(get_gcwq_nr_running(cpu));
666} 666}
667 667
@@ -687,7 +687,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task,
687 struct global_cwq *gcwq = get_gcwq(cpu); 687 struct global_cwq *gcwq = get_gcwq(cpu);
688 atomic_t *nr_running = get_gcwq_nr_running(cpu); 688 atomic_t *nr_running = get_gcwq_nr_running(cpu);
689 689
690 if (unlikely(worker->flags & WORKER_NOT_RUNNING)) 690 if (worker->flags & WORKER_NOT_RUNNING)
691 return NULL; 691 return NULL;
692 692
693 /* this can only happen on the local cpu */ 693 /* this can only happen on the local cpu */
@@ -3692,7 +3692,8 @@ static int __init init_workqueues(void)
3692 system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); 3692 system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0);
3693 system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, 3693 system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
3694 WQ_UNBOUND_MAX_ACTIVE); 3694 WQ_UNBOUND_MAX_ACTIVE);
3695 BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq); 3695 BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq ||
3696 !system_unbound_wq);
3696 return 0; 3697 return 0;
3697} 3698}
3698early_initcall(init_workqueues); 3699early_initcall(init_workqueues);