Diffstat (limited to 'kernel')
 kernel/cpuset.c           | 25
 kernel/exit.c             |  8
 kernel/hrtimer.c          |  6
 kernel/posix-cpu-timers.c | 48
 kernel/power/main.c       |  2
 kernel/printk.c           | 28
 kernel/sched.c            | 62
 kernel/timer.c            | 16
 8 files changed, 107 insertions(+), 88 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 72248d1b9e3f..ab81fdd4572b 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2231,19 +2231,25 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * So only GFP_KERNEL allocations, if all nodes in the cpuset are
  * short of memory, might require taking the callback_mutex mutex.
  *
- * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages()
- * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing
- * hardwall cpusets - no allocation on a node outside the cpuset is
- * allowed (unless in interrupt, of course).
- *
- * The second loop doesn't even call here for GFP_ATOMIC requests
- * (if the __alloc_pages() local variable 'wait' is set). That check
- * and the checks below have the combined affect in the second loop of
- * the __alloc_pages() routine that:
+ * The first call here from mm/page_alloc:get_page_from_freelist()
+ * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so
+ * no allocation on a node outside the cpuset is allowed (unless in
+ * interrupt, of course).
+ *
+ * The second pass through get_page_from_freelist() doesn't even call
+ * here for GFP_ATOMIC calls. For those calls, the __alloc_pages()
+ * variable 'wait' is not set, and the bit ALLOC_CPUSET is not set
+ * in alloc_flags. That logic and the checks below have the combined
+ * affect that:
  * in_interrupt - any node ok (current task context irrelevant)
  * GFP_ATOMIC - any node ok
  * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok
  * GFP_USER - only nodes in current tasks mems allowed ok.
+ *
+ * Rule:
+ *    Don't call cpuset_zone_allowed() if you can't sleep, unless you
+ *    pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
+ *    the code that might scan up ancestor cpusets and sleep.
  **/
 
 int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
@@ -2255,6 +2261,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 	if (in_interrupt())
 		return 1;
 	node = z->zone_pgdat->node_id;
+	might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
 	if (node_isset(node, current->mems_allowed))
 		return 1;
 	if (gfp_mask & __GFP_HARDWALL)	/* If hardwall request, stop here */
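The rule added to the comment block is now enforced at runtime by the might_sleep_if() above: callers that cannot sleep must pass __GFP_HARDWALL so that only the sleep-free mems_allowed check runs. A minimal caller sketch (the wrapper function below is hypothetical and not part of this patch; cpuset_zone_allowed() and the GFP flags are the interfaces being documented):

#include <linux/cpuset.h>
#include <linux/gfp.h>

/*
 * Hypothetical helper, for illustration only.  Passing __GFP_HARDWALL
 * keeps cpuset_zone_allowed() on the sleep-free path (a check of
 * current->mems_allowed) and away from the ancestor-cpuset scan,
 * which may take callback_mutex and sleep.
 */
static int zone_usable_in_atomic(struct zone *z)
{
	return cpuset_zone_allowed(z, GFP_ATOMIC | __GFP_HARDWALL);
}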
diff --git a/kernel/exit.c b/kernel/exit.c
index e95b93282210..e06d0c10a24e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -881,14 +881,6 @@ fastcall NORET_TYPE void do_exit(long code)
 
 	tsk->flags |= PF_EXITING;
 
-	/*
-	 * Make sure we don't try to process any timer firings
-	 * while we are already exiting.
-	 */
-	tsk->it_virt_expires = cputime_zero;
-	tsk->it_prof_expires = cputime_zero;
-	tsk->it_sched_expires = 0;
-
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
 			current->comm, current->pid,
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index b7f0388bd71c..01fa2ae98a85 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -456,6 +456,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(hrtimer_start);
 
 /**
  * hrtimer_try_to_cancel - try to deactivate a timer
@@ -484,6 +485,7 @@ int hrtimer_try_to_cancel(struct hrtimer *timer)
 	return ret;
 
 }
+EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
 
 /**
  * hrtimer_cancel - cancel a timer and wait for the handler to finish.
@@ -504,6 +506,7 @@ int hrtimer_cancel(struct hrtimer *timer)
 		cpu_relax();
 	}
 }
+EXPORT_SYMBOL_GPL(hrtimer_cancel);
 
 /**
  * hrtimer_get_remaining - get remaining time for the timer
@@ -522,6 +525,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
 
 	return rem;
 }
+EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
 
 #ifdef CONFIG_NO_IDLE_HZ
 /**
@@ -580,6 +584,7 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 	timer->base = &bases[clock_id];
 	timer->node.rb_parent = HRTIMER_INACTIVE;
 }
+EXPORT_SYMBOL_GPL(hrtimer_init);
 
 /**
  * hrtimer_get_res - get the timer resolution for a clock
@@ -599,6 +604,7 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(hrtimer_get_res);
 
 /*
  * Expire the per base hrtimer-queue:
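These EXPORT_SYMBOL_GPL() additions make the core hrtimer interface usable from GPL modules for the first time. A minimal sketch of a module built on the exported calls, assuming the 2.6.16-era API (the module and its names are hypothetical; note the callback of this era returns int via the timer's function/data pair, and the relative mode is spelled HRTIMER_REL):

#include <linux/module.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer demo_timer;

/* fires once, one second after module load */
static int demo_timer_fn(void *data)
{
	printk(KERN_INFO "demo hrtimer fired\n");
	return HRTIMER_NORESTART;
}

static int __init demo_init(void)
{
	hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_REL);
	demo_timer.function = demo_timer_fn;
	demo_timer.data = NULL;
	hrtimer_start(&demo_timer, ktime_set(1, 0), HRTIMER_REL);
	return 0;
}

static void __exit demo_exit(void)
{
	hrtimer_cancel(&demo_timer);	/* waits for a running callback */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");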
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 520f6c59948d..d38d9ec3276c 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -555,9 +555,6 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
 	struct cpu_timer_list *next;
 	unsigned long i;
 
-	if (CPUCLOCK_PERTHREAD(timer->it_clock) && (p->flags & PF_EXITING))
-		return;
-
 	head = (CPUCLOCK_PERTHREAD(timer->it_clock) ?
 		p->cpu_timers : p->signal->cpu_timers);
 	head += CPUCLOCK_WHICH(timer->it_clock);
@@ -1173,6 +1170,9 @@ static void check_process_timers(struct task_struct *tsk,
 	}
 	t = tsk;
 	do {
+		if (unlikely(t->flags & PF_EXITING))
+			continue;
+
 		ticks = cputime_add(cputime_add(t->utime, t->stime),
 				    prof_left);
 		if (!cputime_eq(prof_expires, cputime_zero) &&
@@ -1193,11 +1193,7 @@ static void check_process_timers(struct task_struct *tsk,
 			     t->it_sched_expires > sched)) {
 			t->it_sched_expires = sched;
 		}
-
-		do {
-			t = next_thread(t);
-		} while (unlikely(t->flags & PF_EXITING));
-	} while (t != tsk);
+	} while ((t = next_thread(t)) != tsk);
 	}
 }
 
@@ -1289,30 +1285,30 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 
 #undef UNEXPIRED
 
-	BUG_ON(tsk->exit_state);
-
 	/*
 	 * Double-check with locks held.
 	 */
 	read_lock(&tasklist_lock);
-	spin_lock(&tsk->sighand->siglock);
+	if (likely(tsk->signal != NULL)) {
+		spin_lock(&tsk->sighand->siglock);
 
 	/*
 	 * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N]
 	 * all the timers that are firing, and put them on the firing list.
 	 */
 	check_thread_timers(tsk, &firing);
 	check_process_timers(tsk, &firing);
 
 	/*
 	 * We must release these locks before taking any timer's lock.
 	 * There is a potential race with timer deletion here, as the
 	 * siglock now protects our private firing list. We have set
 	 * the firing flag in each timer, so that a deletion attempt
 	 * that gets the timer lock before we do will give it up and
 	 * spin until we've taken care of that timer below.
 	 */
 	spin_unlock(&tsk->sighand->siglock);
+	}
 	read_unlock(&tasklist_lock);
 
 	/*
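The rewritten loop tail in check_process_timers() folds the PF_EXITING skip into the do/while itself. This is safe because continue in a do/while jumps to the controlling expression, so (t = next_thread(t)) != tsk still advances the cursor, and the loop cannot spin on exiting threads the way the old inner do/while could. A standalone user-space sketch of that control flow (three hypothetical threads on a circular list):

#include <stdio.h>

struct thread { int id; int exiting; struct thread *next; };

static struct thread *next_thread(struct thread *t)
{
	return t->next;
}

int main(void)
{
	struct thread c = { 3, 0, NULL }, b = { 2, 1, &c }, a = { 1, 0, &b };
	struct thread *tsk = &a, *t = tsk;

	c.next = &a;	/* close the circle, like a thread group list */
	do {
		if (t->exiting)
			continue;	/* skip, but still advance below */
		printf("accounting thread %d\n", t->id);
	} while ((t = next_thread(t)) != tsk);
	return 0;
}

This prints threads 1 and 3 and terminates; the exiting thread 2 is skipped without disturbing the loop's progress.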
diff --git a/kernel/power/main.c b/kernel/power/main.c
index a6d9ef46009e..0a907f0dc56b 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -86,6 +86,7 @@ static int suspend_prepare(suspend_state_t state)
 		goto Thaw;
 	}
 
+	suspend_console();
 	if ((error = device_suspend(PMSG_SUSPEND))) {
 		printk(KERN_ERR "Some devices failed to suspend\n");
 		goto Finish;
@@ -133,6 +134,7 @@ int suspend_enter(suspend_state_t state)
 static void suspend_finish(suspend_state_t state)
 {
 	device_resume();
+	resume_console();
 	thaw_processes();
 	enable_nonboot_cpus();
 	if (pm_ops && pm_ops->finish)
diff --git a/kernel/printk.c b/kernel/printk.c
index c056f3324432..19a955619294 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -67,6 +67,7 @@ EXPORT_SYMBOL(oops_in_progress);
  * driver system.
  */
 static DECLARE_MUTEX(console_sem);
+static DECLARE_MUTEX(secondary_console_sem);
 struct console *console_drivers;
 /*
  * This is used for debugging the mess that is the VT code by
@@ -76,7 +77,7 @@ struct console *console_drivers;
  * path in the console code where we end up in places I want
  * locked without the console sempahore held
  */
-static int console_locked;
+static int console_locked, console_suspended;
 
 /*
  * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars
@@ -698,6 +699,23 @@ int __init add_preferred_console(char *name, int idx, char *options)
 }
 
 /**
+ * suspend_console - suspend the console subsystem
+ *
+ * This disables printk() while we go into suspend states
+ */
+void suspend_console(void)
+{
+	acquire_console_sem();
+	console_suspended = 1;
+}
+
+void resume_console(void)
+{
+	console_suspended = 0;
+	release_console_sem();
+}
+
+/**
  * acquire_console_sem - lock the console system for exclusive use.
  *
  * Acquires a semaphore which guarantees that the caller has
@@ -708,6 +726,10 @@ int __init add_preferred_console(char *name, int idx, char *options)
 void acquire_console_sem(void)
 {
 	BUG_ON(in_interrupt());
+	if (console_suspended) {
+		down(&secondary_console_sem);
+		return;
+	}
 	down(&console_sem);
 	console_locked = 1;
 	console_may_schedule = 1;
@@ -750,6 +772,10 @@ void release_console_sem(void)
 	unsigned long _con_start, _log_end;
 	unsigned long wake_klogd = 0;
 
+	if (console_suspended) {
+		up(&secondary_console_sem);
+		return;
+	}
 	for ( ; ; ) {
 		spin_lock_irqsave(&logbuf_lock, flags);
 		wake_klogd |= log_start - log_end;
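Taken together with the power/main.c hooks above, the mechanism is: suspend_console() acquires console_sem and holds it for the whole suspend, while the console_suspended flag diverts every later acquire/release pair onto secondary_console_sem, so would-be console users serialize among themselves without ever touching the suspended console hardware. A user-space analog of the pattern (POSIX semaphores; the names mirror the kernel's, the program itself is hypothetical):

#include <semaphore.h>
#include <stdio.h>

static sem_t console_sem, secondary_console_sem;
static int console_suspended;

static void acquire_console(void)
{
	if (console_suspended) {
		sem_wait(&secondary_console_sem);	/* diverted */
		return;
	}
	sem_wait(&console_sem);
}

static void release_console(void)
{
	if (console_suspended) {
		sem_post(&secondary_console_sem);
		return;
	}
	sem_post(&console_sem);	/* kernel version also flushes the log here */
}

static void suspend_console_analog(void)
{
	acquire_console();	/* hold console_sem across the suspend */
	console_suspended = 1;
}

static void resume_console_analog(void)
{
	console_suspended = 0;
	release_console();	/* drop console_sem, output resumes */
}

int main(void)
{
	sem_init(&console_sem, 0, 1);
	sem_init(&secondary_console_sem, 0, 1);
	suspend_console_analog();
	acquire_console();	/* does not deadlock: takes the secondary */
	printf("printk-like caller got the secondary semaphore\n");
	release_console();
	resume_console_analog();
	return 0;
}

Note that console_suspended is read without holding either semaphore; that is tolerable only because suspend and resume are serialized by the single suspend path, an assumption the analog inherits.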
diff --git a/kernel/sched.c b/kernel/sched.c
index 4c64f85698ae..c13f1bd2df7d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -665,55 +665,13 @@ static int effective_prio(task_t *p)
 }
 
 /*
- * We place interactive tasks back into the active array, if possible.
- *
- * To guarantee that this does not starve expired tasks we ignore the
- * interactivity of a task if the first expired task had to wait more
- * than a 'reasonable' amount of time. This deadline timeout is
- * load-dependent, as the frequency of array switched decreases with
- * increasing number of running tasks. We also ignore the interactivity
- * if a better static_prio task has expired, and switch periodically
- * regardless, to ensure that highly interactive tasks do not starve
- * the less fortunate for unreasonably long periods.
- */
-static inline int expired_starving(runqueue_t *rq)
-{
-	int limit;
-
-	/*
-	 * Arrays were recently switched, all is well
-	 */
-	if (!rq->expired_timestamp)
-		return 0;
-
-	limit = STARVATION_LIMIT * rq->nr_running;
-
-	/*
-	 * It's time to switch arrays
-	 */
-	if (jiffies - rq->expired_timestamp >= limit)
-		return 1;
-
-	/*
-	 * There's a better selection in the expired array
-	 */
-	if (rq->curr->static_prio > rq->best_expired_prio)
-		return 1;
-
-	/*
-	 * All is well
-	 */
-	return 0;
-}
-
-/*
  * __activate_task - move a task to the runqueue.
  */
 static void __activate_task(task_t *p, runqueue_t *rq)
 {
 	prio_array_t *target = rq->active;
 
-	if (unlikely(batch_task(p) || (expired_starving(rq) && !rt_task(p))))
+	if (batch_task(p))
 		target = rq->expired;
 	enqueue_task(p, target);
 	rq->nr_running++;
@@ -2532,6 +2490,22 @@ unsigned long long current_sched_time(const task_t *tsk)
 }
 
 /*
+ * We place interactive tasks back into the active array, if possible.
+ *
+ * To guarantee that this does not starve expired tasks we ignore the
+ * interactivity of a task if the first expired task had to wait more
+ * than a 'reasonable' amount of time. This deadline timeout is
+ * load-dependent, as the frequency of array switched decreases with
+ * increasing number of running tasks. We also ignore the interactivity
+ * if a better static_prio task has expired:
+ */
+#define EXPIRED_STARVING(rq) \
+	((STARVATION_LIMIT && ((rq)->expired_timestamp && \
+		(jiffies - (rq)->expired_timestamp >= \
+			STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
+			((rq)->curr->static_prio > (rq)->best_expired_prio))
+
+/*
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
  * @hardirq_offset: the offset to subtract from hardirq_count()
@@ -2666,7 +2640,7 @@ void scheduler_tick(void)
 
 		if (!rq->expired_timestamp)
 			rq->expired_timestamp = jiffies;
-		if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
+		if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
 			enqueue_task(p, rq->expired);
 			if (p->static_prio < rq->best_expired_prio)
 				rq->best_expired_prio = p->static_prio;
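This backs out the inline expired_starving() in favor of the original EXPIRED_STARVING() macro, restoring the STARVATION_LIMIT guard and the + 1 in the deadline; __activate_task() also no longer consults it for non-batch tasks. A worked user-space check of the restored arithmetic (all numbers made up; STARVATION_LIMIT here is illustrative, not the kernel's derived value):

#include <stdio.h>

#define STARVATION_LIMIT	128	/* illustrative only */

static int expired_starving(unsigned long jiffies,
			    unsigned long expired_timestamp,
			    unsigned long nr_running,
			    int curr_static_prio, int best_expired_prio)
{
	if (STARVATION_LIMIT && expired_timestamp &&
	    jiffies - expired_timestamp >= STARVATION_LIMIT * nr_running + 1)
		return 1;
	return curr_static_prio > best_expired_prio;
}

int main(void)
{
	/* two runnable tasks: deadline is 128 * 2 + 1 = 257 ticks */
	printf("%d\n", expired_starving(1000, 800, 2, 120, 120)); /* 200 < 257: 0 */
	printf("%d\n", expired_starving(1100, 800, 2, 120, 120)); /* 300 >= 257: 1 */
	/* a better static_prio task has expired: starving regardless of time */
	printf("%d\n", expired_starving(1000, 800, 2, 125, 120)); /* 1 */
	return 0;
}

So the deadline scales with nr_running: a busier runqueue tolerates a longer wait before the expired array is forcibly drained.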
diff --git a/kernel/timer.c b/kernel/timer.c
index 67eaf0f54096..9e49deed468c 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -541,6 +541,22 @@ found:
 	}
 	spin_unlock(&base->lock);
 
+	/*
+	 * It can happen that other CPUs service timer IRQs and increment
+	 * jiffies, but we have not yet got a local timer tick to process
+	 * the timer wheels. In that case, the expiry time can be before
+	 * jiffies, but since the high-resolution timer here is relative to
+	 * jiffies, the default expression when high-resolution timers are
+	 * not active,
+	 *
+	 *   time_before(MAX_JIFFY_OFFSET + jiffies, expires)
+	 *
+	 * would falsely evaluate to true. If that is the case, just
+	 * return jiffies so that we can immediately fire the local timer
+	 */
+	if (time_before(expires, jiffies))
+		return jiffies;
+
 	if (time_before(hr_expires, expires))
 		return hr_expires;
 
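The new early return leans on the kernel's wrap-safe jiffies comparison. A standalone illustration of the false positive described in the comment, using the time_after()/time_before() definitions from <linux/jiffies.h> (the concrete numbers are arbitrary):

#include <stdio.h>

#define time_after(a, b)	((long)(b) - (long)(a) < 0)
#define time_before(a, b)	time_after(b, a)
#define MAX_JIFFY_OFFSET	((~0UL >> 1) - 1)

int main(void)
{
	unsigned long jiffies = 1000;
	unsigned long expires = 990;	/* expiry already in the past */
	unsigned long hr_expires = MAX_JIFFY_OFFSET + jiffies;

	/* the default hr_expires wrongly looks "before" the stale expiry */
	printf("%d\n", time_before(hr_expires, expires));	/* 1 */
	/* the new guard catches it and returns jiffies instead */
	printf("%d\n", time_before(expires, jiffies));		/* 1 */
	return 0;
}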