Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  125
1 file changed, 86 insertions(+), 39 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 4ee400f9d56b..a2be2d055299 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -51,6 +51,7 @@
 #include <linux/times.h>
 #include <linux/acct.h>
 #include <linux/kprobes.h>
+#include <linux/delayacct.h>
 #include <asm/tlb.h>
 
 #include <asm/unistd.h>
@@ -501,9 +502,36 @@ struct file_operations proc_schedstat_operations = {
         .release        = single_release,
 };
 
+/*
+ * Expects runqueue lock to be held for atomicity of update
+ */
+static inline void
+rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
+{
+        if (rq) {
+                rq->rq_sched_info.run_delay += delta_jiffies;
+                rq->rq_sched_info.pcnt++;
+        }
+}
+
+/*
+ * Expects runqueue lock to be held for atomicity of update
+ */
+static inline void
+rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
+{
+        if (rq)
+                rq->rq_sched_info.cpu_time += delta_jiffies;
+}
 # define schedstat_inc(rq, field) do { (rq)->field++; } while (0)
 # define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
 #else /* !CONFIG_SCHEDSTATS */
+static inline void
+rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
+{}
+static inline void
+rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
+{}
 # define schedstat_inc(rq, field) do { } while (0)
 # define schedstat_add(rq, field, amt) do { } while (0)
 #endif
@@ -523,7 +551,7 @@ static inline struct rq *this_rq_lock(void)
         return rq;
 }
 
-#ifdef CONFIG_SCHEDSTATS
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 /*
  * Called when a process is dequeued from the active array and given
  * the cpu. We should note that with the exception of interactive
@@ -551,21 +579,16 @@ static inline void sched_info_dequeued(struct task_struct *t)
  */
 static void sched_info_arrive(struct task_struct *t)
 {
-        unsigned long now = jiffies, diff = 0;
-        struct rq *rq = task_rq(t);
+        unsigned long now = jiffies, delta_jiffies = 0;
 
         if (t->sched_info.last_queued)
-                diff = now - t->sched_info.last_queued;
+                delta_jiffies = now - t->sched_info.last_queued;
         sched_info_dequeued(t);
-        t->sched_info.run_delay += diff;
+        t->sched_info.run_delay += delta_jiffies;
         t->sched_info.last_arrival = now;
         t->sched_info.pcnt++;
 
-        if (!rq)
-                return;
-
-        rq->rq_sched_info.run_delay += diff;
-        rq->rq_sched_info.pcnt++;
+        rq_sched_info_arrive(task_rq(t), delta_jiffies);
 }
 
 /*
@@ -585,8 +608,9 @@ static void sched_info_arrive(struct task_struct *t)
  */
 static inline void sched_info_queued(struct task_struct *t)
 {
-        if (!t->sched_info.last_queued)
-                t->sched_info.last_queued = jiffies;
+        if (unlikely(sched_info_on()))
+                if (!t->sched_info.last_queued)
+                        t->sched_info.last_queued = jiffies;
 }
 
 /*
@@ -595,13 +619,10 @@ static inline void sched_info_queued(struct task_struct *t)
  */
 static inline void sched_info_depart(struct task_struct *t)
 {
-        struct rq *rq = task_rq(t);
-        unsigned long diff = jiffies - t->sched_info.last_arrival;
-
-        t->sched_info.cpu_time += diff;
+        unsigned long delta_jiffies = jiffies - t->sched_info.last_arrival;
 
-        if (rq)
-                rq->rq_sched_info.cpu_time += diff;
+        t->sched_info.cpu_time += delta_jiffies;
+        rq_sched_info_depart(task_rq(t), delta_jiffies);
 }
 
 /*
@@ -610,7 +631,7 @@ static inline void sched_info_depart(struct task_struct *t)
  * the idle task.) We are only called when prev != next.
  */
 static inline void
-sched_info_switch(struct task_struct *prev, struct task_struct *next)
+__sched_info_switch(struct task_struct *prev, struct task_struct *next)
 {
         struct rq *rq = task_rq(prev);
 
@@ -625,10 +646,16 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
         if (next != rq->idle)
                 sched_info_arrive(next);
 }
+static inline void
+sched_info_switch(struct task_struct *prev, struct task_struct *next)
+{
+        if (unlikely(sched_info_on()))
+                __sched_info_switch(prev, next);
+}
 #else
 #define sched_info_queued(t) do { } while (0)
 #define sched_info_switch(t, next) do { } while (0)
-#endif /* CONFIG_SCHEDSTATS */
+#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
 
 /*
  * Adding/removing a task to/from a priority array:
@@ -1530,8 +1557,9 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags)
 
         INIT_LIST_HEAD(&p->run_list);
         p->array = NULL;
-#ifdef CONFIG_SCHEDSTATS
-        memset(&p->sched_info, 0, sizeof(p->sched_info));
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+        if (unlikely(sched_info_on()))
+                memset(&p->sched_info, 0, sizeof(p->sched_info));
 #endif
 #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
         p->oncpu = 0;
@@ -1788,7 +1816,15 @@ context_switch(struct rq *rq, struct task_struct *prev,
                 WARN_ON(rq->prev_mm);
                 rq->prev_mm = oldmm;
         }
+        /*
+         * Since the runqueue lock will be released by the next
+         * task (which is an invalid locking op but in the case
+         * of the scheduler it's an obvious special-case), so we
+         * do an early lockdep release here:
+         */
+#ifndef __ARCH_WANT_UNLOCKED_CTXSW
         spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
+#endif
 
         /* Here we just switch the register state and the stack. */
         switch_to(prev, next, prev);
@@ -3384,7 +3420,7 @@ EXPORT_SYMBOL(schedule);
 
 #ifdef CONFIG_PREEMPT
 /*
- * this is is the entry point to schedule() from in-kernel preemption
+ * this is the entry point to schedule() from in-kernel preemption
  * off of preempt_enable. Kernel preemptions off return from interrupt
  * occur there and call schedule directly.
  */
@@ -3427,7 +3463,7 @@ need_resched:
 EXPORT_SYMBOL(preempt_schedule);
 
 /*
- * this is is the entry point to schedule() from kernel preemption
+ * this is the entry point to schedule() from kernel preemption
  * off of irq context.
  * Note, that this is called and return with irqs disabled. This will
  * protect us against recursive calling from irq.
@@ -3439,7 +3475,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
         struct task_struct *task = current;
         int saved_lock_depth;
 #endif
-        /* Catch callers which need to be fixed*/
+        /* Catch callers which need to be fixed */
         BUG_ON(ti->preempt_count || !irqs_disabled());
 
 need_resched:
@@ -4420,9 +4456,9 @@ asmlinkage long sys_sched_yield(void)
         return 0;
 }
 
-static inline int __resched_legal(void)
+static inline int __resched_legal(int expected_preempt_count)
 {
-        if (unlikely(preempt_count()))
+        if (unlikely(preempt_count() != expected_preempt_count))
                 return 0;
         if (unlikely(system_state != SYSTEM_RUNNING))
                 return 0;
@@ -4448,7 +4484,7 @@ static void __cond_resched(void)
 
 int __sched cond_resched(void)
 {
-        if (need_resched() && __resched_legal()) {
+        if (need_resched() && __resched_legal(0)) {
                 __cond_resched();
                 return 1;
         }
@@ -4474,7 +4510,7 @@ int cond_resched_lock(spinlock_t *lock)
                 ret = 1;
                 spin_lock(lock);
         }
-        if (need_resched() && __resched_legal()) {
+        if (need_resched() && __resched_legal(1)) {
                 spin_release(&lock->dep_map, 1, _THIS_IP_);
                 _raw_spin_unlock(lock);
                 preempt_enable_no_resched();
@@ -4490,7 +4526,7 @@ int __sched cond_resched_softirq(void)
 {
         BUG_ON(!in_softirq());
 
-        if (need_resched() && __resched_legal()) {
+        if (need_resched() && __resched_legal(0)) {
                 raw_local_irq_disable();
                 _local_bh_enable();
                 raw_local_irq_enable();
@@ -4526,9 +4562,11 @@ void __sched io_schedule(void)
 {
         struct rq *rq = &__raw_get_cpu_var(runqueues);
 
+        delayacct_blkio_start();
         atomic_inc(&rq->nr_iowait);
         schedule();
         atomic_dec(&rq->nr_iowait);
+        delayacct_blkio_end();
 }
 EXPORT_SYMBOL(io_schedule);
 
@@ -4537,9 +4575,11 @@ long __sched io_schedule_timeout(long timeout)
         struct rq *rq = &__raw_get_cpu_var(runqueues);
         long ret;
 
+        delayacct_blkio_start();
         atomic_inc(&rq->nr_iowait);
         ret = schedule_timeout(timeout);
         atomic_dec(&rq->nr_iowait);
+        delayacct_blkio_end();
         return ret;
 }
 
@@ -4650,7 +4690,7 @@ static inline struct task_struct *younger_sibling(struct task_struct *p)
         return list_entry(p->sibling.next,struct task_struct,sibling);
 }
 
-static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" };
+static const char stat_nam[] = "RSDTtZX";
 
 static void show_task(struct task_struct *p)
 {
@@ -4658,12 +4698,9 @@ static void show_task(struct task_struct *p)
         unsigned long free = 0;
         unsigned state;
 
-        printk("%-13.13s ", p->comm);
         state = p->state ? __ffs(p->state) + 1 : 0;
-        if (state < ARRAY_SIZE(stat_nam))
-                printk(stat_nam[state]);
-        else
-                printk("?");
+        printk("%-13.13s %c", p->comm,
+                state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
 #if (BITS_PER_LONG == 32)
         if (state == TASK_RUNNING)
                 printk(" running ");
@@ -4877,7 +4914,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
                 p->timestamp = p->timestamp - rq_src->timestamp_last_tick
                                 + rq_dest->timestamp_last_tick;
                 deactivate_task(p, rq_src);
-                activate_task(p, rq_dest, 0);
+                __activate_task(p, rq_dest);
                 if (TASK_PREEMPTS_CURR(p, rq_dest))
                         resched_task(rq_dest->curr);
         }
@@ -5776,7 +5813,7 @@ static unsigned long long measure_migration_cost(int cpu1, int cpu2)
         cache = vmalloc(max_size);
         if (!cache) {
                 printk("could not vmalloc %d bytes for cache!\n", 2*max_size);
-                return 1000000;         // return 1 msec on very small boxen
+                return 1000000;         /* return 1 msec on very small boxen */
         }
 
         while (size <= max_size) {
@@ -6457,7 +6494,12 @@ static int build_sched_domains(const cpumask_t *cpu_map)
         for (i = 0; i < MAX_NUMNODES; i++)
                 init_numa_sched_groups_power(sched_group_nodes[i]);
 
-        init_numa_sched_groups_power(sched_group_allnodes);
+        if (sched_group_allnodes) {
+                int group = cpu_to_allnodes_group(first_cpu(*cpu_map));
+                struct sched_group *sg = &sched_group_allnodes[group];
+
+                init_numa_sched_groups_power(sg);
+        }
 #endif
 
         /* Attach the domains */
@@ -6724,6 +6766,11 @@ void __init sched_init(void)
         }
 
         set_load_weight(&init_task);
+
+#ifdef CONFIG_RT_MUTEXES
+        plist_head_init(&init_task.pi_waiters, &init_task.pi_lock);
+#endif
+
         /*
          * The boot idle thread does lazy MMU switching as well:
          */
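
Note: the gating helper sched_info_on() called in sched_info_queued(), sched_info_switch() and sched_fork() above is not defined in this file; it belongs to the matching include/linux/sched.h change in the same delay-accounting series. A minimal sketch of what it is assumed to look like (illustrative only, not part of this diff):

#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
static inline int sched_info_on(void)
{
#ifdef CONFIG_SCHEDSTATS
        return 1;                       /* schedstats always needs sched_info */
#elif defined(CONFIG_TASK_DELAY_ACCT)
        extern int delayacct_on;        /* runtime switch for delay accounting */
        return delayacct_on;
#else
        return 0;
#endif
}
#endif

With a gate of this shape, the sched_info bookkeeping is only paid for when schedstats is built in or per-task delay accounting is actually enabled at runtime.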