Diffstat (limited to 'kernel/sched.c')
-rw-r--r--    kernel/sched.c | 125
1 file changed, 86 insertions(+), 39 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 4ee400f9d56b..a2be2d055299 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -51,6 +51,7 @@
 #include <linux/times.h>
 #include <linux/acct.h>
 #include <linux/kprobes.h>
+#include <linux/delayacct.h>
 #include <asm/tlb.h>
 
 #include <asm/unistd.h>
@@ -501,9 +502,36 @@ struct file_operations proc_schedstat_operations = {
 	.release = single_release,
 };
 
+/*
+ * Expects runqueue lock to be held for atomicity of update
+ */
+static inline void
+rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
+{
+	if (rq) {
+		rq->rq_sched_info.run_delay += delta_jiffies;
+		rq->rq_sched_info.pcnt++;
+	}
+}
+
+/*
+ * Expects runqueue lock to be held for atomicity of update
+ */
+static inline void
+rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
+{
+	if (rq)
+		rq->rq_sched_info.cpu_time += delta_jiffies;
+}
 # define schedstat_inc(rq, field)	do { (rq)->field++; } while (0)
 # define schedstat_add(rq, field, amt)	do { (rq)->field += (amt); } while (0)
 #else /* !CONFIG_SCHEDSTATS */
+static inline void
+rq_sched_info_arrive(struct rq *rq, unsigned long delta_jiffies)
+{}
+static inline void
+rq_sched_info_depart(struct rq *rq, unsigned long delta_jiffies)
+{}
 # define schedstat_inc(rq, field)	do { } while (0)
 # define schedstat_add(rq, field, amt)	do { } while (0)
 #endif
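The hunk above follows the kernel's usual configuration-stub pattern: real inline helpers under CONFIG_SCHEDSTATS, empty inline stubs otherwise, so call sites compile unchanged either way and the disabled case costs nothing. A minimal userspace sketch of the same pattern (all names are hypothetical, not from the patch); build with -DWITH_STATS to enable the real helper:

    #include <stdio.h>

    struct stats { unsigned long run_delay; unsigned long pcnt; };
    struct queue { struct stats st; };

    #ifdef WITH_STATS
    static inline void stats_arrive(struct queue *q, unsigned long delta)
    {
        if (q) {
            q->st.run_delay += delta;
            q->st.pcnt++;
        }
    }
    #else
    /* compiles away entirely when stats are configured out */
    static inline void stats_arrive(struct queue *q, unsigned long delta) {}
    #endif

    int main(void)
    {
        struct queue q = { { 0, 0 } };

        stats_arrive(&q, 3);    /* no #ifdef needed at the call site */
        printf("run_delay=%lu pcnt=%lu\n", q.st.run_delay, q.st.pcnt);
        return 0;
    }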
@@ -523,7 +551,7 @@ static inline struct rq *this_rq_lock(void)
 	return rq;
 }
 
-#ifdef CONFIG_SCHEDSTATS
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 /*
  * Called when a process is dequeued from the active array and given
  * the cpu.  We should note that with the exception of interactive
@@ -551,21 +579,16 @@ static inline void sched_info_dequeued(struct task_struct *t)
  */
 static void sched_info_arrive(struct task_struct *t)
 {
-	unsigned long now = jiffies, diff = 0;
-	struct rq *rq = task_rq(t);
+	unsigned long now = jiffies, delta_jiffies = 0;
 
 	if (t->sched_info.last_queued)
-		diff = now - t->sched_info.last_queued;
+		delta_jiffies = now - t->sched_info.last_queued;
 	sched_info_dequeued(t);
-	t->sched_info.run_delay += diff;
+	t->sched_info.run_delay += delta_jiffies;
 	t->sched_info.last_arrival = now;
 	t->sched_info.pcnt++;
 
-	if (!rq)
-		return;
-
-	rq->rq_sched_info.run_delay += diff;
-	rq->rq_sched_info.pcnt++;
+	rq_sched_info_arrive(task_rq(t), delta_jiffies);
 }
 
 /*
@@ -585,8 +608,9 @@ static void sched_info_arrive(struct task_struct *t)
  */
 static inline void sched_info_queued(struct task_struct *t)
 {
-	if (!t->sched_info.last_queued)
-		t->sched_info.last_queued = jiffies;
+	if (unlikely(sched_info_on()))
+		if (!t->sched_info.last_queued)
+			t->sched_info.last_queued = jiffies;
 }
 
 /*
@@ -595,13 +619,10 @@ static inline void sched_info_queued(struct task_struct *t)
  */
 static inline void sched_info_depart(struct task_struct *t)
 {
-	struct rq *rq = task_rq(t);
-	unsigned long diff = jiffies - t->sched_info.last_arrival;
-
-	t->sched_info.cpu_time += diff;
+	unsigned long delta_jiffies = jiffies - t->sched_info.last_arrival;
 
-	if (rq)
-		rq->rq_sched_info.cpu_time += diff;
+	t->sched_info.cpu_time += delta_jiffies;
+	rq_sched_info_depart(task_rq(t), delta_jiffies);
 }
 
 /*
@@ -610,7 +631,7 @@ static inline void sched_info_depart(struct task_struct *t)
  * the idle task.)  We are only called when prev != next.
  */
 static inline void
-sched_info_switch(struct task_struct *prev, struct task_struct *next)
+__sched_info_switch(struct task_struct *prev, struct task_struct *next)
 {
 	struct rq *rq = task_rq(prev);
 
@@ -625,10 +646,16 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
 	if (next != rq->idle)
 		sched_info_arrive(next);
 }
+static inline void
+sched_info_switch(struct task_struct *prev, struct task_struct *next)
+{
+	if (unlikely(sched_info_on()))
+		__sched_info_switch(prev, next);
+}
 #else
 #define sched_info_queued(t)		do { } while (0)
 #define sched_info_switch(t, next)	do { } while (0)
-#endif /* CONFIG_SCHEDSTATS */
+#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
 
 /*
  * Adding/removing a task to/from a priority array:
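The split into __sched_info_switch() and a thin sched_info_switch() wrapper keeps the common path to one runtime test: sched_info_on(), marked unlikely(), gates the bookkeeping, so a kernel built with delay accounting but running with it disabled pays only a predicted-not-taken branch. A rough userspace sketch of this gating pattern (names hypothetical):

    #include <stdbool.h>
    #include <stdio.h>

    #define unlikely(x) __builtin_expect(!!(x), 0)

    static bool info_enabled;       /* stands in for sched_info_on() */

    static void __info_switch(int prev, int next)
    {
        printf("account switch %d -> %d\n", prev, next);    /* slow path */
    }

    static inline void info_switch(int prev, int next)
    {
        if (unlikely(info_enabled))     /* one predicted branch when off */
            __info_switch(prev, next);
    }

    int main(void)
    {
        info_switch(1, 2);      /* accounting off: no work done */
        info_enabled = true;
        info_switch(2, 3);      /* accounting on: slow path runs */
        return 0;
    }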
@@ -1530,8 +1557,9 @@ void fastcall sched_fork(struct task_struct *p, int clone_flags)
 
 	INIT_LIST_HEAD(&p->run_list);
 	p->array = NULL;
-#ifdef CONFIG_SCHEDSTATS
-	memset(&p->sched_info, 0, sizeof(p->sched_info));
+#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+	if (unlikely(sched_info_on()))
+		memset(&p->sched_info, 0, sizeof(p->sched_info));
 #endif
 #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
 	p->oncpu = 0;
@@ -1788,7 +1816,15 @@ context_switch(struct rq *rq, struct task_struct *prev,
 		WARN_ON(rq->prev_mm);
 		rq->prev_mm = oldmm;
 	}
+	/*
+	 * Since the runqueue lock will be released by the next
+	 * task (which is an invalid locking op but in the case
+	 * of the scheduler it's an obvious special-case), so we
+	 * do an early lockdep release here:
+	 */
+#ifndef __ARCH_WANT_UNLOCKED_CTXSW
 	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
+#endif
 
 	/* Here we just switch the register state and the stack. */
 	switch_to(prev, next, prev);
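The early spin_release() tells lockdep that rq->lock will be dropped by the *next* task after the switch, a cross-owner unlock that a checker would otherwise flag. A userspace sketch of that situation, assuming a hand-rolled atomic-flag lock (pthread mutexes forbid this handover outright); the lock and thread names are hypothetical:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_flag rq_lock = ATOMIC_FLAG_INIT;

    static void *next_task(void *arg)
    {
        /* release a lock this thread never acquired -- the special
         * case the comment above describes; a checker like lockdep
         * has to be told about the ownership handover */
        atomic_flag_clear(&rq_lock);
        return NULL;
    }

    int main(void)
    {
        pthread_t next;

        while (atomic_flag_test_and_set(&rq_lock))
            ;                       /* "prev" takes the runqueue lock */
        pthread_create(&next, NULL, next_task, NULL);
        pthread_join(next, NULL);   /* "next" has dropped it for us */
        printf("lock handed over across contexts\n");
        return 0;
    }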
@@ -3384,7 +3420,7 @@ EXPORT_SYMBOL(schedule);
 
 #ifdef CONFIG_PREEMPT
 /*
- * this is is the entry point to schedule() from in-kernel preemption
+ * this is the entry point to schedule() from in-kernel preemption
  * off of preempt_enable. Kernel preemptions off return from interrupt
  * occur there and call schedule directly.
  */
@@ -3427,7 +3463,7 @@ need_resched:
 EXPORT_SYMBOL(preempt_schedule);
 
 /*
- * this is is the entry point to schedule() from kernel preemption
+ * this is the entry point to schedule() from kernel preemption
  * off of irq context.
  * Note, that this is called and return with irqs disabled. This will
  * protect us against recursive calling from irq.
@@ -3439,7 +3475,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
 	struct task_struct *task = current;
 	int saved_lock_depth;
 #endif
-	/* Catch callers which need to be fixed*/
+	/* Catch callers which need to be fixed */
 	BUG_ON(ti->preempt_count || !irqs_disabled());
 
 need_resched:
@@ -4420,9 +4456,9 @@ asmlinkage long sys_sched_yield(void)
 	return 0;
 }
 
-static inline int __resched_legal(void)
+static inline int __resched_legal(int expected_preempt_count)
 {
-	if (unlikely(preempt_count()))
+	if (unlikely(preempt_count() != expected_preempt_count))
 		return 0;
 	if (unlikely(system_state != SYSTEM_RUNNING))
 		return 0;
@@ -4448,7 +4484,7 @@ static void __cond_resched(void)
 
 int __sched cond_resched(void)
 {
-	if (need_resched() && __resched_legal()) {
+	if (need_resched() && __resched_legal(0)) {
 		__cond_resched();
 		return 1;
 	}
@@ -4474,7 +4510,7 @@ int cond_resched_lock(spinlock_t *lock)
 		ret = 1;
 		spin_lock(lock);
 	}
-	if (need_resched() && __resched_legal()) {
+	if (need_resched() && __resched_legal(1)) {
 		spin_release(&lock->dep_map, 1, _THIS_IP_);
 		_raw_spin_unlock(lock);
 		preempt_enable_no_resched();
@@ -4490,7 +4526,7 @@ int __sched cond_resched_softirq(void)
 {
 	BUG_ON(!in_softirq());
 
-	if (need_resched() && __resched_legal()) {
+	if (need_resched() && __resched_legal(0)) {
 		raw_local_irq_disable();
 		_local_bh_enable();
 		raw_local_irq_enable();
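The new expected_preempt_count argument encodes how many preempt-disable levels each call site legitimately holds: cond_resched() and cond_resched_softirq() expect none, while cond_resched_lock() holds one spinlock, which counts as one level on preemptible kernels. A rough userspace model of the check (names hypothetical, not the kernel API):

    #include <stdio.h>

    static int preempt_count;   /* stands in for the per-task counter */

    static int resched_legal(int expected)
    {
        /* rescheduling is only safe when the count matches what the
         * call site is known to hold -- anything else means a caller
         * is inside an unexpected lock or preempt-disabled section */
        return preempt_count == expected;
    }

    int main(void)
    {
        printf("cond_resched: %d\n", resched_legal(0));         /* 1: legal */

        preempt_count++;    /* spin_lock() disables preemption once */
        printf("cond_resched_lock: %d\n", resched_legal(1));    /* 1: legal */
        printf("unexpected nesting: %d\n", resched_legal(0));   /* 0: refused */
        preempt_count--;    /* spin_unlock() */
        return 0;
    }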
@@ -4526,9 +4562,11 @@ void __sched io_schedule(void)
 {
 	struct rq *rq = &__raw_get_cpu_var(runqueues);
 
+	delayacct_blkio_start();
 	atomic_inc(&rq->nr_iowait);
 	schedule();
 	atomic_dec(&rq->nr_iowait);
+	delayacct_blkio_end();
 }
 EXPORT_SYMBOL(io_schedule);
 
@@ -4537,9 +4575,11 @@ long __sched io_schedule_timeout(long timeout)
 	struct rq *rq = &__raw_get_cpu_var(runqueues);
 	long ret;
 
+	delayacct_blkio_start();
 	atomic_inc(&rq->nr_iowait);
 	ret = schedule_timeout(timeout);
 	atomic_dec(&rq->nr_iowait);
+	delayacct_blkio_end();
 	return ret;
 }
 
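In both functions the delayacct_blkio_start()/delayacct_blkio_end() pair brackets the sleep, so delay accounting can attribute the whole blocked interval to block I/O. A minimal userspace analogue of that bracketing, assuming CLOCK_MONOTONIC timestamps stand in for the kernel's internal bookkeeping (names hypothetical):

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>
    #include <unistd.h>

    static uint64_t blkio_delay_ns;     /* per-task total in the real code */
    static struct timespec blkio_start;

    static void delay_blkio_start(void)
    {
        clock_gettime(CLOCK_MONOTONIC, &blkio_start);
    }

    static void delay_blkio_end(void)
    {
        struct timespec now;

        clock_gettime(CLOCK_MONOTONIC, &now);
        blkio_delay_ns += (now.tv_sec - blkio_start.tv_sec) * 1000000000ull
                + (now.tv_nsec - blkio_start.tv_nsec);
    }

    int main(void)
    {
        delay_blkio_start();
        usleep(10000);      /* stands in for schedule() during block I/O */
        delay_blkio_end();
        printf("blkio delay: %llu ns\n", (unsigned long long)blkio_delay_ns);
        return 0;
    }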
@@ -4650,7 +4690,7 @@ static inline struct task_struct *younger_sibling(struct task_struct *p)
 	return list_entry(p->sibling.next,struct task_struct,sibling);
 }
 
-static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" };
+static const char stat_nam[] = "RSDTtZX";
 
 static void show_task(struct task_struct *p)
 {
@@ -4658,12 +4698,9 @@ static void show_task(struct task_struct *p)
 	unsigned long free = 0;
 	unsigned state;
 
-	printk("%-13.13s ", p->comm);
 	state = p->state ? __ffs(p->state) + 1 : 0;
-	if (state < ARRAY_SIZE(stat_nam))
-		printk(stat_nam[state]);
-	else
-		printk("?");
+	printk("%-13.13s %c", p->comm,
+		state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
 #if (BITS_PER_LONG == 32)
 	if (state == TASK_RUNNING)
 		printk(" running ");
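The rewrite works because task states are one-hot bits: __ffs() yields the index of the lowest set bit, so p->state == 0 (TASK_RUNNING) maps to 'R' and each successive state bit to the next character of "RSDTtZX". A userspace check of that mapping, with __builtin_ctzl() standing in for the kernel's __ffs():

    #include <stdio.h>

    static const char stat_nam[] = "RSDTtZX";

    static char state_char(unsigned long state)
    {
        unsigned idx = state ? __builtin_ctzl(state) + 1 : 0;

        /* sizeof includes the NUL, hence the "- 1" bound */
        return idx < sizeof(stat_nam) - 1 ? stat_nam[idx] : '?';
    }

    int main(void)
    {
        printf("%c\n", state_char(0));          /* R: running */
        printf("%c\n", state_char(1));          /* S: bit 0, interruptible */
        printf("%c\n", state_char(2));          /* D: bit 1, uninterruptible */
        printf("%c\n", state_char(1ul << 10));  /* ?: out of range */
        return 0;
    }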
@@ -4877,7 +4914,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 		p->timestamp = p->timestamp - rq_src->timestamp_last_tick
 				+ rq_dest->timestamp_last_tick;
 		deactivate_task(p, rq_src);
-		activate_task(p, rq_dest, 0);
+		__activate_task(p, rq_dest);
 		if (TASK_PREEMPTS_CURR(p, rq_dest))
 			resched_task(rq_dest->curr);
 	}
@@ -5776,7 +5813,7 @@ static unsigned long long measure_migration_cost(int cpu1, int cpu2)
 	cache = vmalloc(max_size);
 	if (!cache) {
 		printk("could not vmalloc %d bytes for cache!\n", 2*max_size);
-		return 1000000;		// return 1 msec on very small boxen
+		return 1000000;		/* return 1 msec on very small boxen */
 	}
 
 	while (size <= max_size) {
@@ -6457,7 +6494,12 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 	for (i = 0; i < MAX_NUMNODES; i++)
 		init_numa_sched_groups_power(sched_group_nodes[i]);
 
-	init_numa_sched_groups_power(sched_group_allnodes);
+	if (sched_group_allnodes) {
+		int group = cpu_to_allnodes_group(first_cpu(*cpu_map));
+		struct sched_group *sg = &sched_group_allnodes[group];
+
+		init_numa_sched_groups_power(sg);
+	}
 #endif
 
 	/* Attach the domains */
@@ -6724,6 +6766,11 @@ void __init sched_init(void)
 	}
 
 	set_load_weight(&init_task);
+
+#ifdef CONFIG_RT_MUTEXES
+	plist_head_init(&init_task.pi_waiters, &init_task.pi_lock);
+#endif
+
 	/*
 	 * The boot idle thread does lazy MMU switching as well:
 	 */