Diffstat (limited to 'kernel/sched.c')
 -rw-r--r--  kernel/sched.c | 155
 1 files changed, 110 insertions(+), 45 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 7ffaabd64f89..7854ee516b92 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -49,6 +49,7 @@
 #include <linux/syscalls.h>
 #include <linux/times.h>
 #include <linux/acct.h>
+#include <linux/kprobes.h>
 #include <asm/tlb.h>
 
 #include <asm/unistd.h>
@@ -144,7 +145,8 @@
 	(v1) * (v2_max) / (v1_max)
 
 #define DELTA(p) \
-	(SCALE(TASK_NICE(p), 40, MAX_BONUS) + INTERACTIVE_DELTA)
+	(SCALE(TASK_NICE(p) + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 + \
+		INTERACTIVE_DELTA)
 
 #define TASK_INTERACTIVE(p) \
 	((p)->prio <= (p)->static_prio - DELTA(p))
@@ -1546,8 +1548,14 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
 	finish_lock_switch(rq, prev);
 	if (mm)
 		mmdrop(mm);
-	if (unlikely(prev_task_flags & PF_DEAD))
+	if (unlikely(prev_task_flags & PF_DEAD)) {
+		/*
+		 * Remove function-return probe instances associated with this
+		 * task and put them back on the free list.
+		 */
+		kprobe_flush_task(prev);
 		put_task_struct(prev);
+	}
 }
 
 /**
@@ -2871,13 +2879,11 @@ asmlinkage void __sched schedule(void)
 	 * schedule() atomically, we ignore that path for now.
 	 * Otherwise, whine if we are scheduling when we should not be.
 	 */
-	if (likely(!current->exit_state)) {
-		if (unlikely(in_atomic())) {
-			printk(KERN_ERR "BUG: scheduling while atomic: "
-				"%s/0x%08x/%d\n",
-				current->comm, preempt_count(), current->pid);
-			dump_stack();
-		}
+	if (unlikely(in_atomic() && !current->exit_state)) {
+		printk(KERN_ERR "BUG: scheduling while atomic: "
+			"%s/0x%08x/%d\n",
+			current->comm, preempt_count(), current->pid);
+		dump_stack();
 	}
 	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
 
@@ -5568,11 +5574,31 @@ static int cpu_to_cpu_group(int cpu)
 }
 #endif
 
+#ifdef CONFIG_SCHED_MC
+static DEFINE_PER_CPU(struct sched_domain, core_domains);
+static struct sched_group sched_group_core[NR_CPUS];
+#endif
+
+#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
+static int cpu_to_core_group(int cpu)
+{
+	return first_cpu(cpu_sibling_map[cpu]);
+}
+#elif defined(CONFIG_SCHED_MC)
+static int cpu_to_core_group(int cpu)
+{
+	return cpu;
+}
+#endif
+
 static DEFINE_PER_CPU(struct sched_domain, phys_domains);
 static struct sched_group sched_group_phys[NR_CPUS];
 static int cpu_to_phys_group(int cpu)
 {
-#ifdef CONFIG_SCHED_SMT
+#if defined(CONFIG_SCHED_MC)
+	cpumask_t mask = cpu_coregroup_map(cpu);
+	return first_cpu(mask);
+#elif defined(CONFIG_SCHED_SMT)
 	return first_cpu(cpu_sibling_map[cpu]);
 #else
 	return cpu;
@@ -5595,6 +5621,32 @@ static int cpu_to_allnodes_group(int cpu)
 {
 	return cpu_to_node(cpu);
 }
+static void init_numa_sched_groups_power(struct sched_group *group_head)
+{
+	struct sched_group *sg = group_head;
+	int j;
+
+	if (!sg)
+		return;
+next_sg:
+	for_each_cpu_mask(j, sg->cpumask) {
+		struct sched_domain *sd;
+
+		sd = &per_cpu(phys_domains, j);
+		if (j != first_cpu(sd->groups->cpumask)) {
+			/*
+			 * Only add "power" once for each
+			 * physical package.
+			 */
+			continue;
+		}
+
+		sg->cpu_power += sd->groups->cpu_power;
+	}
+	sg = sg->next;
+	if (sg != group_head)
+		goto next_sg;
+}
 #endif
 
 /*
@@ -5670,6 +5722,17 @@ void build_sched_domains(const cpumask_t *cpu_map)
 		sd->parent = p;
 		sd->groups = &sched_group_phys[group];
 
+#ifdef CONFIG_SCHED_MC
+		p = sd;
+		sd = &per_cpu(core_domains, i);
+		group = cpu_to_core_group(i);
+		*sd = SD_MC_INIT;
+		sd->span = cpu_coregroup_map(i);
+		cpus_and(sd->span, sd->span, *cpu_map);
+		sd->parent = p;
+		sd->groups = &sched_group_core[group];
+#endif
+
 #ifdef CONFIG_SCHED_SMT
 		p = sd;
 		sd = &per_cpu(cpu_domains, i);
@@ -5695,6 +5758,19 @@ void build_sched_domains(const cpumask_t *cpu_map)
 	}
 #endif
 
+#ifdef CONFIG_SCHED_MC
+	/* Set up multi-core groups */
+	for_each_cpu_mask(i, *cpu_map) {
+		cpumask_t this_core_map = cpu_coregroup_map(i);
+		cpus_and(this_core_map, this_core_map, *cpu_map);
+		if (i != first_cpu(this_core_map))
+			continue;
+		init_sched_build_groups(sched_group_core, this_core_map,
+						&cpu_to_core_group);
+	}
+#endif
+
+
 	/* Set up physical groups */
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		cpumask_t nodemask = node_to_cpumask(i);
@@ -5791,51 +5867,38 @@ void build_sched_domains(const cpumask_t *cpu_map)
 		power = SCHED_LOAD_SCALE;
 		sd->groups->cpu_power = power;
 #endif
+#ifdef CONFIG_SCHED_MC
+		sd = &per_cpu(core_domains, i);
+		power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1)
+					    * SCHED_LOAD_SCALE / 10;
+		sd->groups->cpu_power = power;
+
+		sd = &per_cpu(phys_domains, i);
 
+		/*
+		 * This has to be < 2 * SCHED_LOAD_SCALE
+		 * Lets keep it SCHED_LOAD_SCALE, so that
+		 * while calculating NUMA group's cpu_power
+		 * we can simply do
+		 *  numa_group->cpu_power += phys_group->cpu_power;
+		 *
+		 * See "only add power once for each physical pkg"
+		 * comment below
+		 */
+		sd->groups->cpu_power = SCHED_LOAD_SCALE;
+#else
 		sd = &per_cpu(phys_domains, i);
 		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
 			(cpus_weight(sd->groups->cpumask)-1) / 10;
 		sd->groups->cpu_power = power;
-
-#ifdef CONFIG_NUMA
-		sd = &per_cpu(allnodes_domains, i);
-		if (sd->groups) {
-			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
-			sd->groups->cpu_power = power;
-		}
 #endif
 	}
 
 #ifdef CONFIG_NUMA
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		struct sched_group *sg = sched_group_nodes[i];
-		int j;
-
-		if (sg == NULL)
-			continue;
-next_sg:
-		for_each_cpu_mask(j, sg->cpumask) {
-			struct sched_domain *sd;
-			int power;
+	for (i = 0; i < MAX_NUMNODES; i++)
+		init_numa_sched_groups_power(sched_group_nodes[i]);
 
-			sd = &per_cpu(phys_domains, j);
-			if (j != first_cpu(sd->groups->cpumask)) {
-				/*
-				 * Only add "power" once for each
-				 * physical package.
-				 */
-				continue;
-			}
-			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
-
-			sg->cpu_power += power;
-		}
-		sg = sg->next;
-		if (sg != sched_group_nodes[i])
-			goto next_sg;
-	}
+	init_numa_sched_groups_power(sched_group_allnodes);
 #endif
 
 	/* Attach the domains */
@@ -5843,6 +5906,8 @@ next_sg:
 		struct sched_domain *sd;
 #ifdef CONFIG_SCHED_SMT
 		sd = &per_cpu(cpu_domains, i);
+#elif defined(CONFIG_SCHED_MC)
+		sd = &per_cpu(core_domains, i);
 #else
 		sd = &per_cpu(phys_domains, i);
 #endif
