Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  155
1 file changed, 110 insertions(+), 45 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 7ffaabd64f89..7854ee516b92 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -49,6 +49,7 @@
 #include <linux/syscalls.h>
 #include <linux/times.h>
 #include <linux/acct.h>
+#include <linux/kprobes.h>
 #include <asm/tlb.h>
 
 #include <asm/unistd.h>
@@ -144,7 +145,8 @@
 	(v1) * (v2_max) / (v1_max)
 
 #define DELTA(p) \
-	(SCALE(TASK_NICE(p), 40, MAX_BONUS) + INTERACTIVE_DELTA)
+	(SCALE(TASK_NICE(p) + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 + \
+		INTERACTIVE_DELTA)
 
 #define TASK_INTERACTIVE(p) \
 	((p)->prio <= (p)->static_prio - DELTA(p))
@@ -1546,8 +1548,14 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
 	finish_lock_switch(rq, prev);
 	if (mm)
 		mmdrop(mm);
-	if (unlikely(prev_task_flags & PF_DEAD))
+	if (unlikely(prev_task_flags & PF_DEAD)) {
+		/*
+		 * Remove function-return probe instances associated with this
+		 * task and put them back on the free list.
+		 */
+		kprobe_flush_task(prev);
 		put_task_struct(prev);
+	}
 }
 
 /**
@@ -2871,13 +2879,11 @@ asmlinkage void __sched schedule(void)
 	 * schedule() atomically, we ignore that path for now.
 	 * Otherwise, whine if we are scheduling when we should not be.
 	 */
-	if (likely(!current->exit_state)) {
-		if (unlikely(in_atomic())) {
-			printk(KERN_ERR "BUG: scheduling while atomic: "
-				"%s/0x%08x/%d\n",
-				current->comm, preempt_count(), current->pid);
-			dump_stack();
-		}
+	if (unlikely(in_atomic() && !current->exit_state)) {
+		printk(KERN_ERR "BUG: scheduling while atomic: "
+			"%s/0x%08x/%d\n",
+			current->comm, preempt_count(), current->pid);
+		dump_stack();
 	}
 	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
 
@@ -5568,11 +5574,31 @@ static int cpu_to_cpu_group(int cpu)
 }
 #endif
 
+#ifdef CONFIG_SCHED_MC
+static DEFINE_PER_CPU(struct sched_domain, core_domains);
+static struct sched_group sched_group_core[NR_CPUS];
+#endif
+
+#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
+static int cpu_to_core_group(int cpu)
+{
+	return first_cpu(cpu_sibling_map[cpu]);
+}
+#elif defined(CONFIG_SCHED_MC)
+static int cpu_to_core_group(int cpu)
+{
+	return cpu;
+}
+#endif
+
 static DEFINE_PER_CPU(struct sched_domain, phys_domains);
 static struct sched_group sched_group_phys[NR_CPUS];
 static int cpu_to_phys_group(int cpu)
 {
-#ifdef CONFIG_SCHED_SMT
+#if defined(CONFIG_SCHED_MC)
+	cpumask_t mask = cpu_coregroup_map(cpu);
+	return first_cpu(mask);
+#elif defined(CONFIG_SCHED_SMT)
 	return first_cpu(cpu_sibling_map[cpu]);
 #else
 	return cpu;
@@ -5595,6 +5621,32 @@ static int cpu_to_allnodes_group(int cpu)
 {
 	return cpu_to_node(cpu);
 }
+static void init_numa_sched_groups_power(struct sched_group *group_head)
+{
+	struct sched_group *sg = group_head;
+	int j;
+
+	if (!sg)
+		return;
+next_sg:
+	for_each_cpu_mask(j, sg->cpumask) {
+		struct sched_domain *sd;
+
+		sd = &per_cpu(phys_domains, j);
+		if (j != first_cpu(sd->groups->cpumask)) {
+			/*
+			 * Only add "power" once for each
+			 * physical package.
+			 */
+			continue;
+		}
+
+		sg->cpu_power += sd->groups->cpu_power;
+	}
+	sg = sg->next;
+	if (sg != group_head)
+		goto next_sg;
+}
 #endif
 
 /*
@@ -5670,6 +5722,17 @@ void build_sched_domains(const cpumask_t *cpu_map)
 		sd->parent = p;
 		sd->groups = &sched_group_phys[group];
 
+#ifdef CONFIG_SCHED_MC
+		p = sd;
+		sd = &per_cpu(core_domains, i);
+		group = cpu_to_core_group(i);
+		*sd = SD_MC_INIT;
+		sd->span = cpu_coregroup_map(i);
+		cpus_and(sd->span, sd->span, *cpu_map);
+		sd->parent = p;
+		sd->groups = &sched_group_core[group];
+#endif
+
 #ifdef CONFIG_SCHED_SMT
 		p = sd;
 		sd = &per_cpu(cpu_domains, i);
@@ -5695,6 +5758,19 @@ void build_sched_domains(const cpumask_t *cpu_map)
 	}
 #endif
 
+#ifdef CONFIG_SCHED_MC
+	/* Set up multi-core groups */
+	for_each_cpu_mask(i, *cpu_map) {
+		cpumask_t this_core_map = cpu_coregroup_map(i);
+		cpus_and(this_core_map, this_core_map, *cpu_map);
+		if (i != first_cpu(this_core_map))
+			continue;
+		init_sched_build_groups(sched_group_core, this_core_map,
+					&cpu_to_core_group);
+	}
+#endif
+
+
 	/* Set up physical groups */
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		cpumask_t nodemask = node_to_cpumask(i);
@@ -5791,51 +5867,38 @@ void build_sched_domains(const cpumask_t *cpu_map)
 		power = SCHED_LOAD_SCALE;
 		sd->groups->cpu_power = power;
 #endif
+#ifdef CONFIG_SCHED_MC
+		sd = &per_cpu(core_domains, i);
+		power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1)
+					    * SCHED_LOAD_SCALE / 10;
+		sd->groups->cpu_power = power;
+
+		sd = &per_cpu(phys_domains, i);
 
+		/*
+		 * This has to be < 2 * SCHED_LOAD_SCALE
+		 * Lets keep it SCHED_LOAD_SCALE, so that
+		 * while calculating NUMA group's cpu_power
+		 * we can simply do
+		 *  numa_group->cpu_power += phys_group->cpu_power;
+		 *
+		 * See "only add power once for each physical pkg"
+		 * comment below
+		 */
+		sd->groups->cpu_power = SCHED_LOAD_SCALE;
+#else
 		sd = &per_cpu(phys_domains, i);
 		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
 			(cpus_weight(sd->groups->cpumask)-1) / 10;
 		sd->groups->cpu_power = power;
-
-#ifdef CONFIG_NUMA
-		sd = &per_cpu(allnodes_domains, i);
-		if (sd->groups) {
-			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
-			sd->groups->cpu_power = power;
-		}
 #endif
 	}
 
 #ifdef CONFIG_NUMA
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		struct sched_group *sg = sched_group_nodes[i];
-		int j;
-
-		if (sg == NULL)
-			continue;
-next_sg:
-		for_each_cpu_mask(j, sg->cpumask) {
-			struct sched_domain *sd;
-			int power;
+	for (i = 0; i < MAX_NUMNODES; i++)
+		init_numa_sched_groups_power(sched_group_nodes[i]);
 
-			sd = &per_cpu(phys_domains, j);
-			if (j != first_cpu(sd->groups->cpumask)) {
-				/*
-				 * Only add "power" once for each
-				 * physical package.
-				 */
-				continue;
-			}
-			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
-
-			sg->cpu_power += power;
-		}
-		sg = sg->next;
-		if (sg != sched_group_nodes[i])
-			goto next_sg;
-	}
+	init_numa_sched_groups_power(sched_group_allnodes);
 #endif
 
 	/* Attach the domains */
@@ -5843,6 +5906,8 @@ next_sg:
 		struct sched_domain *sd;
 #ifdef CONFIG_SCHED_SMT
 		sd = &per_cpu(cpu_domains, i);
+#elif defined(CONFIG_SCHED_MC)
+		sd = &per_cpu(core_domains, i);
 #else
 		sd = &per_cpu(phys_domains, i);
 #endif
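
Note on the DELTA() hunk: the rewritten macro appears to fix how negative nice values are scaled, since C integer division of a negative operand truncates toward zero while the shifted form effectively rounds toward minus infinity. The standalone sketch below (not part of the patch) compares the two formulas; MAX_BONUS = 10 and INTERACTIVE_DELTA = 2 are assumed here to match the 2.6-era scheduler defaults, and TASK_NICE(p) is replaced by a plain integer so it compiles on its own.

/*
 * Sketch: print the nice levels where the old and new DELTA() formulas
 * disagree.  Values of MAX_BONUS and INTERACTIVE_DELTA are assumptions,
 * not taken from this patch.
 */
#include <stdio.h>

#define MAX_BONUS		10
#define INTERACTIVE_DELTA	2

#define SCALE(v1, v1_max, v2_max) \
	(v1) * (v2_max) / (v1_max)

/* Old formula: negative nice values truncate toward zero. */
#define DELTA_OLD(nice) \
	(SCALE(nice, 40, MAX_BONUS) + INTERACTIVE_DELTA)

/* New formula: shift nice into 0..39 before scaling, then remove the offset. */
#define DELTA_NEW(nice) \
	(SCALE((nice) + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 + \
		INTERACTIVE_DELTA)

int main(void)
{
	int nice;

	for (nice = -20; nice <= 19; nice++)
		if (DELTA_OLD(nice) != DELTA_NEW(nice))
			printf("nice %3d: old %d, new %d\n",
			       nice, DELTA_OLD(nice), DELTA_NEW(nice));
	return 0;
}

Only negative nice levels that are not multiples of 4 differ, which is consistent with the change being a rounding fix for TASK_INTERACTIVE() rather than a behavioural change for non-negative nice.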