path: root/kernel/sched.c
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  154
1 file changed, 106 insertions, 48 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 78acdefeccca..a9ecac398bb9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -145,7 +145,8 @@
 	(v1) * (v2_max) / (v1_max)
 
 #define DELTA(p) \
-	(SCALE(TASK_NICE(p), 40, MAX_BONUS) + INTERACTIVE_DELTA)
+	(SCALE(TASK_NICE(p) + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 + \
+		INTERACTIVE_DELTA)
 
 #define TASK_INTERACTIVE(p) \
 	((p)->prio <= (p)->static_prio - DELTA(p))
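
The old and new DELTA() expressions agree for nice levels 0..19 and differ only for negative nice, where C's integer division truncates toward zero rather than rounding down; shifting TASK_NICE(p) into the 0..39 range before scaling and subtracting the offset afterwards gives a uniform mapping. A standalone userspace sketch of that arithmetic (not part of the patch; it assumes the usual values MAX_BONUS = 10 and INTERACTIVE_DELTA = 2, and stubs TASK_NICE(p) with a plain integer):

/*
 * Illustrative only -- not part of the patch.  MAX_BONUS and
 * INTERACTIVE_DELTA use their usual values (10 and 2); TASK_NICE(p)
 * is replaced by a plain integer argument.
 */
#include <stdio.h>

#define MAX_BONUS		10
#define INTERACTIVE_DELTA	2
#define SCALE(v1, v1_max, v2_max)	((v1) * (v2_max) / (v1_max))

#define DELTA_OLD(nice)	(SCALE((nice), 40, MAX_BONUS) + INTERACTIVE_DELTA)
#define DELTA_NEW(nice)	(SCALE((nice) + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 + \
				INTERACTIVE_DELTA)

int main(void)
{
	int nice;

	for (nice = -20; nice <= 19; nice++)
		printf("nice %3d: old delta %3d, new delta %3d\n",
		       nice, DELTA_OLD(nice), DELTA_NEW(nice));
	return 0;
}

Running it shows, for example, nice -1 mapping to a delta of 2 with the old macro but 1 with the new one, while all non-negative nice levels are unchanged.
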
@@ -1624,7 +1625,7 @@ unsigned long nr_uninterruptible(void)
 {
 	unsigned long i, sum = 0;
 
-	for_each_cpu(i)
+	for_each_possible_cpu(i)
 		sum += cpu_rq(i)->nr_uninterruptible;
 
 	/*
@@ -1641,7 +1642,7 @@ unsigned long long nr_context_switches(void)
 {
 	unsigned long long i, sum = 0;
 
-	for_each_cpu(i)
+	for_each_possible_cpu(i)
 		sum += cpu_rq(i)->nr_switches;
 
 	return sum;
@@ -1651,7 +1652,7 @@ unsigned long nr_iowait(void)
 {
 	unsigned long i, sum = 0;
 
-	for_each_cpu(i)
+	for_each_possible_cpu(i)
 		sum += atomic_read(&cpu_rq(i)->nr_iowait);
 
 	return sum;
@@ -2878,13 +2879,11 @@ asmlinkage void __sched schedule(void)
 	 * schedule() atomically, we ignore that path for now.
 	 * Otherwise, whine if we are scheduling when we should not be.
 	 */
-	if (likely(!current->exit_state)) {
-		if (unlikely(in_atomic())) {
-			printk(KERN_ERR "BUG: scheduling while atomic: "
-				"%s/0x%08x/%d\n",
-				current->comm, preempt_count(), current->pid);
-			dump_stack();
-		}
+	if (unlikely(in_atomic() && !current->exit_state)) {
+		printk(KERN_ERR "BUG: scheduling while atomic: "
+			"%s/0x%08x/%d\n",
+			current->comm, preempt_count(), current->pid);
+		dump_stack();
 	}
 	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
 
@@ -5575,11 +5574,31 @@ static int cpu_to_cpu_group(int cpu)
 }
 #endif
 
+#ifdef CONFIG_SCHED_MC
+static DEFINE_PER_CPU(struct sched_domain, core_domains);
+static struct sched_group sched_group_core[NR_CPUS];
+#endif
+
+#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
+static int cpu_to_core_group(int cpu)
+{
+	return first_cpu(cpu_sibling_map[cpu]);
+}
+#elif defined(CONFIG_SCHED_MC)
+static int cpu_to_core_group(int cpu)
+{
+	return cpu;
+}
+#endif
+
 static DEFINE_PER_CPU(struct sched_domain, phys_domains);
 static struct sched_group sched_group_phys[NR_CPUS];
 static int cpu_to_phys_group(int cpu)
 {
-#ifdef CONFIG_SCHED_SMT
+#if defined(CONFIG_SCHED_MC)
+	cpumask_t mask = cpu_coregroup_map(cpu);
+	return first_cpu(mask);
+#elif defined(CONFIG_SCHED_SMT)
 	return first_cpu(cpu_sibling_map[cpu]);
 #else
 	return cpu;
@@ -5602,6 +5621,32 @@ static int cpu_to_allnodes_group(int cpu)
 {
 	return cpu_to_node(cpu);
 }
+static void init_numa_sched_groups_power(struct sched_group *group_head)
+{
+	struct sched_group *sg = group_head;
+	int j;
+
+	if (!sg)
+		return;
+next_sg:
+	for_each_cpu_mask(j, sg->cpumask) {
+		struct sched_domain *sd;
+
+		sd = &per_cpu(phys_domains, j);
+		if (j != first_cpu(sd->groups->cpumask)) {
+			/*
+			 * Only add "power" once for each
+			 * physical package.
+			 */
+			continue;
+		}
+
+		sg->cpu_power += sd->groups->cpu_power;
+	}
+	sg = sg->next;
+	if (sg != group_head)
+		goto next_sg;
+}
 #endif
 
 /*
@@ -5677,6 +5722,17 @@ void build_sched_domains(const cpumask_t *cpu_map)
 		sd->parent = p;
 		sd->groups = &sched_group_phys[group];
 
+#ifdef CONFIG_SCHED_MC
+		p = sd;
+		sd = &per_cpu(core_domains, i);
+		group = cpu_to_core_group(i);
+		*sd = SD_MC_INIT;
+		sd->span = cpu_coregroup_map(i);
+		cpus_and(sd->span, sd->span, *cpu_map);
+		sd->parent = p;
+		sd->groups = &sched_group_core[group];
+#endif
+
 #ifdef CONFIG_SCHED_SMT
 		p = sd;
 		sd = &per_cpu(cpu_domains, i);
@@ -5702,6 +5758,19 @@ void build_sched_domains(const cpumask_t *cpu_map)
 	}
 #endif
 
+#ifdef CONFIG_SCHED_MC
+	/* Set up multi-core groups */
+	for_each_cpu_mask(i, *cpu_map) {
+		cpumask_t this_core_map = cpu_coregroup_map(i);
+		cpus_and(this_core_map, this_core_map, *cpu_map);
+		if (i != first_cpu(this_core_map))
+			continue;
+		init_sched_build_groups(sched_group_core, this_core_map,
+					&cpu_to_core_group);
+	}
+#endif
+
+
 	/* Set up physical groups */
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		cpumask_t nodemask = node_to_cpumask(i);
@@ -5798,51 +5867,38 @@ void build_sched_domains(const cpumask_t *cpu_map)
 		power = SCHED_LOAD_SCALE;
 		sd->groups->cpu_power = power;
 #endif
+#ifdef CONFIG_SCHED_MC
+		sd = &per_cpu(core_domains, i);
+		power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1)
+					    * SCHED_LOAD_SCALE / 10;
+		sd->groups->cpu_power = power;
 
 		sd = &per_cpu(phys_domains, i);
+
+		/*
+		 * This has to be < 2 * SCHED_LOAD_SCALE
+		 * Lets keep it SCHED_LOAD_SCALE, so that
+		 * while calculating NUMA group's cpu_power
+		 * we can simply do
+		 *  numa_group->cpu_power += phys_group->cpu_power;
+		 *
+		 * See "only add power once for each physical pkg"
+		 * comment below
+		 */
+		sd->groups->cpu_power = SCHED_LOAD_SCALE;
+#else
+		sd = &per_cpu(phys_domains, i);
 		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
 			(cpus_weight(sd->groups->cpumask)-1) / 10;
 		sd->groups->cpu_power = power;
-
-#ifdef CONFIG_NUMA
-		sd = &per_cpu(allnodes_domains, i);
-		if (sd->groups) {
-			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
-			sd->groups->cpu_power = power;
-		}
 #endif
 	}
 
 #ifdef CONFIG_NUMA
-	for (i = 0; i < MAX_NUMNODES; i++) {
-		struct sched_group *sg = sched_group_nodes[i];
-		int j;
-
-		if (sg == NULL)
-			continue;
-next_sg:
-		for_each_cpu_mask(j, sg->cpumask) {
-			struct sched_domain *sd;
-			int power;
-
-			sd = &per_cpu(phys_domains, j);
-			if (j != first_cpu(sd->groups->cpumask)) {
-				/*
-				 * Only add "power" once for each
-				 * physical package.
-				 */
-				continue;
-			}
-			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-				(cpus_weight(sd->groups->cpumask)-1) / 10;
+	for (i = 0; i < MAX_NUMNODES; i++)
+		init_numa_sched_groups_power(sched_group_nodes[i]);
 
-			sg->cpu_power += power;
-		}
-		sg = sg->next;
-		if (sg != sched_group_nodes[i])
-			goto next_sg;
-	}
+	init_numa_sched_groups_power(sched_group_allnodes);
 #endif
 
 	/* Attach the domains */
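
To make the cpu_power arithmetic in this hunk concrete: each group at the core level covers one core's sibling threads and extra siblings are weighted at a tenth of a CPU, while under CONFIG_SCHED_MC the physical-package groups are pinned to exactly SCHED_LOAD_SCALE so that init_numa_sched_groups_power() can sum one value per package. A standalone sketch of those numbers (not part of the patch; it assumes SCHED_LOAD_SCALE of 128 and two SMT threads per core):

/*
 * Illustrative only -- not part of the patch.  Assumes
 * SCHED_LOAD_SCALE == 128 and a package of cores with two SMT
 * threads each; it just reproduces the arithmetic used above.
 */
#include <stdio.h>

#define SCHED_LOAD_SCALE 128UL

int main(void)
{
	unsigned long threads_per_core = 2;

	/* cpu_domains (SMT) groups: one CPU each */
	unsigned long smt_power = SCHED_LOAD_SCALE;

	/* core_domains groups: one core's sibling threads each;
	 * each extra thread counts as roughly 10% of a CPU */
	unsigned long core_power = SCHED_LOAD_SCALE +
		(threads_per_core - 1) * SCHED_LOAD_SCALE / 10;

	/* phys_domains groups under CONFIG_SCHED_MC: pinned to
	 * SCHED_LOAD_SCALE so NUMA groups sum one value per package */
	unsigned long phys_power = SCHED_LOAD_SCALE;

	printf("smt=%lu core=%lu phys=%lu\n",	/* smt=128 core=140 phys=128 */
	       smt_power, core_power, phys_power);
	return 0;
}
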
@@ -5850,6 +5906,8 @@ next_sg:
 		struct sched_domain *sd;
 #ifdef CONFIG_SCHED_SMT
 		sd = &per_cpu(cpu_domains, i);
+#elif defined(CONFIG_SCHED_MC)
+		sd = &per_cpu(core_domains, i);
 #else
 		sd = &per_cpu(phys_domains, i);
 #endif
@@ -6022,7 +6080,7 @@ void __init sched_init(void)
 	runqueue_t *rq;
 	int i, j, k;
 
-	for_each_cpu(i) {
+	for_each_possible_cpu(i) {
 		prio_array_t *array;
 
 		rq = cpu_rq(i);