Diffstat (limited to 'kernel/sched.c')
 kernel/sched.c | 81 ++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 59 insertions(+), 22 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index bfb8ad8ed171..4e2f60335656 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -312,12 +312,15 @@ static DEFINE_SPINLOCK(task_group_lock);
 #endif
 
 /*
- * A weight of 0, 1 or ULONG_MAX can cause arithmetics problems.
+ * A weight of 0 or 1 can cause arithmetics problems.
+ * A weight of a cfs_rq is the sum of weights of which entities
+ * are queued on this cfs_rq, so a weight of a entity should not be
+ * too large, so as the shares value of a task group.
  * (The default weight is 1024 - so there's no practical
  * limitation from this.)
  */
 #define MIN_SHARES	2
-#define MAX_SHARES	(ULONG_MAX - 1)
+#define MAX_SHARES	(1UL << 18)
 
 static int init_task_group_load = INIT_TASK_GROUP_LOAD;
 #endif
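
The lower MAX_SHARES bound matters because per-group shares feed entity weights, and a cfs_rq's weight is the sum of its queued entities' weights, as the new comment explains. As an illustration only (this clamp is not part of the hunk above), a minimal userspace sketch of how a user-supplied shares value would be bounded by the two defines:

#include <stdio.h>

#define MIN_SHARES	2
#define MAX_SHARES	(1UL << 18)

/* Illustrative clamp: keep a requested shares value inside the range the
 * scheduler's weight arithmetic can safely sum up. Not kernel code. */
static unsigned long clamp_shares(unsigned long shares)
{
	if (shares < MIN_SHARES)
		shares = MIN_SHARES;
	if (shares > MAX_SHARES)
		shares = MAX_SHARES;
	return shares;
}

int main(void)
{
	printf("%lu\n", clamp_shares(0));		/* 2 */
	printf("%lu\n", clamp_shares(1UL << 20));	/* 262144 == 1 << 18 */
	return 0;
}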
@@ -1124,6 +1127,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
+#ifdef CONFIG_SMP
 static void hotplug_hrtick_disable(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
@@ -1179,6 +1183,7 @@ static void init_hrtick(void)
 {
 	hotcpu_notifier(hotplug_hrtick, 0);
 }
+#endif /* CONFIG_SMP */
 
 static void init_rq_hrtick(struct rq *rq)
 {
@@ -1337,8 +1342,13 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
 {
 	u64 tmp;
 
-	if (!lw->inv_weight)
-		lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2)/(lw->weight+1);
+	if (!lw->inv_weight) {
+		if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST))
+			lw->inv_weight = 1;
+		else
+			lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2)
+				/ (lw->weight+1);
+	}
 
 	tmp = (u64)delta_exec * weight;
 	/*
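
For reference, a small userspace model of the guarded reciprocal set up above. WMULT_CONST is not shown in this hunk; the 1ULL << 32 value below is an assumption matching the 64-bit case. The point of the new branch is that for weights at or above WMULT_CONST the unsigned subtraction in the old formula can underflow and yield a meaningless reciprocal, so inv_weight is pinned to 1 instead:

#include <stdio.h>

#define WMULT_CONST	(1ULL << 32)	/* assumed 64-bit value; not shown in the hunk */

/* Model of the patched branch: oversized weights get inv_weight = 1 rather
 * than a wrapped result from (WMULT_CONST - weight/2) / (weight + 1). */
static unsigned long long calc_inv_weight(unsigned long long weight)
{
	if (weight >= WMULT_CONST)
		return 1;
	return 1 + (WMULT_CONST - weight / 2) / (weight + 1);
}

int main(void)
{
	printf("%llu\n", calc_inv_weight(1024));	/* roughly WMULT_CONST / 1024 */
	printf("%llu\n", calc_inv_weight(WMULT_CONST));	/* 1: guarded case */
	return 0;
}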
@@ -4159,12 +4169,10 @@ need_resched_nonpreemptible:
 	clear_tsk_need_resched(prev);
 
 	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
-		if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
-				signal_pending(prev))) {
+		if (unlikely(signal_pending_state(prev->state, prev)))
 			prev->state = TASK_RUNNING;
-		} else {
+		else
 			deactivate_task(rq, prev, 1);
-		}
 		switch_count = &prev->nvcsw;
 	}
 
@@ -4390,22 +4398,20 @@ do_wait_for_common(struct completion *x, long timeout, int state)
 			     signal_pending(current)) ||
 			    (state == TASK_KILLABLE &&
 			     fatal_signal_pending(current))) {
-				__remove_wait_queue(&x->wait, &wait);
-				return -ERESTARTSYS;
+				timeout = -ERESTARTSYS;
+				break;
 			}
 			__set_current_state(state);
 			spin_unlock_irq(&x->wait.lock);
 			timeout = schedule_timeout(timeout);
 			spin_lock_irq(&x->wait.lock);
-			if (!timeout) {
-				__remove_wait_queue(&x->wait, &wait);
-				return timeout;
-			}
-		} while (!x->done);
+		} while (!x->done && timeout);
 		__remove_wait_queue(&x->wait, &wait);
+		if (!x->done)
+			return timeout;
 	}
 	x->done--;
-	return timeout;
+	return timeout ?: 1;
 }
 
 static long __sched
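
Two details of the rework above are easy to miss: the loop now exits on either completion or an expired/interrupted timeout, and the final "timeout ?: 1" uses the GNU C conditional with an omitted middle operand (a ?: b is a ? a : b), so a wait that did complete never reports 0, which callers would read as a timeout. A minimal userspace model of that tail logic, with simplified types rather than the kernel's struct completion:

#include <stdio.h>

/* Hypothetical stand-in for struct completion's counter. */
struct completion_model { int done; };

/* Model of the post-loop logic: not done -> report the (possibly zero or
 * negative) timeout; done -> consume it and report at least 1. */
static long finish_wait(struct completion_model *x, long timeout)
{
	if (!x->done)
		return timeout;	/* 0 == timed out, <0 == interrupted */
	x->done--;
	return timeout ?: 1;	/* success even if the clock ran out */
}

int main(void)
{
	struct completion_model a = { .done = 1 }, b = { .done = 0 };

	printf("%ld\n", finish_wait(&a, 0));	/* 1: completed at the last tick */
	printf("%ld\n", finish_wait(&b, 0));	/* 0: genuine timeout */
	return 0;
}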
@@ -5616,10 +5622,10 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	double_rq_lock(rq_src, rq_dest);
 	/* Already moved. */
 	if (task_cpu(p) != src_cpu)
-		goto out;
+		goto done;
 	/* Affinity changed (again). */
 	if (!cpu_isset(dest_cpu, p->cpus_allowed))
-		goto out;
+		goto fail;
 
 	on_rq = p->se.on_rq;
 	if (on_rq)
@@ -5630,8 +5636,9 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 		activate_task(rq_dest, p, 0);
 		check_preempt_curr(rq_dest, p);
 	}
+done:
 	ret = 1;
-out:
+fail:
 	double_rq_unlock(rq_src, rq_dest);
 	return ret;
 }
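
The label split in the two hunks above changes the return value in one corner case: a task that was already moved now counts as success (done: falls through to ret = 1), while an affinity mismatch still fails (fail: skips the assignment). A small standalone model of the control flow, using illustrative names rather than the kernel's:

#include <stdio.h>

/* Illustrative only: 1 mirrors the "done" path, 0 the "fail" path. */
static int migrate_result(int already_moved, int affinity_ok)
{
	int ret = 0;

	if (already_moved)
		goto done;		/* treated as success after the patch */
	if (!affinity_ok)
		goto fail;		/* still a failure */
	/* ... the actual dequeue/move/activate would happen here ... */
done:
	ret = 1;
fail:
	return ret;
}

int main(void)
{
	printf("%d\n", migrate_result(1, 0));	/* 1: already moved */
	printf("%d\n", migrate_result(0, 0));	/* 0: affinity mismatch */
	printf("%d\n", migrate_result(0, 1));	/* 1: normal migration */
	return 0;
}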
@@ -5881,6 +5888,7 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 		next = pick_next_task(rq, rq->curr);
 		if (!next)
 			break;
+		next->sched_class->put_prev_task(rq, next);
 		migrate_dead(dead_cpu, next);
 
 	}
@@ -6871,7 +6879,12 @@ static int default_relax_domain_level = -1;
 
 static int __init setup_relax_domain_level(char *str)
 {
-	default_relax_domain_level = simple_strtoul(str, NULL, 0);
+	unsigned long val;
+
+	val = simple_strtoul(str, NULL, 0);
+	if (val < SD_LV_MAX)
+		default_relax_domain_level = val;
+
 	return 1;
 }
 __setup("relax_domain_level=", setup_relax_domain_level);
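
The new version only commits the parsed value when it is in range; out-of-range input leaves the built-in default (-1) untouched instead of storing garbage. A userspace sketch of the same bounds-checked parse, with a placeholder bound standing in for SD_LV_MAX:

#include <stdio.h>
#include <stdlib.h>

#define DEMO_LV_MAX 9		/* placeholder, not the kernel's SD_LV_MAX */

static int relax_level = -1;	/* -1 keeps the built-in default */

/* Parse the boot-parameter string and accept only in-range values. */
static void setup_relax_level(const char *str)
{
	unsigned long val = strtoul(str, NULL, 0);

	if (val < DEMO_LV_MAX)	/* silently ignore out-of-range input */
		relax_level = val;
}

int main(void)
{
	setup_relax_level("3");
	printf("%d\n", relax_level);	/* 3 */
	setup_relax_level("100");
	printf("%d\n", relax_level);	/* still 3: 100 is out of range */
	return 0;
}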
@@ -7230,6 +7243,18 @@ void __attribute__((weak)) arch_update_cpu_topology(void)
 }
 
 /*
+ * Free current domain masks.
+ * Called after all cpus are attached to NULL domain.
+ */
+static void free_sched_domains(void)
+{
+	ndoms_cur = 0;
+	if (doms_cur != &fallback_doms)
+		kfree(doms_cur);
+	doms_cur = &fallback_doms;
+}
+
+/*
  * Set up scheduler domains and groups. Callers must hold the hotplug lock.
  * For now this just excludes isolated cpus, but could be used to
  * exclude other special cases in the future.
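
The new free_sched_domains() follows a reset-to-fallback idiom: drop the count, free the current array only if it was heap-allocated, and point back at the static fallback so later passes always have something valid to rebuild from. A simplified userspace model of that idiom (stand-in types, not the kernel's cpumask handling):

#include <stdlib.h>

struct dom { int dummy; };		/* stand-in for cpumask_t */

static struct dom fallback_doms;	/* static fallback, never freed */
static struct dom *doms_cur = &fallback_doms;
static int ndoms_cur;

/* Free the current domain array unless it is the static fallback,
 * then reset the current pointer to the fallback. */
static void free_domains(void)
{
	ndoms_cur = 0;
	if (doms_cur != &fallback_doms)
		free(doms_cur);
	doms_cur = &fallback_doms;
}

int main(void)
{
	doms_cur = calloc(4, sizeof(*doms_cur));
	ndoms_cur = 4;
	free_domains();			/* frees the heap array */
	free_domains();			/* idempotent: fallback is never freed */
	return 0;
}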
@@ -7376,6 +7401,7 @@ int arch_reinit_sched_domains(void)
 	get_online_cpus();
 	mutex_lock(&sched_domains_mutex);
 	detach_destroy_domains(&cpu_online_map);
+	free_sched_domains();
 	err = arch_init_sched_domains(&cpu_online_map);
 	mutex_unlock(&sched_domains_mutex);
 	put_online_cpus();
@@ -7461,6 +7487,7 @@ static int update_sched_domains(struct notifier_block *nfb,
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
 		detach_destroy_domains(&cpu_online_map);
+		free_sched_domains();
 		return NOTIFY_OK;
 
 	case CPU_UP_CANCELED:
@@ -7479,8 +7506,16 @@ static int update_sched_domains(struct notifier_block *nfb,
 		return NOTIFY_DONE;
 	}
 
+#ifndef CONFIG_CPUSETS
+	/*
+	 * Create default domain partitioning if cpusets are disabled.
+	 * Otherwise we let cpusets rebuild the domains based on the
+	 * current setup.
+	 */
+
 	/* The hotplug lock is already held by cpu_up/cpu_down */
 	arch_init_sched_domains(&cpu_online_map);
+#endif
 
 	return NOTIFY_OK;
 }
@@ -7620,7 +7655,6 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
 	else
 		rt_se->rt_rq = parent->my_q;
 
-	rt_se->rt_rq = &rq->rt;
 	rt_se->my_q = rt_rq;
 	rt_se->parent = parent;
 	INIT_LIST_HEAD(&rt_se->run_list);
@@ -8342,7 +8376,7 @@ static unsigned long to_ratio(u64 period, u64 runtime)
 #ifdef CONFIG_CGROUP_SCHED
 static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 {
-	struct task_group *tgi, *parent = tg->parent;
+	struct task_group *tgi, *parent = tg ? tg->parent : NULL;
 	unsigned long total = 0;
 
 	if (!parent) {
@@ -8469,6 +8503,9 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
 	rt_period = (u64)rt_period_us * NSEC_PER_USEC;
 	rt_runtime = tg->rt_bandwidth.rt_runtime;
 
+	if (rt_period == 0)
+		return -EINVAL;
+
 	return tg_set_bandwidth(tg, rt_period, rt_runtime);
 }
 
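The rt_period check guards the bandwidth math downstream: the runtime/period ratio (as in to_ratio(), whose body is not shown here) divides by the period, so a zero period coming from userspace has to be rejected with -EINVAL before it reaches that division. A small sketch of the same guard in plain C, with a simplified signature rather than the kernel's:

#include <stdint.h>
#include <stdio.h>
#include <errno.h>

/* Convert a period in microseconds to nanoseconds, refusing zero so that
 * later ratio computations never divide by zero. Illustrative only. */
static int set_rt_period_us(long rt_period_us, uint64_t *rt_period_ns)
{
	uint64_t ns = (uint64_t)rt_period_us * 1000ULL;	/* NSEC_PER_USEC */

	if (ns == 0)
		return -EINVAL;
	*rt_period_ns = ns;
	return 0;
}

int main(void)
{
	uint64_t ns;

	printf("%d\n", set_rt_period_us(1000000, &ns));	/* 0: accepted */
	printf("%d\n", set_rt_period_us(0, &ns));	/* -22: rejected */
	return 0;
}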