Merge commit 'v2.6.28-rc2' into tracing/urgent

author: Ingo Molnar <mingo@elte.hu> 2008-10-27 05:50:54 -0400
committer: Ingo Molnar <mingo@elte.hu> 2008-10-27 05:50:54 -0400
commit: 4944dd62de21230af039eda7cd218e9a09021d11 (patch)
tree: bac70f7bab8506c7e1b0408bacbdb0b1d77262e9 /kernel
parent: f17845e5d97ead8fbdadfd40039e058ec7cf4a42 (diff)
parent: 0173a3265b228da319ceb9c1ec6a5682fd1b2d92 (diff)
28 files changed, 785 insertions, 551 deletions
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index f7921a2ecf16..8ba0e0d934f2 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -532,7 +532,7 @@ void audit_trim_trees(void)
        list_add(&cursor, &tree_list);
        while (cursor.next != &tree_list) {
                struct audit_tree *tree;
-                struct nameidata nd;
+                struct path path;
                struct vfsmount *root_mnt;
                struct node *node;
                struct list_head list;
@@ -544,12 +544,12 @@ void audit_trim_trees(void)
                list_add(&cursor, &tree->list);
                mutex_unlock(&audit_filter_mutex);
-                err = path_lookup(tree->pathname, 0, &nd);
+                err = kern_path(tree->pathname, 0, &path);
                if (err)
                        goto skip_it;
-                root_mnt = collect_mounts(nd.path.mnt, nd.path.dentry);
+                root_mnt = collect_mounts(path.mnt, path.dentry);
-                path_put(&nd.path);
+                path_put(&path);
                if (!root_mnt)
                        goto skip_it;
@@ -580,19 +580,19 @@ skip_it:
 }
 static int is_under(struct vfsmount *mnt, struct dentry *dentry,
-                    struct nameidata *nd)
+                    struct path *path)
 {
-        if (mnt != nd->path.mnt) {
+        if (mnt != path->mnt) {
                for (;;) {
                        if (mnt->mnt_parent == mnt)
                                return 0;
-                        if (mnt->mnt_parent == nd->path.mnt)
+                        if (mnt->mnt_parent == path->mnt)
                                        break;
                        mnt = mnt->mnt_parent;
                }
                dentry = mnt->mnt_mountpoint;
        }
-        return is_subdir(dentry, nd->path.dentry);
+        return is_subdir(dentry, path->dentry);
 }
 int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
@@ -618,7 +618,7 @@ void audit_put_tree(struct audit_tree *tree)
 int audit_add_tree_rule(struct audit_krule *rule)
 {
        struct audit_tree *seed = rule->tree, *tree;
-        struct nameidata nd;
+        struct path path;
        struct vfsmount *mnt, *p;
        struct list_head list;
        int err;
@@ -637,11 +637,11 @@ int audit_add_tree_rule(struct audit_krule *rule)
        /* do not set rule->tree yet */
        mutex_unlock(&audit_filter_mutex);
-        err = path_lookup(tree->pathname, 0, &nd);
+        err = kern_path(tree->pathname, 0, &path);
        if (err)
                goto Err;
-        mnt = collect_mounts(nd.path.mnt, nd.path.dentry);
+        mnt = collect_mounts(path.mnt, path.dentry);
-        path_put(&nd.path);
+        path_put(&path);
        if (!mnt) {
                err = -ENOMEM;
                goto Err;
@@ -690,29 +690,29 @@ int audit_tag_tree(char *old, char *new)
 {
        struct list_head cursor, barrier;
        int failed = 0;
-        struct nameidata nd;
+        struct path path;
        struct vfsmount *tagged;
        struct list_head list;
        struct vfsmount *mnt;
        struct dentry *dentry;
        int err;
-        err = path_lookup(new, 0, &nd);
+        err = kern_path(new, 0, &path);
        if (err)
                return err;
-        tagged = collect_mounts(nd.path.mnt, nd.path.dentry);
+        tagged = collect_mounts(path.mnt, path.dentry);
-        path_put(&nd.path);
+        path_put(&path);
        if (!tagged)
                return -ENOMEM;
-        err = path_lookup(old, 0, &nd);
+        err = kern_path(old, 0, &path);
        if (err) {
                drop_collected_mounts(tagged);
                return err;
        }
-        mnt = mntget(nd.path.mnt);
+        mnt = mntget(path.mnt);
-        dentry = dget(nd.path.dentry);
+        dentry = dget(path.dentry);
-        path_put(&nd.path);
+        path_put(&path);
        if (dentry == tagged->mnt_root && dentry == mnt->mnt_root)
                follow_up(&mnt, &dentry);
@@ -733,7 +733,7 @@ int audit_tag_tree(char *old, char *new)
                list_add(&cursor, &tree->list);
                mutex_unlock(&audit_filter_mutex);
-                err = path_lookup(tree->pathname, 0, &nd);
+                err = kern_path(tree->pathname, 0, &path);
                if (err) {
                        put_tree(tree);
                        mutex_lock(&audit_filter_mutex);
@@ -741,15 +741,15 @@ int audit_tag_tree(char *old, char *new)
                }
                spin_lock(&vfsmount_lock);
-                if (!is_under(mnt, dentry, &nd)) {
+                if (!is_under(mnt, dentry, &path)) {
                        spin_unlock(&vfsmount_lock);
-                        path_put(&nd.path);
+                        path_put(&path);
                        put_tree(tree);
                        mutex_lock(&audit_filter_mutex);
                        continue;
                }
                spin_unlock(&vfsmount_lock);
-                path_put(&nd.path);
+                path_put(&path);
                list_for_each_entry(p, &list, mnt_list) {
                        failed = tag_chunk(p->mnt_root->d_inode, tree);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 046c1609606b..35eebd5510c2 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2104,7 +2104,7 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
        down_read(&cgrp->pids_mutex);
        if (pid) {
                int end = cgrp->pids_length;
-                int i;
                while (index < end) {
                        int mid = (index + end) / 2;
                        if (cgrp->tasks_pids[mid] == pid) {
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index 0d407e886735..0511716e9424 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -12,7 +12,9 @@
 #include <linux/kmod.h>
 #include <linux/module.h>
 #include <linux/personality.h>
+#include <linux/proc_fs.h>
 #include <linux/sched.h>
+#include <linux/seq_file.h>
 #include <linux/syscalls.h>
 #include <linux/sysctl.h>
 #include <linux/types.h>
@@ -173,20 +175,39 @@ __set_personality(u_long personality)
        return 0;
 }
-int
+#ifdef CONFIG_PROC_FS
-get_exec_domain_list(char *page)
+static int execdomains_proc_show(struct seq_file *m, void *v)
 {
        struct exec_domain      *ep;
-        int                     len = 0;
        read_lock(&exec_domains_lock);
-        for (ep = exec_domains; ep && len < PAGE_SIZE - 80; ep = ep->next)
+        for (ep = exec_domains; ep; ep = ep->next)
-                len += sprintf(page + len, "%d-%d\t%-16s\t[%s]\n",
+                seq_printf(m, "%d-%d\t%-16s\t[%s]\n",
                               ep->pers_low, ep->pers_high, ep->name,
                               module_name(ep->module));
        read_unlock(&exec_domains_lock);
-        return (len);
+        return 0;
+}
+static int execdomains_proc_open(struct inode *inode, struct file *file)
+{
+        return single_open(file, execdomains_proc_show, NULL);
+}
+static const struct file_operations execdomains_proc_fops = {
+        .open           = execdomains_proc_open,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = single_release,
+};
+static int __init proc_execdomains_init(void)
+{
+        proc_create("execdomains", 0, NULL, &execdomains_proc_fops);
+        return 0;
 }
+module_init(proc_execdomains_init);
+#endif
 asmlinkage long
 sys_personality(u_long personality)
diff --git a/kernel/fork.c b/kernel/fork.c
index 4d093552dd6e..f6083561dfe0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1018,6 +1018,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        p->prev_utime = cputime_zero;
        p->prev_stime = cputime_zero;
+        p->default_timer_slack_ns = current->timer_slack_ns;
 #ifdef CONFIG_DETECT_SOFTLOCKUP
        p->last_switch_count = 0;
        p->last_switch_timestamp = 0;
diff --git a/kernel/futex.c b/kernel/futex.c
index 7d1136e97c14..8af10027514b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1296,13 +1296,16 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
                if (!abs_time)
                        schedule();
                else {
+                        unsigned long slack;
+                        slack = current->timer_slack_ns;
+                        if (rt_task(current))
+                                slack = 0;
                        hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC,
                                                HRTIMER_MODE_ABS);
                        hrtimer_init_sleeper(&t, current);
-                        t.timer.expires = *abs_time;
+                        hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack);
-                        hrtimer_start(&t.timer, t.timer.expires,
+                        hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
-                                                HRTIMER_MODE_ABS);
                        if (!hrtimer_active(&t.timer))
                                t.task = NULL;
@@ -1404,7 +1407,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
                hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
                                      HRTIMER_MODE_ABS);
                hrtimer_init_sleeper(to, current);
-                to->timer.expires = *time;
+                hrtimer_set_expires(&to->timer, *time);
        }
        q.pi_state = NULL;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 95978f48e039..2b465dfde426 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -517,7 +517,7 @@ static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base)
                if (!base->first)
                        continue;
                timer = rb_entry(base->first, struct hrtimer, node);
-                expires = ktime_sub(timer->expires, base->offset);
+                expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
                if (expires.tv64 < cpu_base->expires_next.tv64)
                        cpu_base->expires_next = expires;
        }
@@ -539,10 +539,10 @@ static int hrtimer_reprogram(struct hrtimer *timer,
                             struct hrtimer_clock_base *base)
 {
        ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
-        ktime_t expires = ktime_sub(timer->expires, base->offset);
+        ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
        int res;
-        WARN_ON_ONCE(timer->expires.tv64 < 0);
+        WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
        /*
         * When the callback is running, we do not reprogram the clock event
@@ -795,7 +795,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
        u64 orun = 1;
        ktime_t delta;
-        delta = ktime_sub(now, timer->expires);
+        delta = ktime_sub(now, hrtimer_get_expires(timer));
        if (delta.tv64 < 0)
                return 0;
@@ -807,8 +807,8 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
                s64 incr = ktime_to_ns(interval);
                orun = ktime_divns(delta, incr);
-                timer->expires = ktime_add_ns(timer->expires, incr * orun);
+                hrtimer_add_expires_ns(timer, incr * orun);
-                if (timer->expires.tv64 > now.tv64)
+                if (hrtimer_get_expires_tv64(timer) > now.tv64)
                        return orun;
                /*
                 * This (and the ktime_add() below) is the
@@ -816,7 +816,7 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
                 */
                orun++;
        }
-        timer->expires = ktime_add_safe(timer->expires, interval);
+        hrtimer_add_expires(timer, interval);
        return orun;
 }
@@ -848,7 +848,8 @@ static void enqueue_hrtimer(struct hrtimer *timer,
                 * We dont care about collisions. Nodes with
                 * the same expiry time stay together.
                 */
-                if (timer->expires.tv64 < entry->expires.tv64) {
+                if (hrtimer_get_expires_tv64(timer) <
+                                hrtimer_get_expires_tv64(entry)) {
                        link = &(*link)->rb_left;
                } else {
                        link = &(*link)->rb_right;
@@ -945,9 +946,10 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
 }
 /**
- * hrtimer_start - (re)start an relative timer on the current CPU
+ * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
 * @timer:      the timer to be added
 * @tim:        expiry time
+ * @delta_ns:   "slack" range for the timer
 * @mode:       expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
 *
 * Returns:
@@ -955,7 +957,8 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
 *  1 when the timer was active
 */
 int
-hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
+hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_ns,
+                        const enum hrtimer_mode mode)
 {
        struct hrtimer_clock_base *base, *new_base;
        unsigned long flags;
@@ -983,7 +986,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 #endif
        }
-        timer->expires = tim;
+        hrtimer_set_expires_range_ns(timer, tim, delta_ns);
        timer_stats_hrtimer_set_start_info(timer);
@@ -1016,8 +1019,26 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
        return ret;
 }
+EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
+/**
+ * hrtimer_start - (re)start an hrtimer on the current CPU
+ * @timer:      the timer to be added
+ * @tim:        expiry time
+ * @mode:       expiry mode: absolute (HRTIMER_MODE_ABS) or
+ *              relative (HRTIMER_MODE_REL)
+ *
+ * Returns:
+ *  0 on success
+ *  1 when the timer was active
+ */
+int
+hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
+{
+        return hrtimer_start_range_ns(timer, tim, 0, mode);
+}
 EXPORT_SYMBOL_GPL(hrtimer_start);
 /**
 * hrtimer_try_to_cancel - try to deactivate a timer
 * @timer:      hrtimer to stop
@@ -1077,7 +1098,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
        ktime_t rem;
        base = lock_hrtimer_base(timer, &flags);
-        rem = ktime_sub(timer->expires, base->get_time());
+        rem = hrtimer_expires_remaining(timer);
        unlock_hrtimer_base(timer, &flags);
        return rem;
@@ -1109,7 +1130,7 @@ ktime_t hrtimer_get_next_event(void)
                                continue;
                        timer = rb_entry(base->first, struct hrtimer, node);
-                        delta.tv64 = timer->expires.tv64;
+                        delta.tv64 = hrtimer_get_expires_tv64(timer);
                        delta = ktime_sub(delta, base->get_time());
                        if (delta.tv64 < mindelta.tv64)
                                mindelta.tv64 = delta.tv64;
@@ -1310,10 +1331,23 @@ void hrtimer_interrupt(struct clock_event_device *dev)
                        timer = rb_entry(node, struct hrtimer, node);
-                        if (basenow.tv64 < timer->expires.tv64) {
+                        /*
+                         * The immediate goal for using the softexpires is
+                         * minimizing wakeups, not running timers at the
+                         * earliest interrupt after their soft expiration.
+                         * This allows us to avoid using a Priority Search
+                         * Tree, which can answer a stabbing query for
+                         * overlapping intervals and instead use the simple
+                         * BST we already have.
+                         * We don't add extra wakeups by delaying timers that
+                         * are right-of a not yet expired timer, because that
+                         * timer will have to trigger a wakeup anyway.
+                         */
+                        if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) {
                                ktime_t expires;
-                                expires = ktime_sub(timer->expires,
+                                expires = ktime_sub(hrtimer_get_expires(timer),
                                                    base->offset);
                                if (expires.tv64 < expires_next.tv64)
                                        expires_next = expires;
@@ -1349,6 +1383,30 @@ void hrtimer_interrupt(struct clock_event_device *dev)
                raise_softirq(HRTIMER_SOFTIRQ);
 }
+/**
+ * hrtimer_peek_ahead_timers -- run soft-expired timers now
+ *
+ * hrtimer_peek_ahead_timers will peek at the timer queue of
+ * the current cpu and check if there are any timers for which
+ * the soft expires time has passed. If any such timers exist,
+ * they are run immediately and then removed from the timer queue.
+ *
+ */
+void hrtimer_peek_ahead_timers(void)
+{
+        struct tick_device *td;
+        unsigned long flags;
+        if (!hrtimer_hres_active())
+                return;
+        local_irq_save(flags);
+        td = &__get_cpu_var(tick_cpu_device);
+        if (td && td->evtdev)
+                hrtimer_interrupt(td->evtdev);
+        local_irq_restore(flags);
+}
 static void run_hrtimer_softirq(struct softirq_action *h)
 {
        run_hrtimer_pending(&__get_cpu_var(hrtimer_bases));
@@ -1414,7 +1472,8 @@ void hrtimer_run_queues(void)
                        struct hrtimer *timer;
                        timer = rb_entry(node, struct hrtimer, node);
-                        if (base->softirq_time.tv64 <= timer->expires.tv64)
+                        if (base->softirq_time.tv64 <=
+                                        hrtimer_get_expires_tv64(timer))
                                break;
                        if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
@@ -1462,7 +1521,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
        do {
                set_current_state(TASK_INTERRUPTIBLE);
-                hrtimer_start(&t->timer, t->timer.expires, mode);
+                hrtimer_start_expires(&t->timer, mode);
                if (!hrtimer_active(&t->timer))
                        t->task = NULL;
@@ -1484,7 +1543,7 @@ static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp)
        struct timespec rmt;
        ktime_t rem;
-        rem = ktime_sub(timer->expires, timer->base->get_time());
+        rem = hrtimer_expires_remaining(timer);
        if (rem.tv64 <= 0)
                return 0;
        rmt = ktime_to_timespec(rem);
@@ -1503,7 +1562,7 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
        hrtimer_init_on_stack(&t.timer, restart->nanosleep.index,
                                HRTIMER_MODE_ABS);
-        t.timer.expires.tv64 = restart->nanosleep.expires;
+        hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
        if (do_nanosleep(&t, HRTIMER_MODE_ABS))
                goto out;
@@ -1528,9 +1587,14 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
        struct restart_block *restart;
        struct hrtimer_sleeper t;
        int ret = 0;
+        unsigned long slack;
+        slack = current->timer_slack_ns;
+        if (rt_task(current))
+                slack = 0;
        hrtimer_init_on_stack(&t.timer, clockid, mode);
-        t.timer.expires = timespec_to_ktime(*rqtp);
+        hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
        if (do_nanosleep(&t, mode))
                goto out;
@@ -1550,7 +1614,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
        restart->fn = hrtimer_nanosleep_restart;
        restart->nanosleep.index = t.timer.base->index;
        restart->nanosleep.rmtp = rmtp;
-        restart->nanosleep.expires = t.timer.expires.tv64;
+        restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
        ret = -ERESTART_RESTARTBLOCK;
 out:
@@ -1752,3 +1816,103 @@ void __init hrtimers_init(void)
 #endif
 }
+/**
+ * schedule_hrtimeout_range - sleep until timeout
+ * @expires:    timeout value (ktime_t)
+ * @delta:      slack in expires timeout (ktime_t)
+ * @mode:       timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
+ *
+ * Make the current task sleep until the given expiry time has
+ * elapsed. The routine will return immediately unless
+ * the current task state has been set (see set_current_state()).
+ *
+ * The @delta argument gives the kernel the freedom to schedule the
+ * actual wakeup to a time that is both power and performance friendly.
+ * The kernel gives the normal best effort behavior for "@expires+@delta",
+ * but may decide to fire the timer earlier, but no earlier than @expires.
+ *
+ * You can set the task state as follows -
+ *
+ * %TASK_UNINTERRUPTIBLE - the timeout given by @expires is guaranteed
+ * to elapse before the routine returns.
+ *
+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ * delivered to the current task.
+ *
+ * The current task state is guaranteed to be TASK_RUNNING when this
+ * routine returns.
+ *
+ * Returns 0 when the timer has expired otherwise -EINTR
+ */
+int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
+                               const enum hrtimer_mode mode)
+{
+        struct hrtimer_sleeper t;
+        /*
+         * Optimize when a zero timeout value is given. It does not
+         * matter whether this is an absolute or a relative time.
+         */
+        if (expires && !expires->tv64) {
+                __set_current_state(TASK_RUNNING);
+                return 0;
+        }
+        /*
+         * A NULL parameter means "infinite"
+         */
+        if (!expires) {
+                schedule();
+                __set_current_state(TASK_RUNNING);
+                return -EINTR;
+        }
+        hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode);
+        hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
+        hrtimer_init_sleeper(&t, current);
+        hrtimer_start_expires(&t.timer, mode);
+        if (!hrtimer_active(&t.timer))
+                t.task = NULL;
+        if (likely(t.task))
+                schedule();
+        hrtimer_cancel(&t.timer);
+        destroy_hrtimer_on_stack(&t.timer);
+        __set_current_state(TASK_RUNNING);
+        return !t.task ? 0 : -EINTR;
+}
+EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
+/**
+ * schedule_hrtimeout - sleep until timeout
+ * @expires:    timeout value (ktime_t)
+ * @mode:       timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
+ *
+ * Make the current task sleep until the given expiry time has
+ * elapsed. The routine will return immediately unless
+ * the current task state has been set (see set_current_state()).
+ *
+ * You can set the task state as follows -
+ *
+ * %TASK_UNINTERRUPTIBLE - the timeout given by @expires is guaranteed
+ * to elapse before the routine returns.
+ *
+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ * delivered to the current task.
+ *
+ * The current task state is guaranteed to be TASK_RUNNING when this
+ * routine returns.
+ *
+ * Returns 0 when the timer has expired otherwise -EINTR
+ */
+int __sched schedule_hrtimeout(ktime_t *expires,
+                               const enum hrtimer_mode mode)
+{
+        return schedule_hrtimeout_range(expires, 0, mode);
+}
+EXPORT_SYMBOL_GPL(schedule_hrtimeout);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 4895fde4eb93..10b5092e9bfe 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -76,6 +76,7 @@ void dynamic_irq_cleanup(unsigned int irq)
        desc->chip_data = NULL;
        desc->handle_irq = handle_bad_irq;
        desc->chip = &no_irq_chip;
+        desc->name = NULL;
        spin_unlock_irqrestore(&desc->lock, flags);
 }
@@ -127,7 +128,7 @@ int set_irq_type(unsigned int irq, unsigned int type)
                return 0;
        spin_lock_irqsave(&desc->lock, flags);
-        ret = __irq_set_trigger(desc, irq, flags);
+        ret = __irq_set_trigger(desc, irq, type);
        spin_unlock_irqrestore(&desc->lock, flags);
        return ret;
 }
diff --git a/kernel/module.c b/kernel/module.c
index 0d8d21ee792c..1f4cc00e0c20 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -20,11 +20,13 @@
 #include <linux/moduleloader.h>
 #include <linux/init.h>
 #include <linux/kallsyms.h>
+#include <linux/fs.h>
 #include <linux/sysfs.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/elf.h>
+#include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/syscalls.h>
 #include <linux/fcntl.h>
@@ -42,6 +44,7 @@
 #include <linux/string.h>
 #include <linux/mutex.h>
 #include <linux/unwind.h>
+#include <linux/rculist.h>
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <linux/license.h>
@@ -63,7 +66,7 @@
 #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
 /* List of modules, protected by module_mutex or preempt_disable
- * (add/delete uses stop_machine). */
+ * (delete uses stop_machine/add uses RCU list operations). */
 static DEFINE_MUTEX(module_mutex);
 static LIST_HEAD(modules);
@@ -132,6 +135,29 @@ static unsigned int find_sec(Elf_Ehdr *hdr,
        return 0;
 }
+/* Find a module section, or NULL. */
+static void *section_addr(Elf_Ehdr *hdr, Elf_Shdr *shdrs,
+                          const char *secstrings, const char *name)
+{
+        /* Section 0 has sh_addr 0. */
+        return (void *)shdrs[find_sec(hdr, shdrs, secstrings, name)].sh_addr;
+}
+/* Find a module section, or NULL.  Fill in number of "objects" in section. */
+static void *section_objs(Elf_Ehdr *hdr,
+                          Elf_Shdr *sechdrs,
+                          const char *secstrings,
+                          const char *name,
+                          size_t object_size,
+                          unsigned int *num)
+{
+        unsigned int sec = find_sec(hdr, sechdrs, secstrings, name);
+        /* Section 0 has sh_addr 0 and sh_size 0. */
+        *num = sechdrs[sec].sh_size / object_size;
+        return (void *)sechdrs[sec].sh_addr;
+}
 /* Provided by the linker */
 extern const struct kernel_symbol __start___ksymtab[];
 extern const struct kernel_symbol __stop___ksymtab[];
@@ -218,7 +244,7 @@ static bool each_symbol(bool (*fn)(const struct symsearch *arr,
        if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
                return true;
-        list_for_each_entry(mod, &modules, list) {
+        list_for_each_entry_rcu(mod, &modules, list) {
                struct symsearch arr[] = {
                        { mod->syms, mod->syms + mod->num_syms, mod->crcs,
                          NOT_GPL_ONLY, false },
@@ -1394,17 +1420,6 @@ static void mod_kobject_remove(struct module *mod)
 }
 /*
- * link the module with the whole machine is stopped with interrupts off
- * - this defends against kallsyms not taking locks
- */
-static int __link_module(void *_mod)
-{
-        struct module *mod = _mod;
-        list_add(&mod->list, &modules);
-        return 0;
-}
-/*
 * unlink the module with the whole machine is stopped with interrupts off
 * - this defends against kallsyms not taking locks
 */
@@ -1789,32 +1804,20 @@ static inline void add_kallsyms(struct module *mod,
 }
 #endif /* CONFIG_KALLSYMS */
-#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG
+static void dynamic_printk_setup(struct mod_debug *debug, unsigned int num)
-static void dynamic_printk_setup(Elf_Shdr *sechdrs, unsigned int verboseindex)
 {
-        struct mod_debug *debug_info;
+#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG
-        unsigned long pos, end;
+        unsigned int i;
-        unsigned int num_verbose;
-        pos = sechdrs[verboseindex].sh_addr;
-        num_verbose = sechdrs[verboseindex].sh_size /
-                                sizeof(struct mod_debug);
-        end = pos + (num_verbose * sizeof(struct mod_debug));
-        for (; pos < end; pos += sizeof(struct mod_debug)) {
+        for (i = 0; i < num; i++) {
-                debug_info = (struct mod_debug *)pos;
+                register_dynamic_debug_module(debug[i].modname,
-                register_dynamic_debug_module(debug_info->modname,
+                                              debug[i].type,
-                        debug_info->type, debug_info->logical_modname,
+                                              debug[i].logical_modname,
-                        debug_info->flag_names, debug_info->hash,
+                                              debug[i].flag_names,
-                        debug_info->hash2);
+                                              debug[i].hash, debug[i].hash2);
        }
-}
-#else
-static inline void dynamic_printk_setup(Elf_Shdr *sechdrs,
-                                        unsigned int verboseindex)
-{
-}
 #endif /* CONFIG_DYNAMIC_PRINTK_DEBUG */
+}
 static void *module_alloc_update_bounds(unsigned long size)
 {
@@ -1843,37 +1846,14 @@ static noinline struct module *load_module(void __user *umod,
        unsigned int i;
        unsigned int symindex = 0;
        unsigned int strindex = 0;
-        unsigned int setupindex;
+        unsigned int modindex, versindex, infoindex, pcpuindex;
-        unsigned int exindex;
-        unsigned int exportindex;
-        unsigned int modindex;
-        unsigned int obsparmindex;
-        unsigned int infoindex;
-        unsigned int gplindex;
-        unsigned int crcindex;
-        unsigned int gplcrcindex;
-        unsigned int versindex;
-        unsigned int pcpuindex;
-        unsigned int gplfutureindex;
-        unsigned int gplfuturecrcindex;
        unsigned int unwindex = 0;
-#ifdef CONFIG_UNUSED_SYMBOLS
+        unsigned int num_kp, num_mcount;
-        unsigned int unusedindex;
+        struct kernel_param *kp;
-        unsigned int unusedcrcindex;
-        unsigned int unusedgplindex;
-        unsigned int unusedgplcrcindex;
-#endif
-        unsigned int markersindex;
-        unsigned int markersstringsindex;
-        unsigned int verboseindex;
-        unsigned int tracepointsindex;
-        unsigned int tracepointsstringsindex;
-        unsigned int mcountindex;
        struct module *mod;
        long err = 0;
        void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
-        void *mseg;
+        unsigned long *mseg;
-        struct exception_table_entry *extable;
        mm_segment_t old_fs;
        DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -1937,6 +1917,7 @@ static noinline struct module *load_module(void __user *umod,
                err = -ENOEXEC;
                goto free_hdr;
        }
+        /* This is temporary: point mod into copy of data. */
        mod = (void *)sechdrs[modindex].sh_addr;
        if (symindex == 0) {
@@ -1946,22 +1927,6 @@ static noinline struct module *load_module(void __user *umod,
                goto free_hdr;
        }
-        /* Optional sections */
-        exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab");
-        gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl");
-        gplfutureindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl_future");
-        crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab");
-        gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl");
-        gplfuturecrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl_future");
-#ifdef CONFIG_UNUSED_SYMBOLS
-        unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused");
-        unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl");
-        unusedcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused");
-        unusedgplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl");
-#endif
-        setupindex = find_sec(hdr, sechdrs, secstrings, "__param");
-        exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table");
-        obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm");
        versindex = find_sec(hdr, sechdrs, secstrings, "__versions");
        infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
        pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
@@ -2117,42 +2082,57 @@ static noinline struct module *load_module(void __user *umod,
        if (err < 0)
                goto cleanup;
-        /* Set up EXPORTed & EXPORT_GPLed symbols (section 0 is 0 length) */
+        /* Now we've got everything in the final locations, we can
-        mod->num_syms = sechdrs[exportindex].sh_size / sizeof(*mod->syms);
+         * find optional sections. */
-        mod->syms = (void *)sechdrs[exportindex].sh_addr;
+        kp = section_objs(hdr, sechdrs, secstrings, "__param", sizeof(*kp),
-        if (crcindex)
+                          &num_kp);
-                mod->crcs = (void *)sechdrs[crcindex].sh_addr;
+        mod->syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab",
-        mod->num_gpl_syms = sechdrs[gplindex].sh_size / sizeof(*mod->gpl_syms);
+                                 sizeof(*mod->syms), &mod->num_syms);
-        mod->gpl_syms = (void *)sechdrs[gplindex].sh_addr;
+        mod->crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab");
-        if (gplcrcindex)
+        mod->gpl_syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab_gpl",
-                mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr;
+                                     sizeof(*mod->gpl_syms),
-        mod->num_gpl_future_syms = sechdrs[gplfutureindex].sh_size /
+                                     &mod->num_gpl_syms);
-                                        sizeof(*mod->gpl_future_syms);
+        mod->gpl_crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab_gpl");
-        mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr;
+        mod->gpl_future_syms = section_objs(hdr, sechdrs, secstrings,
-        if (gplfuturecrcindex)
+                                            "__ksymtab_gpl_future",
-                mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr;
+                                            sizeof(*mod->gpl_future_syms),
+                                            &mod->num_gpl_future_syms);
+        mod->gpl_future_crcs = section_addr(hdr, sechdrs, secstrings,
+                                            "__kcrctab_gpl_future");
 #ifdef CONFIG_UNUSED_SYMBOLS
-        mod->num_unused_syms = sechdrs[unusedindex].sh_size /
+        mod->unused_syms = section_objs(hdr, sechdrs, secstrings,
-                                        sizeof(*mod->unused_syms);
+                                        "__ksymtab_unused",
-        mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size /
+                                        sizeof(*mod->unused_syms),
-                                        sizeof(*mod->unused_gpl_syms);
+                                        &mod->num_unused_syms);
-        mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
+        mod->unused_crcs = section_addr(hdr, sechdrs, secstrings,
-        if (unusedcrcindex)
+                                        "__kcrctab_unused");
-                mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr;
+        mod->unused_gpl_syms = section_objs(hdr, sechdrs, secstrings,
-        mod->unused_gpl_syms = (void *)sechdrs[unusedgplindex].sh_addr;
+                                            "__ksymtab_unused_gpl",
-        if (unusedgplcrcindex)
+                                            sizeof(*mod->unused_gpl_syms),
-                mod->unused_gpl_crcs
+                                            &mod->num_unused_gpl_syms);
-                        = (void *)sechdrs[unusedgplcrcindex].sh_addr;
+        mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings,
+                                            "__kcrctab_unused_gpl");
+#endif
+#ifdef CONFIG_MARKERS
+        mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers",
+                                    sizeof(*mod->markers), &mod->num_markers);
+#endif
+#ifdef CONFIG_TRACEPOINTS
+        mod->tracepoints = section_objs(hdr, sechdrs, secstrings,
+                                        "__tracepoints",
+                                        sizeof(*mod->tracepoints),
+                                        &mod->num_tracepoints);
 #endif
 #ifdef CONFIG_MODVERSIONS
-        if ((mod->num_syms && !crcindex)
+        if ((mod->num_syms && !mod->crcs)
-            || (mod->num_gpl_syms && !gplcrcindex)
+            || (mod->num_gpl_syms && !mod->gpl_crcs)
-            || (mod->num_gpl_future_syms && !gplfuturecrcindex)
+            || (mod->num_gpl_future_syms && !mod->gpl_future_crcs)
 #ifdef CONFIG_UNUSED_SYMBOLS
-            || (mod->num_unused_syms && !unusedcrcindex)
+            || (mod->num_unused_syms && !mod->unused_crcs)
-            || (mod->num_unused_gpl_syms && !unusedgplcrcindex)
+            || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs)
 #endif
                ) {
                printk(KERN_WARNING "%s: No versions for exported symbols.\n", mod->name);
@@ -2161,16 +2141,6 @@ static noinline struct module *load_module(void __user *umod,
                        goto cleanup;
        }
 #endif
-        markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
-        markersstringsindex = find_sec(hdr, sechdrs, secstrings,
-                                        "__markers_strings");
-        verboseindex = find_sec(hdr, sechdrs, secstrings, "__verbose");
-        tracepointsindex = find_sec(hdr, sechdrs, secstrings, "__tracepoints");
-        tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings,
-                                        "__tracepoints_strings");
-        mcountindex = find_sec(hdr, sechdrs, secstrings,
-                               "__mcount_loc");
        /* Now do relocations. */
        for (i = 1; i < hdr->e_shnum; i++) {
@@ -2193,28 +2163,16 @@ static noinline struct module *load_module(void __user *umod,
                if (err < 0)
                        goto cleanup;
        }
-#ifdef CONFIG_MARKERS
-        mod->markers = (void *)sechdrs[markersindex].sh_addr;
-        mod->num_markers =
-                sechdrs[markersindex].sh_size / sizeof(*mod->markers);
-#endif
-#ifdef CONFIG_TRACEPOINTS
-        mod->tracepoints = (void *)sechdrs[tracepointsindex].sh_addr;
-        mod->num_tracepoints =
-                sechdrs[tracepointsindex].sh_size / sizeof(*mod->tracepoints);
-#endif
        /* Find duplicate symbols */
        err = verify_export_symbols(mod);
        if (err < 0)
                goto cleanup;
        /* Set up and sort exception table */
-        mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable);
+        mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table",
-        mod->extable = extable = (void *)sechdrs[exindex].sh_addr;
+                                    sizeof(*mod->extable), &mod->num_exentries);
-        sort_extable(extable, extable + mod->num_exentries);
+        sort_extable(mod->extable, mod->extable + mod->num_exentries);
        /* Finally, copy percpu area over. */
        percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr,
@@ -2223,11 +2181,17 @@ static noinline struct module *load_module(void __user *umod,
        add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
        if (!mod->taints) {
+                struct mod_debug *debug;
+                unsigned int num_debug;
 #ifdef CONFIG_MARKERS
                marker_update_probe_range(mod->markers,
                        mod->markers + mod->num_markers);
 #endif
-        dynamic_printk_setup(sechdrs, verboseindex);
+                debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
+                                     sizeof(*debug), &num_debug);
+                dynamic_printk_setup(debug, num_debug);
 #ifdef CONFIG_TRACEPOINTS
                tracepoint_update_probe_range(mod->tracepoints,
                        mod->tracepoints + mod->num_tracepoints);
@@ -2235,8 +2199,9 @@ static noinline struct module *load_module(void __user *umod,
        }
        /* sechdrs[0].sh_size is always zero */
-        mseg = (void *)sechdrs[mcountindex].sh_addr;
+        mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
-        ftrace_init_module(mseg, mseg + sechdrs[mcountindex].sh_size);
+                            sizeof(*mseg), &num_mcount);
+        ftrace_init_module(mseg, mseg + num_mcount);
        err = module_finalize(hdr, sechdrs, mod);
        if (err < 0)
@@ -2261,30 +2226,24 @@ static noinline struct module *load_module(void __user *umod,
        set_fs(old_fs);
        mod->args = args;
-        if (obsparmindex)
+        if (section_addr(hdr, sechdrs, secstrings, "__obsparm"))
                printk(KERN_WARNING "%s: Ignoring obsolete parameters\n",
                       mod->name);
        /* Now sew it into the lists so we can get lockdep and oops
-         * info during argument parsing.  Noone should access us, since
+         * info during argument parsing.  No one should access us, since
-         * strong_try_module_get() will fail. */
+         * strong_try_module_get() will fail.
-        stop_machine(__link_module, mod, NULL);
+         * lockdep/oops can run asynchronously, so use the RCU list insertion
+         * function to insert in a way safe to concurrent readers.
-        /* Size of section 0 is 0, so this works well if no params */
+         * The mutex protects against concurrent writers.
-        err = parse_args(mod->name, mod->args,
+         */
-                         (struct kernel_param *)
+        list_add_rcu(&mod->list, &modules);
-                         sechdrs[setupindex].sh_addr,
-                         sechdrs[setupindex].sh_size
+        err = parse_args(mod->name, mod->args, kp, num_kp, NULL);
-                         / sizeof(struct kernel_param),
-                         NULL);
        if (err < 0)
                goto unlink;
-        err = mod_sysfs_setup(mod,
+        err = mod_sysfs_setup(mod, kp, num_kp);
-                              (struct kernel_param *)
-                              sechdrs[setupindex].sh_addr,
-                              sechdrs[setupindex].sh_size
-                              / sizeof(struct kernel_param));
        if (err < 0)
                goto unlink;
        add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
@@ -2473,7 +2432,7 @@ const char *module_address_lookup(unsigned long addr,
        const char *ret = NULL;
        preempt_disable();
-        list_for_each_entry(mod, &modules, list) {
+        list_for_each_entry_rcu(mod, &modules, list) {
                if (within(addr, mod->module_init, mod->init_size)
                    || within(addr, mod->module_core, mod->core_size)) {
                        if (modname)
@@ -2496,7 +2455,7 @@ int lookup_module_symbol_name(unsigned long addr, char *symname)
        struct module *mod;
        preempt_disable();
-        list_for_each_entry(mod, &modules, list) {
+        list_for_each_entry_rcu(mod, &modules, list) {
                if (within(addr, mod->module_init, mod->init_size) ||
                    within(addr, mod->module_core, mod->core_size)) {
                        const char *sym;
@@ -2520,7 +2479,7 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
        struct module *mod;
        preempt_disable();
-        list_for_each_entry(mod, &modules, list) {
+        list_for_each_entry_rcu(mod, &modules, list) {
                if (within(addr, mod->module_init, mod->init_size) ||
                    within(addr, mod->module_core, mod->core_size)) {
                        const char *sym;
@@ -2547,7 +2506,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
        struct module *mod;
        preempt_disable();
-        list_for_each_entry(mod, &modules, list) {
+        list_for_each_entry_rcu(mod, &modules, list) {
                if (symnum < mod->num_symtab) {
                        *value = mod->symtab[symnum].st_value;
                        *type = mod->symtab[symnum].st_info;
@@ -2590,7 +2549,7 @@ unsigned long module_kallsyms_lookup_name(const char *name)
                        ret = mod_find_symname(mod, colon+1);
                *colon = ':';
        } else {
-                list_for_each_entry(mod, &modules, list)
+                list_for_each_entry_rcu(mod, &modules, list)
                        if ((ret = mod_find_symname(mod, name)) != 0)
                                break;
        }
@@ -2599,23 +2558,6 @@ unsigned long module_kallsyms_lookup_name(const char *name)
 }
 #endif /* CONFIG_KALLSYMS */
-/* Called by the /proc file system to return a list of modules. */
-static void *m_start(struct seq_file *m, loff_t *pos)
-{
-        mutex_lock(&module_mutex);
-        return seq_list_start(&modules, *pos);
-}
-static void *m_next(struct seq_file *m, void *p, loff_t *pos)
-{
-        return seq_list_next(p, &modules, pos);
-}
-static void m_stop(struct seq_file *m, void *p)
-{
-        mutex_unlock(&module_mutex);
-}
 static char *module_flags(struct module *mod, char *buf)
 {
        int bx = 0;
@@ -2649,6 +2591,24 @@ static char *module_flags(struct module *mod, char *buf)
        return buf;
 }
+#ifdef CONFIG_PROC_FS
+/* Called by the /proc file system to return a list of modules. */
+static void *m_start(struct seq_file *m, loff_t *pos)
+{
+        mutex_lock(&module_mutex);
+        return seq_list_start(&modules, *pos);
+}
+static void *m_next(struct seq_file *m, void *p, loff_t *pos)
+{
+        return seq_list_next(p, &modules, pos);
+}
+static void m_stop(struct seq_file *m, void *p)
+{
+        mutex_unlock(&module_mutex);
+}
 static int m_show(struct seq_file *m, void *p)
 {
        struct module *mod = list_entry(p, struct module, list);
@@ -2679,13 +2639,33 @@ static int m_show(struct seq_file *m, void *p)
   Where refcount is a number or -, and deps is a comma-separated list
   of depends or -.
 */
-const struct seq_operations modules_op = {
+static const struct seq_operations modules_op = {
        .start  = m_start,
        .next   = m_next,
        .stop   = m_stop,
        .show   = m_show
 };
+static int modules_open(struct inode *inode, struct file *file)
+{
+        return seq_open(file, &modules_op);
+}
+static const struct file_operations proc_modules_operations = {
+        .open           = modules_open,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = seq_release,
+};
+static int __init proc_modules_init(void)
+{
+        proc_create("modules", 0, NULL, &proc_modules_operations);
+        return 0;
+}
+module_init(proc_modules_init);
+#endif
 /* Given an address, look for it in the module exception tables. */
 const struct exception_table_entry *search_module_extables(unsigned long addr)
 {
@@ -2693,7 +2673,7 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
        struct module *mod;
        preempt_disable();
-        list_for_each_entry(mod, &modules, list) {
+        list_for_each_entry_rcu(mod, &modules, list) {
                if (mod->num_exentries == 0)
                        continue;
@@ -2719,7 +2699,7 @@ int is_module_address(unsigned long addr)
        preempt_disable();
-        list_for_each_entry(mod, &modules, list) {
+        list_for_each_entry_rcu(mod, &modules, list) {
                if (within(addr, mod->module_core, mod->core_size)) {
                        preempt_enable();
                        return 1;
@@ -2740,7 +2720,7 @@ struct module *__module_text_address(unsigned long addr)
        if (addr < module_addr_min || addr > module_addr_max)
                return NULL;
-        list_for_each_entry(mod, &modules, list)
+        list_for_each_entry_rcu(mod, &modules, list)
                if (within(addr, mod->module_init, mod->init_text_size)
                    || within(addr, mod->module_core, mod->core_text_size))
                        return mod;
@@ -2765,8 +2745,11 @@ void print_modules(void)
        char buf[8];
        printk("Modules linked in:");
-        list_for_each_entry(mod, &modules, list)
+        /* Most callers should already have preempt disabled, but make sure */
+        preempt_disable();
+        list_for_each_entry_rcu(mod, &modules, list)
                printk(" %s%s", mod->name, module_flags(mod, buf));
+        preempt_enable();
        if (last_unloaded_module[0])
                printk(" [last unloaded: %s]", last_unloaded_module);
        printk("\n");
diff --git a/kernel/panic.c b/kernel/panic.c
index bda561ef3cdf..6513aac8e992 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -34,13 +34,6 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
 EXPORT_SYMBOL(panic_notifier_list);
-static int __init panic_setup(char *str)
-{
-        panic_timeout = simple_strtoul(str, NULL, 0);
-        return 1;
-}
-__setup("panic=", panic_setup);
 static long no_blink(long time)
 {
        return 0;
@@ -218,13 +211,6 @@ void add_taint(unsigned flag)
 }
 EXPORT_SYMBOL(add_taint);
-static int __init pause_on_oops_setup(char *str)
-{
-        pause_on_oops = simple_strtoul(str, NULL, 0);
-        return 1;
-}
-__setup("pause_on_oops=", pause_on_oops_setup);
 static void spin_msec(int msecs)
 {
        int i;
@@ -384,3 +370,6 @@ void __stack_chk_fail(void)
 }
 EXPORT_SYMBOL(__stack_chk_fail);
 #endif
+core_param(panic, panic_timeout, int, 0644);
+core_param(pause_on_oops, pause_on_oops, int, 0644);
diff --git a/kernel/params.c b/kernel/params.c
index afc46a23eb6d..a1e3025b19a9 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -373,6 +373,8 @@ int param_get_string(char *buffer, struct kernel_param *kp)
 }
 /* sysfs output in /sys/modules/XYZ/parameters/ */
+#define to_module_attr(n) container_of(n, struct module_attribute, attr);
+#define to_module_kobject(n) container_of(n, struct module_kobject, kobj);
 extern struct kernel_param __start___param[], __stop___param[];
@@ -384,6 +386,7 @@ struct param_attribute
 struct module_param_attrs
 {
+        unsigned int num;
        struct attribute_group grp;
        struct param_attribute attrs[0];
 };
@@ -434,93 +437,120 @@ static ssize_t param_attr_store(struct module_attribute *mattr,
 #ifdef CONFIG_SYSFS
 /*
- * param_sysfs_setup - setup sysfs support for one module or KBUILD_MODNAME
+ * add_sysfs_param - add a parameter to sysfs
- * @mk: struct module_kobject (contains parent kobject)
+ * @mk: struct module_kobject
- * @kparam: array of struct kernel_param, the actual parameter definitions
+ * @kparam: the actual parameter definition to add to sysfs
- * @num_params: number of entries in array
+ * @name: name of parameter
- * @name_skip: offset where the parameter name start in kparam[].name. Needed for built-in "modules"
 *
- * Create a kobject for a (per-module) group of parameters, and create files
+ * Create a kobject for a (per-module) parameter if mk->mp is NULL, and
- * in sysfs. A pointer to the param_kobject is returned on success,
+ * create file in sysfs.  Returns an error on out of memory.  Always cleans up
- * NULL if there's no parameter to export, or other ERR_PTR(err).
+ * if there's an error.
 */
-static __modinit struct module_param_attrs *
+static __modinit int add_sysfs_param(struct module_kobject *mk,
-param_sysfs_setup(struct module_kobject *mk,
+                                     struct kernel_param *kp,
-                  struct kernel_param *kparam,
+                                     const char *name)
-                  unsigned int num_params,
-                  unsigned int name_skip)
 {
-        struct module_param_attrs *mp;
+        struct module_param_attrs *new;
-        unsigned int valid_attrs = 0;
+        struct attribute **attrs;
-        unsigned int i, size[2];
+        int err, num;
-        struct param_attribute *pattr;
-        struct attribute **gattr;
+        /* We don't bother calling this with invisible parameters. */
-        int err;
+        BUG_ON(!kp->perm);
-        for (i=0; i<num_params; i++) {
+        if (!mk->mp) {
-                if (kparam[i].perm)
+                num = 0;
-                        valid_attrs++;
+                attrs = NULL;
+        } else {
+                num = mk->mp->num;
+                attrs = mk->mp->grp.attrs;
        }
-        if (!valid_attrs)
+        /* Enlarge. */
-                return NULL;
+        new = krealloc(mk->mp,
+                       sizeof(*mk->mp) + sizeof(mk->mp->attrs[0]) * (num+1),
-        size[0] = ALIGN(sizeof(*mp) +
+                       GFP_KERNEL);
-                        valid_attrs * sizeof(mp->attrs[0]),
+        if (!new) {
-                        sizeof(mp->grp.attrs[0]));
+                kfree(mk->mp);
-        size[1] = (valid_attrs + 1) * sizeof(mp->grp.attrs[0]);
+                err = -ENOMEM;
+                goto fail;
-        mp = kzalloc(size[0] + size[1], GFP_KERNEL);
-        if (!mp)
-                return ERR_PTR(-ENOMEM);
-        mp->grp.name = "parameters";
-        mp->grp.attrs = (void *)mp + size[0];
-        pattr = &mp->attrs[0];
-        gattr = &mp->grp.attrs[0];
-        for (i = 0; i < num_params; i++) {
-                struct kernel_param *kp = &kparam[i];
-                if (kp->perm) {
-                        pattr->param = kp;
-                        pattr->mattr.show = param_attr_show;
-                        pattr->mattr.store = param_attr_store;
-                        pattr->mattr.attr.name = (char *)&kp->name[name_skip];
-                        pattr->mattr.attr.mode = kp->perm;
-                        *(gattr++) = &(pattr++)->mattr.attr;
-                }
        }
-        *gattr = NULL;
+        attrs = krealloc(attrs, sizeof(new->grp.attrs[0])*(num+2), GFP_KERNEL);
+        if (!attrs) {
-        if ((err = sysfs_create_group(&mk->kobj, &mp->grp))) {
+                err = -ENOMEM;
-                kfree(mp);
+                goto fail_free_new;
-                return ERR_PTR(err);
        }
-        return mp;
+        /* Sysfs wants everything zeroed. */
+        memset(new, 0, sizeof(*new));
+        memset(&new->attrs[num], 0, sizeof(new->attrs[num]));
+        memset(&attrs[num], 0, sizeof(attrs[num]));
+        new->grp.name = "parameters";
+        new->grp.attrs = attrs;
+        /* Tack new one on the end. */
+        new->attrs[num].param = kp;
+        new->attrs[num].mattr.show = param_attr_show;
+        new->attrs[num].mattr.store = param_attr_store;
+        new->attrs[num].mattr.attr.name = (char *)name;
+        new->attrs[num].mattr.attr.mode = kp->perm;
+        new->num = num+1;
+        /* Fix up all the pointers, since krealloc can move us */
+        for (num = 0; num < new->num; num++)
+                new->grp.attrs[num] = &new->attrs[num].mattr.attr;
+        new->grp.attrs[num] = NULL;
+        mk->mp = new;
+        return 0;
+fail_free_new:
+        kfree(new);
+fail:
+        mk->mp = NULL;
+        return err;
 }
 #ifdef CONFIG_MODULES
+static void free_module_param_attrs(struct module_kobject *mk)
+{
+        kfree(mk->mp->grp.attrs);
+        kfree(mk->mp);
+        mk->mp = NULL;
+}
 /*
 * module_param_sysfs_setup - setup sysfs support for one module
 * @mod: module
 * @kparam: module parameters (array)
 * @num_params: number of module parameters
 *
- * Adds sysfs entries for module parameters, and creates a link from
+ * Adds sysfs entries for module parameters under
- * /sys/module/[mod->name]/parameters to /sys/parameters/[mod->name]/
+ * /sys/module/[mod->name]/parameters/
 */
 int module_param_sysfs_setup(struct module *mod,
                             struct kernel_param *kparam,
                             unsigned int num_params)
 {
-        struct module_param_attrs *mp;
+        int i, err;
+        bool params = false;
+        for (i = 0; i < num_params; i++) {
+                if (kparam[i].perm == 0)
+                        continue;
+                err = add_sysfs_param(&mod->mkobj, &kparam[i], kparam[i].name);
+                if (err)
+                        return err;
+                params = true;
+        }
-        mp = param_sysfs_setup(&mod->mkobj, kparam, num_params, 0);
+        if (!params)
-        if (IS_ERR(mp))
+                return 0;
-                return PTR_ERR(mp);
-        mod->param_attrs = mp;
+        /* Create the param group. */
-        return 0;
+        err = sysfs_create_group(&mod->mkobj.kobj, &mod->mkobj.mp->grp);
+        if (err)
+                free_module_param_attrs(&mod->mkobj);
+        return err;
 }
 /*
@@ -532,43 +562,55 @@ int module_param_sysfs_setup(struct module *mod,
 */
 void module_param_sysfs_remove(struct module *mod)
 {
-        if (mod->param_attrs) {
+        if (mod->mkobj.mp) {
-                sysfs_remove_group(&mod->mkobj.kobj,
+                sysfs_remove_group(&mod->mkobj.kobj, &mod->mkobj.mp->grp);
-                                   &mod->param_attrs->grp);
                /* We are positive that no one is using any param
                 * attrs at this point.  Deallocate immediately. */
-                kfree(mod->param_attrs);
+                free_module_param_attrs(&mod->mkobj);
-                mod->param_attrs = NULL;
        }
 }
 #endif
-/*
+static void __init kernel_add_sysfs_param(const char *name,
- * kernel_param_sysfs_setup - wrapper for built-in params support
+                                          struct kernel_param *kparam,
- */
+                                          unsigned int name_skip)
-static void __init kernel_param_sysfs_setup(const char *name,
-                                            struct kernel_param *kparam,
-                                            unsigned int num_params,
-                                            unsigned int name_skip)
 {
        struct module_kobject *mk;
-        int ret;
+        struct kobject *kobj;
+        int err;
-        mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL);
+        kobj = kset_find_obj(module_kset, name);
-        BUG_ON(!mk);
+        if (kobj) {
+                /* We already have one.  Remove params so we can add more. */
-        mk->mod = THIS_MODULE;
+                mk = to_module_kobject(kobj);
-        mk->kobj.kset = module_kset;
+                /* We need to remove it before adding parameters. */
-        ret = kobject_init_and_add(&mk->kobj, &module_ktype, NULL, "%s", name);
+                sysfs_remove_group(&mk->kobj, &mk->mp->grp);
-        if (ret) {
+        } else {
-                kobject_put(&mk->kobj);
+                mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL);
-                printk(KERN_ERR "Module '%s' failed to be added to sysfs, "
+                BUG_ON(!mk);
-                      "error number %d\n", name, ret);
-                printk(KERN_ERR "The system will be unstable now.\n");
+                mk->mod = THIS_MODULE;
-                return;
+                mk->kobj.kset = module_kset;
+                err = kobject_init_and_add(&mk->kobj, &module_ktype, NULL,
+                                           "%s", name);
+                if (err) {
+                        kobject_put(&mk->kobj);
+                        printk(KERN_ERR "Module '%s' failed add to sysfs, "
+                               "error number %d\n", name, err);
+                        printk(KERN_ERR "The system will be unstable now.\n");
+                        return;
+                }
+                /* So that exit path is even. */
+                kobject_get(&mk->kobj);
        }
-        param_sysfs_setup(mk, kparam, num_params, name_skip);
+        /* These should not fail at boot. */
+        err = add_sysfs_param(mk, kparam, kparam->name + name_skip);
+        BUG_ON(err);
+        err = sysfs_create_group(&mk->kobj, &mk->mp->grp);
+        BUG_ON(err);
        kobject_uevent(&mk->kobj, KOBJ_ADD);
+        kobject_put(&mk->kobj);
 }
 /*
@@ -579,60 +621,36 @@ static void __init kernel_param_sysfs_setup(const char *name,
 * The "module" name (KBUILD_MODNAME) is stored before a dot, the
 * "parameter" name is stored behind a dot in kernel_param->name. So,
 * extract the "module" name for all built-in kernel_param-eters,
- * and for all who have the same, call kernel_param_sysfs_setup.
+ * and for all who have the same, call kernel_add_sysfs_param.
 */
 static void __init param_sysfs_builtin(void)
 {
-        struct kernel_param *kp, *kp_begin = NULL;
+        struct kernel_param *kp;
-        unsigned int i, name_len, count = 0;
+        unsigned int name_len;
-        char modname[MODULE_NAME_LEN + 1] = "";
+        char modname[MODULE_NAME_LEN];
-        for (i=0; i < __stop___param - __start___param; i++) {
+        for (kp = __start___param; kp < __stop___param; kp++) {
                char *dot;
-                size_t max_name_len;
-                kp = &__start___param[i];
+                if (kp->perm == 0)
-                max_name_len =
+                        continue;
-                        min_t(size_t, MODULE_NAME_LEN, strlen(kp->name));
-                dot = memchr(kp->name, '.', max_name_len);
+                dot = strchr(kp->name, '.');
                if (!dot) {
-                        DEBUGP("couldn't find period in first %d characters "
+                        /* This happens for core_param() */
-                               "of %s\n", MODULE_NAME_LEN, kp->name);
+                        strcpy(modname, "kernel");
-                        continue;
+                        name_len = 0;
-                }
+                } else {
-                name_len = dot - kp->name;
+                        name_len = dot - kp->name + 1;
+                        strlcpy(modname, kp->name, name_len);
-                /* new kbuild_modname? */
-                if (strlen(modname) != name_len
-                    || strncmp(modname, kp->name, name_len) != 0) {
-                        /* add a new kobject for previous kernel_params. */
-                        if (count)
-                                kernel_param_sysfs_setup(modname,
-                                                         kp_begin,
-                                                         count,
-                                                         strlen(modname)+1);
-                        strncpy(modname, kp->name, name_len);
-                        modname[name_len] = '\0';
-                        count = 0;
-                        kp_begin = kp;
                }
-                count++;
+                kernel_add_sysfs_param(modname, kp, name_len);
        }
-        /* last kernel_params need to be registered as well */
-        if (count)
-                kernel_param_sysfs_setup(modname, kp_begin, count,
-                                         strlen(modname)+1);
 }
 /* module-related sysfs stuff */
-#define to_module_attr(n) container_of(n, struct module_attribute, attr);
-#define to_module_kobject(n) container_of(n, struct module_kobject, kobj);
 static ssize_t module_attr_show(struct kobject *kobj,
                                struct attribute *attr,
                                char *buf)
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index b931d7cedbfa..5e79c662294b 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -639,7 +639,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
            (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
                timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
-        remaining = ktime_sub(timer->expires, now);
+        remaining = ktime_sub(hrtimer_get_expires(timer), now);
        /* Return 0 only, when the timer is expired and not pending */
        if (remaining.tv64 <= 0) {
                /*
@@ -733,7 +733,7 @@ common_timer_set(struct k_itimer *timr, int flags,
        hrtimer_init(&timr->it.real.timer, timr->it_clock, mode);
        timr->it.real.timer.function = posix_timer_fn;
-        timer->expires = timespec_to_ktime(new_setting->it_value);
+        hrtimer_set_expires(timer, timespec_to_ktime(new_setting->it_value));
        /* Convert interval */
        timr->it.real.interval = timespec_to_ktime(new_setting->it_interval);
@@ -742,14 +742,12 @@ common_timer_set(struct k_itimer *timr, int flags,
        if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
                /* Setup correct expiry time for relative timers */
                if (mode == HRTIMER_MODE_REL) {
-                        timer->expires =
+                        hrtimer_add_expires(timer, timer->base->get_time());
-                                ktime_add_safe(timer->expires,
-                                               timer->base->get_time());
                }
                return 0;
        }
-        hrtimer_start(timer, timer->expires, mode);
+        hrtimer_start_expires(timer, mode);
        return 0;
 }
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 331f9836383f..c9d74083746f 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -651,7 +651,7 @@ static int software_resume(void)
        pr_debug("PM: Preparing processes for restore.\n");
        error = prepare_processes();
        if (error) {
-                swsusp_close();
+                swsusp_close(FMODE_READ);
                goto Done;
        }
diff --git a/kernel/power/power.h b/kernel/power/power.h
index acc0c101dbd5..46b5ec7a3afb 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -153,7 +153,7 @@ extern int swsusp_shrink_memory(void);
 extern void swsusp_free(void);
 extern int swsusp_read(unsigned int *flags_p);
 extern int swsusp_write(unsigned int flags);
-extern void swsusp_close(void);
+extern void swsusp_close(fmode_t);
 struct timeval;
 /* kernel/power/swsusp.c */
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 80ccac849e46..b7713b53d07a 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -172,13 +172,13 @@ static int swsusp_swap_check(void) /* This is called before saving image */
                return res;
        root_swap = res;
-        res = blkdev_get(resume_bdev, FMODE_WRITE, O_RDWR);
+        res = blkdev_get(resume_bdev, FMODE_WRITE);
        if (res)
                return res;
        res = set_blocksize(resume_bdev, PAGE_SIZE);
        if (res < 0)
-                blkdev_put(resume_bdev);
+                blkdev_put(resume_bdev, FMODE_WRITE);
        return res;
 }
@@ -426,7 +426,7 @@ int swsusp_write(unsigned int flags)
        release_swap_writer(&handle);
 out:
-        swsusp_close();
+        swsusp_close(FMODE_WRITE);
        return error;
 }
@@ -574,7 +574,7 @@ int swsusp_read(unsigned int *flags_p)
                error = load_image(&handle, &snapshot, header->pages - 1);
        release_swap_reader(&handle);
-        blkdev_put(resume_bdev);
+        blkdev_put(resume_bdev, FMODE_READ);
        if (!error)
                pr_debug("PM: Image successfully loaded\n");
@@ -609,7 +609,7 @@ int swsusp_check(void)
                        return -EINVAL;
                }
                if (error)
-                        blkdev_put(resume_bdev);
+                        blkdev_put(resume_bdev, FMODE_READ);
                else
                        pr_debug("PM: Signature found, resuming\n");
        } else {
@@ -626,14 +626,14 @@ int swsusp_check(void)
 *      swsusp_close - close swap device.
 */
-void swsusp_close(void)
+void swsusp_close(fmode_t mode)
 {
        if (IS_ERR(resume_bdev)) {
                pr_debug("PM: Image device not initialised\n");
                return;
        }
-        blkdev_put(resume_bdev);
+        blkdev_put(resume_bdev, mode); /* move up */
 }
 static int swsusp_header_init(void)
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 467d5940f624..ad63af8b2521 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -119,18 +119,19 @@ static void _rcu_barrier(enum rcu_barrier type)
        /* Take cpucontrol mutex to protect against CPU hotplug */
        mutex_lock(&rcu_barrier_mutex);
        init_completion(&rcu_barrier_completion);
-        atomic_set(&rcu_barrier_cpu_count, 0);
        /*
-         * The queueing of callbacks in all CPUs must be atomic with
+         * Initialize rcu_barrier_cpu_count to 1, then invoke
-         * respect to RCU, otherwise one CPU may queue a callback,
+         * rcu_barrier_func() on each CPU, so that each CPU also has
-         * wait for a grace period, decrement barrier count and call
+         * incremented rcu_barrier_cpu_count.  Only then is it safe to
-         * complete(), while other CPUs have not yet queued anything.
+         * decrement rcu_barrier_cpu_count -- otherwise the first CPU
-         * So, we need to make sure that grace periods cannot complete
+         * might complete its grace period before all of the other CPUs
-         * until all the callbacks are queued.
+         * did their increment, causing this function to return too
+         * early.
         */
-        rcu_read_lock();
+        atomic_set(&rcu_barrier_cpu_count, 1);
        on_each_cpu(rcu_barrier_func, (void *)type, 1);
-        rcu_read_unlock();
+        if (atomic_dec_and_test(&rcu_barrier_cpu_count))
+                complete(&rcu_barrier_completion);
        wait_for_completion(&rcu_barrier_completion);
        mutex_unlock(&rcu_barrier_mutex);
 }
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 6522ae5b14a2..69d9cb921ffa 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -631,8 +631,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
        /* Setup the timer, when timeout != NULL */
        if (unlikely(timeout)) {
-                hrtimer_start(&timeout->timer, timeout->timer.expires,
+                hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
-                              HRTIMER_MODE_ABS);
                if (!hrtimer_active(&timeout->timer))
                        timeout->task = NULL;
        }
diff --git a/kernel/sched.c b/kernel/sched.c
index d906f72b42d2..6625c3c4b10d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -55,6 +55,7 @@
 #include <linux/cpuset.h>
 #include <linux/percpu.h>
 #include <linux/kthread.h>
+#include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/sysctl.h>
 #include <linux/syscalls.h>
@@ -227,9 +228,8 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
                now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
                hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
-                hrtimer_start(&rt_b->rt_period_timer,
+                hrtimer_start_expires(&rt_b->rt_period_timer,
-                              rt_b->rt_period_timer.expires,
+                                HRTIMER_MODE_ABS);
-                              HRTIMER_MODE_ABS);
        }
        spin_unlock(&rt_b->rt_runtime_lock);
 }
@@ -819,6 +819,13 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
 unsigned int sysctl_sched_shares_ratelimit = 250000;
 /*
+ * Inject some fuzziness into changing the per-cpu group shares;
+ * this avoids remote rq-locks at the expense of fairness.
+ * default: 4
+ */
+unsigned int sysctl_sched_shares_thresh = 4;
+/*
 * period over which we measure -rt task cpu usage in us.
 * default: 1s
 */
@@ -1064,7 +1071,7 @@ static void hrtick_start(struct rq *rq, u64 delay)
        struct hrtimer *timer = &rq->hrtick_timer;
        ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
-        timer->expires = time;
+        hrtimer_set_expires(timer, time);
        if (rq == this_rq()) {
                hrtimer_restart(timer);
@@ -1454,8 +1461,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares);
 * Calculate and set the cpu's group shares.
 */
 static void
-__update_group_shares_cpu(struct task_group *tg, int cpu,
+update_group_shares_cpu(struct task_group *tg, int cpu,
-                          unsigned long sd_shares, unsigned long sd_rq_weight)
+                        unsigned long sd_shares, unsigned long sd_rq_weight)
 {
        int boost = 0;
        unsigned long shares;
@@ -1486,19 +1493,23 @@ __update_group_shares_cpu(struct task_group *tg, int cpu,
         *
         */
        shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+        shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
-        /*
+        if (abs(shares - tg->se[cpu]->load.weight) >
-         * record the actual number of shares, not the boosted amount.
+                        sysctl_sched_shares_thresh) {
-         */
+                struct rq *rq = cpu_rq(cpu);
-        tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
+                unsigned long flags;
-        tg->cfs_rq[cpu]->rq_weight = rq_weight;
-        if (shares < MIN_SHARES)
+                spin_lock_irqsave(&rq->lock, flags);
-                shares = MIN_SHARES;
+                /*
-        else if (shares > MAX_SHARES)
+                 * record the actual number of shares, not the boosted amount.
-                shares = MAX_SHARES;
+                 */
+                tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
+                tg->cfs_rq[cpu]->rq_weight = rq_weight;
-        __set_se_shares(tg->se[cpu], shares);
+                __set_se_shares(tg->se[cpu], shares);
+                spin_unlock_irqrestore(&rq->lock, flags);
+        }
 }
 /*
@@ -1527,14 +1538,8 @@ static int tg_shares_up(struct task_group *tg, void *data)
        if (!rq_weight)
                rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
-        for_each_cpu_mask(i, sd->span) {
+        for_each_cpu_mask(i, sd->span)
-                struct rq *rq = cpu_rq(i);
+                update_group_shares_cpu(tg, i, shares, rq_weight);
-                unsigned long flags;
-                spin_lock_irqsave(&rq->lock, flags);
-                __update_group_shares_cpu(tg, i, shares, rq_weight);
-                spin_unlock_irqrestore(&rq->lock, flags);
-        }
        return 0;
 }
@@ -4443,12 +4448,8 @@ need_resched_nonpreemptible:
        if (sched_feat(HRTICK))
                hrtick_clear(rq);
-        /*
+        spin_lock_irq(&rq->lock);
-         * Do the rq-clock update outside the rq lock:
-         */
-        local_irq_disable();
        update_rq_clock(rq);
-        spin_lock(&rq->lock);
        clear_tsk_need_resched(prev);
        if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f604dae71316..9573c33688b8 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -73,6 +73,8 @@ unsigned int sysctl_sched_wakeup_granularity = 5000000UL;
 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+static const struct sched_class fair_sched_class;
 /**************************************************************
 * CFS operations on generic schedulable entities:
 */
@@ -334,7 +336,7 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
 #endif
 /*
- * delta *= w / rw
+ * delta *= P[w / rw]
 */
 static inline unsigned long
 calc_delta_weight(unsigned long delta, struct sched_entity *se)
@@ -348,15 +350,13 @@ calc_delta_weight(unsigned long delta, struct sched_entity *se)
 }
 /*
- * delta *= rw / w
+ * delta /= w
 */
 static inline unsigned long
 calc_delta_fair(unsigned long delta, struct sched_entity *se)
 {
-        for_each_sched_entity(se) {
+        if (unlikely(se->load.weight != NICE_0_LOAD))
-                delta = calc_delta_mine(delta,
+                delta = calc_delta_mine(delta, NICE_0_LOAD, &se->load);
-                                cfs_rq_of(se)->load.weight, &se->load);
-        }
        return delta;
 }
@@ -386,26 +386,26 @@ static u64 __sched_period(unsigned long nr_running)
 * We calculate the wall-time slice from the period by taking a part
 * proportional to the weight.
 *
- * s = p*w/rw
+ * s = p*P[w/rw]
 */
 static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-        return calc_delta_weight(__sched_period(cfs_rq->nr_running), se);
+        unsigned long nr_running = cfs_rq->nr_running;
+        if (unlikely(!se->on_rq))
+                nr_running++;
+        return calc_delta_weight(__sched_period(nr_running), se);
 }
 /*
 * We calculate the vruntime slice of a to be inserted task
 *
- * vs = s*rw/w = p
+ * vs = s/w
 */
-static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-        unsigned long nr_running = cfs_rq->nr_running;
+        return calc_delta_fair(sched_slice(cfs_rq, se), se);
-        if (!se->on_rq)
-                nr_running++;
-        return __sched_period(nr_running);
 }
 /*
@@ -628,7 +628,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
         * stays open at the end.
         */
        if (initial && sched_feat(START_DEBIT))
-                vruntime += sched_vslice_add(cfs_rq, se);
+                vruntime += sched_vslice(cfs_rq, se);
        if (!initial) {
                /* sleeps upto a single latency don't count. */
@@ -748,7 +748,7 @@ pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
        struct rq *rq = rq_of(cfs_rq);
        u64 pair_slice = rq->clock - cfs_rq->pair_start;
-        if (!cfs_rq->next || pair_slice > sched_slice(cfs_rq, cfs_rq->next)) {
+        if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
                cfs_rq->pair_start = rq->clock;
                return se;
        }
@@ -849,11 +849,31 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
                hrtick_start(rq, delta);
        }
 }
+/*
+ * called from enqueue/dequeue and updates the hrtick when the
+ * current task is from our class and nr_running is low enough
+ * to matter.
+ */
+static void hrtick_update(struct rq *rq)
+{
+        struct task_struct *curr = rq->curr;
+        if (curr->sched_class != &fair_sched_class)
+                return;
+        if (cfs_rq_of(&curr->se)->nr_running < sched_nr_latency)
+                hrtick_start_fair(rq, curr);
+}
 #else /* !CONFIG_SCHED_HRTICK */
 static inline void
 hrtick_start_fair(struct rq *rq, struct task_struct *p)
 {
 }
+static inline void hrtick_update(struct rq *rq)
+{
+}
 #endif
 /*
@@ -874,7 +894,7 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
                wakeup = 1;
        }
-        hrtick_start_fair(rq, rq->curr);
+        hrtick_update(rq);
 }
 /*
@@ -896,7 +916,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
                sleep = 1;
        }
-        hrtick_start_fair(rq, rq->curr);
+        hrtick_update(rq);
 }
 /*
@@ -1002,8 +1022,6 @@ static inline int wake_idle(int cpu, struct task_struct *p)
 #ifdef CONFIG_SMP
-static const struct sched_class fair_sched_class;
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /*
 * effective_load() calculates the load change as seen from the root_task_group
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 7c9e8f4a049f..fda016218296 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -5,7 +5,7 @@ SCHED_FEAT(START_DEBIT, 1)
 SCHED_FEAT(AFFINE_WAKEUPS, 1)
 SCHED_FEAT(CACHE_HOT_BUDDY, 1)
 SCHED_FEAT(SYNC_WAKEUPS, 1)
-SCHED_FEAT(HRTICK, 1)
+SCHED_FEAT(HRTICK, 0)
 SCHED_FEAT(DOUBLE_TICK, 0)
 SCHED_FEAT(ASYM_GRAN, 1)
 SCHED_FEAT(LB_BIAS, 1)
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index b8c156979cf2..ee71bec1da66 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -9,7 +9,7 @@
 static int show_schedstat(struct seq_file *seq, void *v)
 {
        int cpu;
-        int mask_len = NR_CPUS/32 * 9;
+        int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
        char *mask_str = kmalloc(mask_len, GFP_KERNEL);
        if (mask_str == NULL)
@@ -90,13 +90,20 @@ static int schedstat_open(struct inode *inode, struct file *file)
        return res;
 }
-const struct file_operations proc_schedstat_operations = {
+static const struct file_operations proc_schedstat_operations = {
        .open    = schedstat_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release,
 };
+static int __init proc_schedstat_init(void)
+{
+        proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
+        return 0;
+}
+module_init(proc_schedstat_init);
 /*
 * Expects runqueue lock to be held for atomicity of update
 */
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index af3c7cea258b..9bc4c00872c9 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -37,9 +37,13 @@ struct stop_machine_data {
 /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
 static unsigned int num_threads;
 static atomic_t thread_ack;
-static struct completion finished;
 static DEFINE_MUTEX(lock);
+static struct workqueue_struct *stop_machine_wq;
+static struct stop_machine_data active, idle;
+static const cpumask_t *active_cpus;
+static void *stop_machine_work;
 static void set_state(enum stopmachine_state newstate)
 {
        /* Reset ack counter. */
@@ -51,21 +55,26 @@ static void set_state(enum stopmachine_state newstate)
 /* Last one to ack a state moves to the next state. */
 static void ack_state(void)
 {
-        if (atomic_dec_and_test(&thread_ack)) {
+        if (atomic_dec_and_test(&thread_ack))
-                /* If we're the last one to ack the EXIT, we're finished. */
+                set_state(state + 1);
-                if (state == STOPMACHINE_EXIT)
-                        complete(&finished);
-                else
-                        set_state(state + 1);
-        }
 }
-/* This is the actual thread which stops the CPU.  It exits by itself rather
+/* This is the actual function which stops the CPU. It runs
- * than waiting for kthread_stop(), because it's easier for hotplug CPU. */
+ * in the context of a dedicated stopmachine workqueue. */
-static int stop_cpu(struct stop_machine_data *smdata)
+static void stop_cpu(struct work_struct *unused)
 {
        enum stopmachine_state curstate = STOPMACHINE_NONE;
+        struct stop_machine_data *smdata = &idle;
+        int cpu = smp_processor_id();
+        int err;
+        if (!active_cpus) {
+                if (cpu == first_cpu(cpu_online_map))
+                        smdata = &active;
+        } else {
+                if (cpu_isset(cpu, *active_cpus))
+                        smdata = &active;
+        }
        /* Simple state machine */
        do {
                /* Chill out and ensure we re-read stopmachine_state. */
@@ -78,9 +87,11 @@ static int stop_cpu(struct stop_machine_data *smdata)
                                hard_irq_disable();
                                break;
                        case STOPMACHINE_RUN:
-                                /* |= allows error detection if functions on
+                                /* On multiple CPUs only a single error code
-                                 * multiple CPUs. */
+                                 * is needed to tell that something failed. */
-                                smdata->fnret |= smdata->fn(smdata->data);
+                                err = smdata->fn(smdata->data);
+                                if (err)
+                                        smdata->fnret = err;
                                break;
                        default:
                                break;
@@ -90,7 +101,6 @@ static int stop_cpu(struct stop_machine_data *smdata)
        } while (curstate != STOPMACHINE_EXIT);
        local_irq_enable();
-        do_exit(0);
 }
 /* Callback for CPUs which aren't supposed to do anything. */
@@ -101,78 +111,34 @@ static int chill(void *unused)
 int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
 {
-        int i, err;
+        struct work_struct *sm_work;
-        struct stop_machine_data active, idle;
+        int i;
-        struct task_struct **threads;
+        /* Set up initial state. */
+        mutex_lock(&lock);
+        num_threads = num_online_cpus();
+        active_cpus = cpus;
        active.fn = fn;
        active.data = data;
        active.fnret = 0;
        idle.fn = chill;
        idle.data = NULL;
-        /* This could be too big for stack on large machines. */
-        threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
-        if (!threads)
-                return -ENOMEM;
-        /* Set up initial state. */
-        mutex_lock(&lock);
-        init_completion(&finished);
-        num_threads = num_online_cpus();
        set_state(STOPMACHINE_PREPARE);
-        for_each_online_cpu(i) {
+        /* Schedule the stop_cpu work on all cpus: hold this CPU so one
-                struct stop_machine_data *smdata = &idle;
-                struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-                if (!cpus) {
-                        if (i == first_cpu(cpu_online_map))
-                                smdata = &active;
-                } else {
-                        if (cpu_isset(i, *cpus))
-                                smdata = &active;
-                }
-                threads[i] = kthread_create((void *)stop_cpu, smdata, "kstop%u",
-                                            i);
-                if (IS_ERR(threads[i])) {
-                        err = PTR_ERR(threads[i]);
-                        threads[i] = NULL;
-                        goto kill_threads;
-                }
-                /* Place it onto correct cpu. */
-                kthread_bind(threads[i], i);
-                /* Make it highest prio. */
-                if (sched_setscheduler_nocheck(threads[i], SCHED_FIFO, &param))
-                        BUG();
-        }
-        /* We've created all the threads.  Wake them all: hold this CPU so one
         * doesn't hit this CPU until we're ready. */
        get_cpu();
-        for_each_online_cpu(i)
+        for_each_online_cpu(i) {
-                wake_up_process(threads[i]);
+                sm_work = percpu_ptr(stop_machine_work, i);
+                INIT_WORK(sm_work, stop_cpu);
+                queue_work_on(i, stop_machine_wq, sm_work);
+        }
        /* This will release the thread on our CPU. */
        put_cpu();
-        wait_for_completion(&finished);
+        flush_workqueue(stop_machine_wq);
        mutex_unlock(&lock);
-        kfree(threads);
        return active.fnret;
-kill_threads:
-        for_each_online_cpu(i)
-                if (threads[i])
-                        kthread_stop(threads[i]);
-        mutex_unlock(&lock);
-        kfree(threads);
-        return err;
 }
 int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
@@ -187,3 +153,11 @@ int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
        return ret;
 }
 EXPORT_SYMBOL_GPL(stop_machine);
+static int __init stop_machine_init(void)
+{
+        stop_machine_wq = create_rt_workqueue("kstop");
+        stop_machine_work = alloc_percpu(struct work_struct);
+        return 0;
+}
+core_initcall(stop_machine_init);
diff --git a/kernel/sys.c b/kernel/sys.c
index 53879cdae483..31deba8f7d16 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1716,6 +1716,16 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
                case PR_SET_TSC:
                        error = SET_TSC_CTL(arg2);
                        break;
+                case PR_GET_TIMERSLACK:
+                        error = current->timer_slack_ns;
+                        break;
+                case PR_SET_TIMERSLACK:
+                        if (arg2 <= 0)
+                                current->timer_slack_ns =
+                                        current->default_timer_slack_ns;
+                        else
+                                current->timer_slack_ns = arg2;
+                        break;
                default:
                        error = -EINVAL;
                        break;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index edb1075f80d2..9d048fa2d902 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -276,6 +276,16 @@ static struct ctl_table kern_table[] = {
        },
        {
                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "sched_shares_thresh",
+                .data           = &sysctl_sched_shares_thresh,
+                .maxlen         = sizeof(unsigned int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec_minmax,
+                .strategy       = &sysctl_intvec,
+                .extra1         = &zero,
+        },
+        {
+                .ctl_name       = CTL_UNNUMBERED,
                .procname       = "sched_child_runs_first",
                .data           = &sysctl_sched_child_runs_first,
                .maxlen         = sizeof(unsigned int),
diff --git a/kernel/time.c b/kernel/time.c
index 6a08660b4fac..d63a4336fad6 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -669,3 +669,21 @@ EXPORT_SYMBOL(get_jiffies_64);
 #endif
 EXPORT_SYMBOL(jiffies);
+/*
+ * Add two timespec values and do a safety check for overflow.
+ * It's assumed that both values are valid (>= 0)
+ */
+struct timespec timespec_add_safe(const struct timespec lhs,
+                                  const struct timespec rhs)
+{
+        struct timespec res;
+        set_normalized_timespec(&res, lhs.tv_sec + rhs.tv_sec,
+                                lhs.tv_nsec + rhs.tv_nsec);
+        if (res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec)
+                res.tv_sec = TIME_T_MAX;
+        return res;
+}
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 1a20715bfd6e..8ff15e5d486b 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -142,8 +142,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
                time_state = TIME_OOP;
                printk(KERN_NOTICE "Clock: "
                       "inserting leap second 23:59:60 UTC\n");
-                leap_timer.expires = ktime_add_ns(leap_timer.expires,
+                hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
-                                                  NSEC_PER_SEC);
                res = HRTIMER_RESTART;
                break;
        case TIME_DEL:
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 0581c11fe6c6..5bbb1044f847 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -300,7 +300,7 @@ void tick_nohz_stop_sched_tick(int inidle)
                                goto out;
                        }
-                        ts->idle_tick = ts->sched_timer.expires;
+                        ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
                        ts->tick_stopped = 1;
                        ts->idle_jiffies = last_jiffies;
                        rcu_enter_nohz();
@@ -380,21 +380,21 @@ ktime_t tick_nohz_get_sleep_length(void)
 static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 {
        hrtimer_cancel(&ts->sched_timer);
-        ts->sched_timer.expires = ts->idle_tick;
+        hrtimer_set_expires(&ts->sched_timer, ts->idle_tick);
        while (1) {
                /* Forward the time to expire in the future */
                hrtimer_forward(&ts->sched_timer, now, tick_period);
                if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
-                        hrtimer_start(&ts->sched_timer,
+                        hrtimer_start_expires(&ts->sched_timer,
-                                      ts->sched_timer.expires,
                                      HRTIMER_MODE_ABS);
                        /* Check, if the timer was already in the past */
                        if (hrtimer_active(&ts->sched_timer))
                                break;
                } else {
-                        if (!tick_program_event(ts->sched_timer.expires, 0))
+                        if (!tick_program_event(
+                                hrtimer_get_expires(&ts->sched_timer), 0))
                                break;
                }
                /* Update jiffies and reread time */
@@ -456,14 +456,16 @@ void tick_nohz_restart_sched_tick(void)
         */
        ts->tick_stopped  = 0;
        ts->idle_exittime = now;
        tick_nohz_restart(ts, now);
        local_irq_enable();
 }
 static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
 {
        hrtimer_forward(&ts->sched_timer, now, tick_period);
-        return tick_program_event(ts->sched_timer.expires, 0);
+        return tick_program_event(hrtimer_get_expires(&ts->sched_timer), 0);
 }
 /*
@@ -542,7 +544,7 @@ static void tick_nohz_switch_to_nohz(void)
        next = tick_init_jiffy_update();
        for (;;) {
-                ts->sched_timer.expires = next;
+                hrtimer_set_expires(&ts->sched_timer, next);
                if (!tick_program_event(next, 0))
                        break;
                next = ktime_add(next, tick_period);
@@ -567,11 +569,21 @@ static void tick_nohz_switch_to_nohz(void)
 static void tick_nohz_kick_tick(int cpu)
 {
        struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+        ktime_t delta, now;
        if (!ts->tick_stopped)
                return;
-        tick_nohz_restart(ts, ktime_get());
+        /*
+         * Do not touch the tick device when the next expiry is either
+         * already reached or less than or equal to the tick period.
+         */
+        now = ktime_get();
+        delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now);
+        if (delta.tv64 <= tick_period.tv64)
+                return;
+        tick_nohz_restart(ts, now);
 }
 #else
@@ -668,16 +680,15 @@ void tick_setup_sched_timer(void)
        ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
        /* Get the next period (per cpu) */
-        ts->sched_timer.expires = tick_init_jiffy_update();
+        hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
        offset = ktime_to_ns(tick_period) >> 1;
        do_div(offset, num_possible_cpus());
        offset *= smp_processor_id();
-        ts->sched_timer.expires = ktime_add_ns(ts->sched_timer.expires, offset);
+        hrtimer_add_expires_ns(&ts->sched_timer, offset);
        for (;;) {
                hrtimer_forward(&ts->sched_timer, now, tick_period);
-                hrtimer_start(&ts->sched_timer, ts->sched_timer.expires,
+                hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS);
-                              HRTIMER_MODE_ABS);
                /* Check, if the timer was already in the past */
                if (hrtimer_active(&ts->sched_timer))
                        break;
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index f6426911e35a..a999b92a1277 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -66,9 +66,11 @@ print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
        SEQ_printf(m, ", %s/%d", tmp, timer->start_pid);
 #endif
        SEQ_printf(m, "\n");
-        SEQ_printf(m, " # expires at %Lu nsecs [in %Ld nsecs]\n",
+        SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n",
-                (unsigned long long)ktime_to_ns(timer->expires),
+                (unsigned long long)ktime_to_ns(hrtimer_get_softexpires(timer)),
-                (long long)(ktime_to_ns(timer->expires) - now));
+                (unsigned long long)ktime_to_ns(hrtimer_get_expires(timer)),
+                (long long)(ktime_to_ns(hrtimer_get_softexpires(timer)) - now),
+                (long long)(ktime_to_ns(hrtimer_get_expires(timer)) - now));
 }
 static void
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 714afad46539..f928f2a87b9b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -62,6 +62,7 @@ struct workqueue_struct {
        const char *name;
        int singlethread;
        int freezeable;         /* Freeze threads during suspend */
+        int rt;
 #ifdef CONFIG_LOCKDEP
        struct lockdep_map lockdep_map;
 #endif
@@ -766,6 +767,7 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
 static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
+        struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
        struct workqueue_struct *wq = cwq->wq;
        const char *fmt = is_single_threaded(wq) ? "%s" : "%s/%d";
        struct task_struct *p;
@@ -781,7 +783,8 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
         */
        if (IS_ERR(p))
                return PTR_ERR(p);
+        if (cwq->wq->rt)
+                sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
        cwq->thread = p;
        return 0;
@@ -801,6 +804,7 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 struct workqueue_struct *__create_workqueue_key(const char *name,
                                                int singlethread,
                                                int freezeable,
+                                                int rt,
                                                struct lock_class_key *key,
                                                const char *lock_name)
 {
@@ -822,6 +826,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
        lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
        wq->singlethread = singlethread;
        wq->freezeable = freezeable;
+        wq->rt = rt;
        INIT_LIST_HEAD(&wq->list);
        if (singlethread) {
author	Ingo Molnar <mingo@elte.hu>	2008-10-27 05:50:54 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-10-27 05:50:54 -0400
commit	4944dd62de21230af039eda7cd218e9a09021d11 (patch)
tree	bac70f7bab8506c7e1b0408bacbdb0b1d77262e9 /kernel
parent	f17845e5d97ead8fbdadfd40039e058ec7cf4a42 (diff)
parent	0173a3265b228da319ceb9c1ec6a5682fd1b2d92 (diff)