path: root/kernel
Diffstat (limited to 'kernel')
-rw-r--r--kernel/acct.c20
-rw-r--r--kernel/cgroup.c2
-rw-r--r--kernel/cgroup_freezer.c21
-rw-r--r--kernel/compat.c25
-rw-r--r--kernel/cred.c2
-rw-r--r--kernel/groups.c6
-rw-r--r--kernel/hrtimer.c67
-rw-r--r--kernel/irq/handle.c3
-rw-r--r--kernel/irq/manage.c89
-rw-r--r--kernel/irq/proc.c60
-rw-r--r--kernel/ksysfs.c3
-rw-r--r--kernel/lockdep.c5
-rw-r--r--kernel/module.c2
-rw-r--r--kernel/pm_qos_params.c218
-rw-r--r--kernel/posix-cpu-timers.c298
-rw-r--r--kernel/power/Makefile3
-rw-r--r--kernel/power/block_io.c103
-rw-r--r--kernel/power/power.h27
-rw-r--r--kernel/power/snapshot.c145
-rw-r--r--kernel/power/swap.c333
-rw-r--r--kernel/power/user.c37
-rw-r--r--kernel/sched.c1
-rw-r--r--kernel/sys.c31
-rw-r--r--kernel/sysctl.c579
-rw-r--r--kernel/sysctl_binary.c1
-rw-r--r--kernel/time.c11
-rw-r--r--kernel/time/clocksource.c48
-rw-r--r--kernel/time/ntp.c2
-rw-r--r--kernel/time/timekeeping.c35
-rw-r--r--kernel/timer.c137
-rw-r--r--kernel/trace/trace_output.c16
-rw-r--r--kernel/user_namespace.c4
-rw-r--r--kernel/workqueue.c36
33 files changed, 1381 insertions, 989 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index e4c0e1fee9b0..385b88461c29 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -216,7 +216,6 @@ static int acct_on(char *name)
216{ 216{
217 struct file *file; 217 struct file *file;
218 struct vfsmount *mnt; 218 struct vfsmount *mnt;
219 int error;
220 struct pid_namespace *ns; 219 struct pid_namespace *ns;
221 struct bsd_acct_struct *acct = NULL; 220 struct bsd_acct_struct *acct = NULL;
222 221
@@ -244,13 +243,6 @@ static int acct_on(char *name)
244 } 243 }
245 } 244 }
246 245
247 error = security_acct(file);
248 if (error) {
249 kfree(acct);
250 filp_close(file, NULL);
251 return error;
252 }
253
254 spin_lock(&acct_lock); 246 spin_lock(&acct_lock);
255 if (ns->bacct == NULL) { 247 if (ns->bacct == NULL) {
256 ns->bacct = acct; 248 ns->bacct = acct;
@@ -281,7 +273,7 @@ static int acct_on(char *name)
281 */ 273 */
282SYSCALL_DEFINE1(acct, const char __user *, name) 274SYSCALL_DEFINE1(acct, const char __user *, name)
283{ 275{
284 int error; 276 int error = 0;
285 277
286 if (!capable(CAP_SYS_PACCT)) 278 if (!capable(CAP_SYS_PACCT))
287 return -EPERM; 279 return -EPERM;
@@ -299,13 +291,11 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
299 if (acct == NULL) 291 if (acct == NULL)
300 return 0; 292 return 0;
301 293
302 error = security_acct(NULL); 294 spin_lock(&acct_lock);
303 if (!error) { 295 acct_file_reopen(acct, NULL, NULL);
304 spin_lock(&acct_lock); 296 spin_unlock(&acct_lock);
305 acct_file_reopen(acct, NULL, NULL);
306 spin_unlock(&acct_lock);
307 }
308 } 297 }
298
309 return error; 299 return error;
310} 300}
311 301
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e9ec642932ee..291775021b2e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3615,7 +3615,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
3615 * @ss: the subsystem to load 3615 * @ss: the subsystem to load
3616 * 3616 *
3617 * This function should be called in a modular subsystem's initcall. If the 3617 * This function should be called in a modular subsystem's initcall. If the
3618 * subsytem is built as a module, it will be assigned a new subsys_id and set 3618 * subsystem is built as a module, it will be assigned a new subsys_id and set
3619 * up for use. If the subsystem is built-in anyway, work is delegated to the 3619 * up for use. If the subsystem is built-in anyway, work is delegated to the
3620 * simpler cgroup_init_subsys. 3620 * simpler cgroup_init_subsys.
3621 */ 3621 */
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e5c0244962b0..ce71ed53e88f 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -89,10 +89,10 @@ struct cgroup_subsys freezer_subsys;
89 89
90/* Locks taken and their ordering 90/* Locks taken and their ordering
91 * ------------------------------ 91 * ------------------------------
92 * css_set_lock
93 * cgroup_mutex (AKA cgroup_lock) 92 * cgroup_mutex (AKA cgroup_lock)
94 * task->alloc_lock (AKA task_lock)
95 * freezer->lock 93 * freezer->lock
94 * css_set_lock
95 * task->alloc_lock (AKA task_lock)
96 * task->sighand->siglock 96 * task->sighand->siglock
97 * 97 *
98 * cgroup code forces css_set_lock to be taken before task->alloc_lock 98 * cgroup code forces css_set_lock to be taken before task->alloc_lock
@@ -100,33 +100,38 @@ struct cgroup_subsys freezer_subsys;
100 * freezer_create(), freezer_destroy(): 100 * freezer_create(), freezer_destroy():
101 * cgroup_mutex [ by cgroup core ] 101 * cgroup_mutex [ by cgroup core ]
102 * 102 *
103 * can_attach(): 103 * freezer_can_attach():
104 * cgroup_mutex 104 * cgroup_mutex (held by caller of can_attach)
105 * 105 *
106 * cgroup_frozen(): 106 * cgroup_freezing_or_frozen():
107 * task->alloc_lock (to get task's cgroup) 107 * task->alloc_lock (to get task's cgroup)
108 * 108 *
109 * freezer_fork() (preserving fork() performance means can't take cgroup_mutex): 109 * freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
110 * task->alloc_lock (to get task's cgroup)
111 * freezer->lock 110 * freezer->lock
112 * sighand->siglock (if the cgroup is freezing) 111 * sighand->siglock (if the cgroup is freezing)
113 * 112 *
114 * freezer_read(): 113 * freezer_read():
115 * cgroup_mutex 114 * cgroup_mutex
116 * freezer->lock 115 * freezer->lock
116 * write_lock css_set_lock (cgroup iterator start)
117 * task->alloc_lock
117 * read_lock css_set_lock (cgroup iterator start) 118 * read_lock css_set_lock (cgroup iterator start)
118 * 119 *
119 * freezer_write() (freeze): 120 * freezer_write() (freeze):
120 * cgroup_mutex 121 * cgroup_mutex
121 * freezer->lock 122 * freezer->lock
123 * write_lock css_set_lock (cgroup iterator start)
124 * task->alloc_lock
122 * read_lock css_set_lock (cgroup iterator start) 125 * read_lock css_set_lock (cgroup iterator start)
123 * sighand->siglock 126 * sighand->siglock (fake signal delivery inside freeze_task())
124 * 127 *
125 * freezer_write() (unfreeze): 128 * freezer_write() (unfreeze):
126 * cgroup_mutex 129 * cgroup_mutex
127 * freezer->lock 130 * freezer->lock
131 * write_lock css_set_lock (cgroup iterator start)
132 * task->alloc_lock
128 * read_lock css_set_lock (cgroup iterator start) 133 * read_lock css_set_lock (cgroup iterator start)
129 * task->alloc_lock (to prevent races with freeze_task()) 134 * task->alloc_lock (inside thaw_process(), prevents race with refrigerator())
130 * sighand->siglock 135 * sighand->siglock
131 */ 136 */
132static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, 137static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
diff --git a/kernel/compat.c b/kernel/compat.c
index 7f40e9275fd9..5adab05a3172 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -495,29 +495,26 @@ asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, unsigned int len,
495{ 495{
496 int ret; 496 int ret;
497 cpumask_var_t mask; 497 cpumask_var_t mask;
498 unsigned long *k;
499 unsigned int min_length = cpumask_size();
500
501 if (nr_cpu_ids <= BITS_PER_COMPAT_LONG)
502 min_length = sizeof(compat_ulong_t);
503 498
504 if (len < min_length) 499 if ((len * BITS_PER_BYTE) < nr_cpu_ids)
500 return -EINVAL;
501 if (len & (sizeof(compat_ulong_t)-1))
505 return -EINVAL; 502 return -EINVAL;
506 503
507 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 504 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
508 return -ENOMEM; 505 return -ENOMEM;
509 506
510 ret = sched_getaffinity(pid, mask); 507 ret = sched_getaffinity(pid, mask);
511 if (ret < 0) 508 if (ret == 0) {
512 goto out; 509 size_t retlen = min_t(size_t, len, cpumask_size());
513 510
514 k = cpumask_bits(mask); 511 if (compat_put_bitmap(user_mask_ptr, cpumask_bits(mask), retlen * 8))
515 ret = compat_put_bitmap(user_mask_ptr, k, min_length * 8); 512 ret = -EFAULT;
516 if (ret == 0) 513 else
517 ret = min_length; 514 ret = retlen;
518 515 }
519out:
520 free_cpumask_var(mask); 516 free_cpumask_var(mask);
517
521 return ret; 518 return ret;
522} 519}
523 520
diff --git a/kernel/cred.c b/kernel/cred.c
index 8f3672a58a1e..2c24870c55d1 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -522,8 +522,6 @@ int commit_creds(struct cred *new)
522#endif 522#endif
523 BUG_ON(atomic_read(&new->usage) < 1); 523 BUG_ON(atomic_read(&new->usage) < 1);
524 524
525 security_commit_creds(new, old);
526
527 get_cred(new); /* we will require a ref for the subj creds too */ 525 get_cred(new); /* we will require a ref for the subj creds too */
528 526
529 /* dumpability changes */ 527 /* dumpability changes */
diff --git a/kernel/groups.c b/kernel/groups.c
index 2b45b2ee3964..53b1916c9492 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -164,12 +164,6 @@ int groups_search(const struct group_info *group_info, gid_t grp)
164 */ 164 */
165int set_groups(struct cred *new, struct group_info *group_info) 165int set_groups(struct cred *new, struct group_info *group_info)
166{ 166{
167 int retval;
168
169 retval = security_task_setgroups(group_info);
170 if (retval)
171 return retval;
172
173 put_group_info(new->group_info); 167 put_group_info(new->group_info);
174 groups_sort(group_info); 168 groups_sort(group_info);
175 get_group_info(group_info); 169 get_group_info(group_info);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 0086628b6e97..b9b134b35088 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1749,35 +1749,15 @@ void __init hrtimers_init(void)
1749} 1749}
1750 1750
1751/** 1751/**
1752 * schedule_hrtimeout_range - sleep until timeout 1752 * schedule_hrtimeout_range_clock - sleep until timeout
1753 * @expires: timeout value (ktime_t) 1753 * @expires: timeout value (ktime_t)
1754 * @delta: slack in expires timeout (ktime_t) 1754 * @delta: slack in expires timeout (ktime_t)
1755 * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL 1755 * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
1756 * 1756 * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
1757 * Make the current task sleep until the given expiry time has
1758 * elapsed. The routine will return immediately unless
1759 * the current task state has been set (see set_current_state()).
1760 *
1761 * The @delta argument gives the kernel the freedom to schedule the
1762 * actual wakeup to a time that is both power and performance friendly.
1763 * The kernel give the normal best effort behavior for "@expires+@delta",
1764 * but may decide to fire the timer earlier, but no earlier than @expires.
1765 *
1766 * You can set the task state as follows -
1767 *
1768 * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
1769 * pass before the routine returns.
1770 *
1771 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1772 * delivered to the current task.
1773 *
1774 * The current task state is guaranteed to be TASK_RUNNING when this
1775 * routine returns.
1776 *
1777 * Returns 0 when the timer has expired otherwise -EINTR
1778 */ 1757 */
1779int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, 1758int __sched
1780 const enum hrtimer_mode mode) 1759schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
1760 const enum hrtimer_mode mode, int clock)
1781{ 1761{
1782 struct hrtimer_sleeper t; 1762 struct hrtimer_sleeper t;
1783 1763
@@ -1799,7 +1779,7 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
1799 return -EINTR; 1779 return -EINTR;
1800 } 1780 }
1801 1781
1802 hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode); 1782 hrtimer_init_on_stack(&t.timer, clock, mode);
1803 hrtimer_set_expires_range_ns(&t.timer, *expires, delta); 1783 hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
1804 1784
1805 hrtimer_init_sleeper(&t, current); 1785 hrtimer_init_sleeper(&t, current);
@@ -1818,6 +1798,41 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
1818 1798
1819 return !t.task ? 0 : -EINTR; 1799 return !t.task ? 0 : -EINTR;
1820} 1800}
1801
1802/**
1803 * schedule_hrtimeout_range - sleep until timeout
1804 * @expires: timeout value (ktime_t)
1805 * @delta: slack in expires timeout (ktime_t)
1806 * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
1807 *
1808 * Make the current task sleep until the given expiry time has
1809 * elapsed. The routine will return immediately unless
1810 * the current task state has been set (see set_current_state()).
1811 *
1812 * The @delta argument gives the kernel the freedom to schedule the
1813 * actual wakeup to a time that is both power and performance friendly.
1814 * The kernel give the normal best effort behavior for "@expires+@delta",
1815 * but may decide to fire the timer earlier, but no earlier than @expires.
1816 *
1817 * You can set the task state as follows -
1818 *
1819 * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
1820 * pass before the routine returns.
1821 *
1822 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1823 * delivered to the current task.
1824 *
1825 * The current task state is guaranteed to be TASK_RUNNING when this
1826 * routine returns.
1827 *
1828 * Returns 0 when the timer has expired otherwise -EINTR
1829 */
1830int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
1831 const enum hrtimer_mode mode)
1832{
1833 return schedule_hrtimeout_range_clock(expires, delta, mode,
1834 CLOCK_MONOTONIC);
1835}
1821EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); 1836EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
1822 1837
1823/** 1838/**
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 76d5a671bfe1..27e5c6911223 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -370,9 +370,6 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
370 irqreturn_t ret, retval = IRQ_NONE; 370 irqreturn_t ret, retval = IRQ_NONE;
371 unsigned int status = 0; 371 unsigned int status = 0;
372 372
373 if (!(action->flags & IRQF_DISABLED))
374 local_irq_enable_in_hardirq();
375
376 do { 373 do {
377 trace_irq_handler_entry(irq, action); 374 trace_irq_handler_entry(irq, action);
378 ret = action->handler(irq, action->dev_id); 375 ret = action->handler(irq, action->dev_id);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 704e488730a5..3164ba7ce151 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -138,6 +138,22 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
138 return 0; 138 return 0;
139} 139}
140 140
141int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
142{
143 struct irq_desc *desc = irq_to_desc(irq);
144 unsigned long flags;
145
146 if (!desc)
147 return -EINVAL;
148
149 raw_spin_lock_irqsave(&desc->lock, flags);
150 desc->affinity_hint = m;
151 raw_spin_unlock_irqrestore(&desc->lock, flags);
152
153 return 0;
154}
155EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
156
141#ifndef CONFIG_AUTO_IRQ_AFFINITY 157#ifndef CONFIG_AUTO_IRQ_AFFINITY
142/* 158/*
143 * Generic version of the affinity autoselector. 159 * Generic version of the affinity autoselector.
@@ -757,16 +773,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
757 if (new->flags & IRQF_ONESHOT) 773 if (new->flags & IRQF_ONESHOT)
758 desc->status |= IRQ_ONESHOT; 774 desc->status |= IRQ_ONESHOT;
759 775
760 /*
761 * Force MSI interrupts to run with interrupts
762 * disabled. The multi vector cards can cause stack
763 * overflows due to nested interrupts when enough of
764 * them are directed to a core and fire at the same
765 * time.
766 */
767 if (desc->msi_desc)
768 new->flags |= IRQF_DISABLED;
769
770 if (!(desc->status & IRQ_NOAUTOEN)) { 776 if (!(desc->status & IRQ_NOAUTOEN)) {
771 desc->depth = 0; 777 desc->depth = 0;
772 desc->status &= ~IRQ_DISABLED; 778 desc->status &= ~IRQ_DISABLED;
@@ -916,6 +922,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
916 desc->chip->disable(irq); 922 desc->chip->disable(irq);
917 } 923 }
918 924
925#ifdef CONFIG_SMP
926 /* make sure affinity_hint is cleaned up */
927 if (WARN_ON_ONCE(desc->affinity_hint))
928 desc->affinity_hint = NULL;
929#endif
930
919 raw_spin_unlock_irqrestore(&desc->lock, flags); 931 raw_spin_unlock_irqrestore(&desc->lock, flags);
920 932
921 unregister_handler_proc(irq, action); 933 unregister_handler_proc(irq, action);
@@ -1027,7 +1039,6 @@ EXPORT_SYMBOL(free_irq);
1027 * Flags: 1039 * Flags:
1028 * 1040 *
1029 * IRQF_SHARED Interrupt is shared 1041 * IRQF_SHARED Interrupt is shared
1030 * IRQF_DISABLED Disable local interrupts while processing
1031 * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy 1042 * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy
1032 * IRQF_TRIGGER_* Specify active edge(s) or level 1043 * IRQF_TRIGGER_* Specify active edge(s) or level
1033 * 1044 *
@@ -1041,25 +1052,6 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
1041 int retval; 1052 int retval;
1042 1053
1043 /* 1054 /*
1044 * handle_IRQ_event() always ignores IRQF_DISABLED except for
1045 * the _first_ irqaction (sigh). That can cause oopsing, but
1046 * the behavior is classified as "will not fix" so we need to
1047 * start nudging drivers away from using that idiom.
1048 */
1049 if ((irqflags & (IRQF_SHARED|IRQF_DISABLED)) ==
1050 (IRQF_SHARED|IRQF_DISABLED)) {
1051 pr_warning(
1052 "IRQ %d/%s: IRQF_DISABLED is not guaranteed on shared IRQs\n",
1053 irq, devname);
1054 }
1055
1056#ifdef CONFIG_LOCKDEP
1057 /*
1058 * Lockdep wants atomic interrupt handlers:
1059 */
1060 irqflags |= IRQF_DISABLED;
1061#endif
1062 /*
1063 * Sanity-check: shared interrupts must pass in a real dev-ID, 1055 * Sanity-check: shared interrupts must pass in a real dev-ID,
1064 * otherwise we'll have trouble later trying to figure out 1056 * otherwise we'll have trouble later trying to figure out
1065 * which interrupt is which (messes up the interrupt freeing 1057 * which interrupt is which (messes up the interrupt freeing
@@ -1120,3 +1112,40 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
1120 return retval; 1112 return retval;
1121} 1113}
1122EXPORT_SYMBOL(request_threaded_irq); 1114EXPORT_SYMBOL(request_threaded_irq);
1115
1116/**
1117 * request_any_context_irq - allocate an interrupt line
1118 * @irq: Interrupt line to allocate
1119 * @handler: Function to be called when the IRQ occurs.
1120 * Threaded handler for threaded interrupts.
1121 * @flags: Interrupt type flags
1122 * @name: An ascii name for the claiming device
1123 * @dev_id: A cookie passed back to the handler function
1124 *
1125 * This call allocates interrupt resources and enables the
1126 * interrupt line and IRQ handling. It selects either a
1127 * hardirq or threaded handling method depending on the
1128 * context.
1129 *
1130 * On failure, it returns a negative value. On success,
1131 * it returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED.
1132 */
1133int request_any_context_irq(unsigned int irq, irq_handler_t handler,
1134 unsigned long flags, const char *name, void *dev_id)
1135{
1136 struct irq_desc *desc = irq_to_desc(irq);
1137 int ret;
1138
1139 if (!desc)
1140 return -EINVAL;
1141
1142 if (desc->status & IRQ_NESTED_THREAD) {
1143 ret = request_threaded_irq(irq, NULL, handler,
1144 flags, name, dev_id);
1145 return !ret ? IRQC_IS_NESTED : ret;
1146 }
1147
1148 ret = request_irq(irq, handler, flags, name, dev_id);
1149 return !ret ? IRQC_IS_HARDIRQ : ret;
1150}
1151EXPORT_SYMBOL_GPL(request_any_context_irq);
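
Taken together, the additions above give drivers a per-IRQ affinity hint plus a context-agnostic request helper. A hedged driver-side sketch; the example_* names and the queue mask are illustrative, not from this patch, and the hint mask must stay valid for as long as it is installed:

static irqreturn_t example_handler(int irq, void *dev_id)
{
        return IRQ_HANDLED;
}

static int example_setup_irq(unsigned int irq, void *dev,
                             const struct cpumask *queue_mask)
{
        int ret;

        /* picks hardirq or nested-threaded handling for us */
        ret = request_any_context_irq(irq, example_handler, 0,
                                      "example", dev);
        if (ret < 0)
                return ret;     /* else IRQC_IS_HARDIRQ or IRQC_IS_NESTED */

        /* publish where we would like this vector steered */
        irq_set_affinity_hint(irq, queue_mask);
        return 0;
}

static void example_teardown_irq(unsigned int irq, void *dev)
{
        /* clear the hint first, or __free_irq() will WARN and do it */
        irq_set_affinity_hint(irq, NULL);
        free_irq(irq, dev);
}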
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 7a6eb04ef6b5..09a2ee540bd2 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -32,6 +32,27 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v)
32 return 0; 32 return 0;
33} 33}
34 34
35static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
36{
37 struct irq_desc *desc = irq_to_desc((long)m->private);
38 unsigned long flags;
39 cpumask_var_t mask;
40
41 if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
42 return -ENOMEM;
43
44 raw_spin_lock_irqsave(&desc->lock, flags);
45 if (desc->affinity_hint)
46 cpumask_copy(mask, desc->affinity_hint);
47 raw_spin_unlock_irqrestore(&desc->lock, flags);
48
49 seq_cpumask(m, mask);
50 seq_putc(m, '\n');
51 free_cpumask_var(mask);
52
53 return 0;
54}
55
35#ifndef is_affinity_mask_valid 56#ifndef is_affinity_mask_valid
36#define is_affinity_mask_valid(val) 1 57#define is_affinity_mask_valid(val) 1
37#endif 58#endif
@@ -84,6 +105,11 @@ static int irq_affinity_proc_open(struct inode *inode, struct file *file)
84 return single_open(file, irq_affinity_proc_show, PDE(inode)->data); 105 return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
85} 106}
86 107
108static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file)
109{
110 return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data);
111}
112
87static const struct file_operations irq_affinity_proc_fops = { 113static const struct file_operations irq_affinity_proc_fops = {
88 .open = irq_affinity_proc_open, 114 .open = irq_affinity_proc_open,
89 .read = seq_read, 115 .read = seq_read,
@@ -92,6 +118,13 @@ static const struct file_operations irq_affinity_proc_fops = {
92 .write = irq_affinity_proc_write, 118 .write = irq_affinity_proc_write,
93}; 119};
94 120
121static const struct file_operations irq_affinity_hint_proc_fops = {
122 .open = irq_affinity_hint_proc_open,
123 .read = seq_read,
124 .llseek = seq_lseek,
125 .release = single_release,
126};
127
95static int default_affinity_show(struct seq_file *m, void *v) 128static int default_affinity_show(struct seq_file *m, void *v)
96{ 129{
97 seq_cpumask(m, irq_default_affinity); 130 seq_cpumask(m, irq_default_affinity);
@@ -147,6 +180,26 @@ static const struct file_operations default_affinity_proc_fops = {
147 .release = single_release, 180 .release = single_release,
148 .write = default_affinity_write, 181 .write = default_affinity_write,
149}; 182};
183
184static int irq_node_proc_show(struct seq_file *m, void *v)
185{
186 struct irq_desc *desc = irq_to_desc((long) m->private);
187
188 seq_printf(m, "%d\n", desc->node);
189 return 0;
190}
191
192static int irq_node_proc_open(struct inode *inode, struct file *file)
193{
194 return single_open(file, irq_node_proc_show, PDE(inode)->data);
195}
196
197static const struct file_operations irq_node_proc_fops = {
198 .open = irq_node_proc_open,
199 .read = seq_read,
200 .llseek = seq_lseek,
201 .release = single_release,
202};
150#endif 203#endif
151 204
152static int irq_spurious_proc_show(struct seq_file *m, void *v) 205static int irq_spurious_proc_show(struct seq_file *m, void *v)
@@ -231,6 +284,13 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
231 /* create /proc/irq/<irq>/smp_affinity */ 284 /* create /proc/irq/<irq>/smp_affinity */
232 proc_create_data("smp_affinity", 0600, desc->dir, 285 proc_create_data("smp_affinity", 0600, desc->dir,
233 &irq_affinity_proc_fops, (void *)(long)irq); 286 &irq_affinity_proc_fops, (void *)(long)irq);
287
288 /* create /proc/irq/<irq>/affinity_hint */
289 proc_create_data("affinity_hint", 0400, desc->dir,
290 &irq_affinity_hint_proc_fops, (void *)(long)irq);
291
292 proc_create_data("node", 0444, desc->dir,
293 &irq_node_proc_fops, (void *)(long)irq);
234#endif 294#endif
235 295
236 proc_create_data("spurious", 0444, desc->dir, 296 proc_create_data("spurious", 0444, desc->dir,
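
Both new entries are plain single_open seq files, so they can be consumed with ordinary reads. A small user-space sketch (the IRQ number 30 is arbitrary); affinity_hint prints a hex cpumask, node a decimal NUMA node:

#include <stdio.h>

int main(void)
{
        char buf[256];
        FILE *f;

        f = fopen("/proc/irq/30/affinity_hint", "r");
        if (f) {
                if (fgets(buf, sizeof(buf), f))
                        printf("affinity_hint: %s", buf);
                fclose(f);
        }

        f = fopen("/proc/irq/30/node", "r");
        if (f) {
                if (fgets(buf, sizeof(buf), f))
                        printf("node: %s", buf);
                fclose(f);
        }
        return 0;
}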
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 21fe3c426948..0b624e791805 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -138,7 +138,8 @@ extern const void __start_notes __attribute__((weak));
138extern const void __stop_notes __attribute__((weak)); 138extern const void __stop_notes __attribute__((weak));
139#define notes_size (&__stop_notes - &__start_notes) 139#define notes_size (&__stop_notes - &__start_notes)
140 140
141static ssize_t notes_read(struct kobject *kobj, struct bin_attribute *bin_attr, 141static ssize_t notes_read(struct file *filp, struct kobject *kobj,
142 struct bin_attribute *bin_attr,
142 char *buf, loff_t off, size_t count) 143 char *buf, loff_t off, size_t count)
143{ 144{
144 memcpy(buf, &__start_notes + off, count); 145 memcpy(buf, &__start_notes + off, count);
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index ec21304856d1..54286798c37b 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2711,6 +2711,8 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
2711} 2711}
2712EXPORT_SYMBOL_GPL(lockdep_init_map); 2712EXPORT_SYMBOL_GPL(lockdep_init_map);
2713 2713
2714struct lock_class_key __lockdep_no_validate__;
2715
2714/* 2716/*
2715 * This gets called for every mutex_lock*()/spin_lock*() operation. 2717 * This gets called for every mutex_lock*()/spin_lock*() operation.
2716 * We maintain the dependency maps and validate the locking attempt: 2718 * We maintain the dependency maps and validate the locking attempt:
@@ -2745,6 +2747,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2745 return 0; 2747 return 0;
2746 } 2748 }
2747 2749
2750 if (lock->key == &__lockdep_no_validate__)
2751 check = 1;
2752
2748 if (!subclass) 2753 if (!subclass)
2749 class = lock->class_cache; 2754 class = lock->class_cache;
2750 /* 2755 /*
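
The new key lets a subsystem opt an individual lock out of full dependency validation: the lock is still tracked, but check is forced to 1 so its chains are not cross-checked. A hedged sketch using the long-standing lockdep_set_class() helper with the key added here (example_mutex is illustrative):

static DEFINE_MUTEX(example_mutex);

static void __init example_init(void)
{
        /* track this mutex under the no-validate key added above */
        lockdep_set_class(&example_mutex, &__lockdep_no_validate__);
}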
diff --git a/kernel/module.c b/kernel/module.c
index e2564580f3f1..5e14483768bb 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1182,7 +1182,7 @@ struct module_notes_attrs {
1182 struct bin_attribute attrs[0]; 1182 struct bin_attribute attrs[0];
1183}; 1183};
1184 1184
1185static ssize_t module_notes_read(struct kobject *kobj, 1185static ssize_t module_notes_read(struct file *filp, struct kobject *kobj,
1186 struct bin_attribute *bin_attr, 1186 struct bin_attribute *bin_attr,
1187 char *buf, loff_t pos, size_t count) 1187 char *buf, loff_t pos, size_t count)
1188{ 1188{
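
This hunk, like the ksysfs.c one above, adapts a caller to the sysfs change that adds the opening struct file * to bin_attribute ->read(). A hedged sketch of the updated callback shape, with illustrative example_* names:

static ssize_t example_bin_read(struct file *filp, struct kobject *kobj,
                                struct bin_attribute *attr,
                                char *buf, loff_t off, size_t count)
{
        /* fill at most 'count' bytes at offset 'off'; return bytes copied */
        return 0;
}

static struct bin_attribute example_attr = {
        .attr   = { .name = "example", .mode = 0444 },
        .read   = example_bin_read,
};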
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index 3db49b9ca374..f42d3f737a33 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -2,7 +2,7 @@
2 * This module exposes the interface to kernel space for specifying 2 * This module exposes the interface to kernel space for specifying
3 * QoS dependencies. It provides infrastructure for registration of: 3 * QoS dependencies. It provides infrastructure for registration of:
4 * 4 *
5 * Dependents on a QoS value : register requirements 5 * Dependents on a QoS value : register requests
6 * Watchers of QoS value : get notified when target QoS value changes 6 * Watchers of QoS value : get notified when target QoS value changes
7 * 7 *
8 * This QoS design is best effort based. Dependents register their QoS needs. 8 * This QoS design is best effort based. Dependents register their QoS needs.
@@ -14,19 +14,21 @@
14 * timeout: usec <-- currently not used. 14 * timeout: usec <-- currently not used.
15 * throughput: kbs (kilo byte / sec) 15 * throughput: kbs (kilo byte / sec)
16 * 16 *
17 * There are lists of pm_qos_objects each one wrapping requirements, notifiers 17 * There are lists of pm_qos_objects each one wrapping requests, notifiers
18 * 18 *
19 * User mode requirements on a QOS parameter register themselves to the 19 * User mode requests on a QOS parameter register themselves to the
20 * subsystem by opening the device node /dev/... and writing there request to 20 * subsystem by opening the device node /dev/... and writing there request to
21 * the node. As long as the process holds a file handle open to the node the 21 * the node. As long as the process holds a file handle open to the node the
22 * client continues to be accounted for. Upon file release the usermode 22 * client continues to be accounted for. Upon file release the usermode
23 * requirement is removed and a new qos target is computed. This way when the 23 * request is removed and a new qos target is computed. This way when the
24 * requirement that the application has is cleaned up when closes the file 24 * request that the application has is cleaned up when closes the file
25 * pointer or exits the pm_qos_object will get an opportunity to clean up. 25 * pointer or exits the pm_qos_object will get an opportunity to clean up.
26 * 26 *
27 * Mark Gross <mgross@linux.intel.com> 27 * Mark Gross <mgross@linux.intel.com>
28 */ 28 */
29 29
30/*#define DEBUG*/
31
30#include <linux/pm_qos_params.h> 32#include <linux/pm_qos_params.h>
31#include <linux/sched.h> 33#include <linux/sched.h>
32#include <linux/spinlock.h> 34#include <linux/spinlock.h>
@@ -42,25 +44,25 @@
42#include <linux/uaccess.h> 44#include <linux/uaccess.h>
43 45
44/* 46/*
45 * locking rule: all changes to requirements or notifiers lists 47 * locking rule: all changes to requests or notifiers lists
46 * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock 48 * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
47 * held, taken with _irqsave. One lock to rule them all 49 * held, taken with _irqsave. One lock to rule them all
48 */ 50 */
49struct requirement_list { 51struct pm_qos_request_list {
50 struct list_head list; 52 struct list_head list;
51 union { 53 union {
52 s32 value; 54 s32 value;
53 s32 usec; 55 s32 usec;
54 s32 kbps; 56 s32 kbps;
55 }; 57 };
56 char *name; 58 int pm_qos_class;
57}; 59};
58 60
59static s32 max_compare(s32 v1, s32 v2); 61static s32 max_compare(s32 v1, s32 v2);
60static s32 min_compare(s32 v1, s32 v2); 62static s32 min_compare(s32 v1, s32 v2);
61 63
62struct pm_qos_object { 64struct pm_qos_object {
63 struct requirement_list requirements; 65 struct pm_qos_request_list requests;
64 struct blocking_notifier_head *notifiers; 66 struct blocking_notifier_head *notifiers;
65 struct miscdevice pm_qos_power_miscdev; 67 struct miscdevice pm_qos_power_miscdev;
66 char *name; 68 char *name;
@@ -72,7 +74,7 @@ struct pm_qos_object {
72static struct pm_qos_object null_pm_qos; 74static struct pm_qos_object null_pm_qos;
73static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier); 75static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier);
74static struct pm_qos_object cpu_dma_pm_qos = { 76static struct pm_qos_object cpu_dma_pm_qos = {
75 .requirements = {LIST_HEAD_INIT(cpu_dma_pm_qos.requirements.list)}, 77 .requests = {LIST_HEAD_INIT(cpu_dma_pm_qos.requests.list)},
76 .notifiers = &cpu_dma_lat_notifier, 78 .notifiers = &cpu_dma_lat_notifier,
77 .name = "cpu_dma_latency", 79 .name = "cpu_dma_latency",
78 .default_value = 2000 * USEC_PER_SEC, 80 .default_value = 2000 * USEC_PER_SEC,
@@ -82,7 +84,7 @@ static struct pm_qos_object cpu_dma_pm_qos = {
82 84
83static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); 85static BLOCKING_NOTIFIER_HEAD(network_lat_notifier);
84static struct pm_qos_object network_lat_pm_qos = { 86static struct pm_qos_object network_lat_pm_qos = {
85 .requirements = {LIST_HEAD_INIT(network_lat_pm_qos.requirements.list)}, 87 .requests = {LIST_HEAD_INIT(network_lat_pm_qos.requests.list)},
86 .notifiers = &network_lat_notifier, 88 .notifiers = &network_lat_notifier,
87 .name = "network_latency", 89 .name = "network_latency",
88 .default_value = 2000 * USEC_PER_SEC, 90 .default_value = 2000 * USEC_PER_SEC,
@@ -93,8 +95,7 @@ static struct pm_qos_object network_lat_pm_qos = {
93 95
94static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier); 96static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier);
95static struct pm_qos_object network_throughput_pm_qos = { 97static struct pm_qos_object network_throughput_pm_qos = {
96 .requirements = 98 .requests = {LIST_HEAD_INIT(network_throughput_pm_qos.requests.list)},
97 {LIST_HEAD_INIT(network_throughput_pm_qos.requirements.list)},
98 .notifiers = &network_throughput_notifier, 99 .notifiers = &network_throughput_notifier,
99 .name = "network_throughput", 100 .name = "network_throughput",
100 .default_value = 0, 101 .default_value = 0,
@@ -135,31 +136,34 @@ static s32 min_compare(s32 v1, s32 v2)
135} 136}
136 137
137 138
138static void update_target(int target) 139static void update_target(int pm_qos_class)
139{ 140{
140 s32 extreme_value; 141 s32 extreme_value;
141 struct requirement_list *node; 142 struct pm_qos_request_list *node;
142 unsigned long flags; 143 unsigned long flags;
143 int call_notifier = 0; 144 int call_notifier = 0;
144 145
145 spin_lock_irqsave(&pm_qos_lock, flags); 146 spin_lock_irqsave(&pm_qos_lock, flags);
146 extreme_value = pm_qos_array[target]->default_value; 147 extreme_value = pm_qos_array[pm_qos_class]->default_value;
147 list_for_each_entry(node, 148 list_for_each_entry(node,
148 &pm_qos_array[target]->requirements.list, list) { 149 &pm_qos_array[pm_qos_class]->requests.list, list) {
149 extreme_value = pm_qos_array[target]->comparitor( 150 extreme_value = pm_qos_array[pm_qos_class]->comparitor(
150 extreme_value, node->value); 151 extreme_value, node->value);
151 } 152 }
152 if (atomic_read(&pm_qos_array[target]->target_value) != extreme_value) { 153 if (atomic_read(&pm_qos_array[pm_qos_class]->target_value) !=
154 extreme_value) {
153 call_notifier = 1; 155 call_notifier = 1;
154 atomic_set(&pm_qos_array[target]->target_value, extreme_value); 156 atomic_set(&pm_qos_array[pm_qos_class]->target_value,
155 pr_debug(KERN_ERR "new target for qos %d is %d\n", target, 157 extreme_value);
156 atomic_read(&pm_qos_array[target]->target_value)); 158 pr_debug(KERN_ERR "new target for qos %d is %d\n", pm_qos_class,
159 atomic_read(&pm_qos_array[pm_qos_class]->target_value));
157 } 160 }
158 spin_unlock_irqrestore(&pm_qos_lock, flags); 161 spin_unlock_irqrestore(&pm_qos_lock, flags);
159 162
160 if (call_notifier) 163 if (call_notifier)
161 blocking_notifier_call_chain(pm_qos_array[target]->notifiers, 164 blocking_notifier_call_chain(
162 (unsigned long) extreme_value, NULL); 165 pm_qos_array[pm_qos_class]->notifiers,
166 (unsigned long) extreme_value, NULL);
163} 167}
164 168
165static int register_pm_qos_misc(struct pm_qos_object *qos) 169static int register_pm_qos_misc(struct pm_qos_object *qos)
@@ -185,125 +189,112 @@ static int find_pm_qos_object_by_minor(int minor)
185} 189}
186 190
187/** 191/**
188 * pm_qos_requirement - returns current system wide qos expectation 192 * pm_qos_request - returns current system wide qos expectation
189 * @pm_qos_class: identification of which qos value is requested 193 * @pm_qos_class: identification of which qos value is requested
190 * 194 *
191 * This function returns the current target value in an atomic manner. 195 * This function returns the current target value in an atomic manner.
192 */ 196 */
193int pm_qos_requirement(int pm_qos_class) 197int pm_qos_request(int pm_qos_class)
194{ 198{
195 return atomic_read(&pm_qos_array[pm_qos_class]->target_value); 199 return atomic_read(&pm_qos_array[pm_qos_class]->target_value);
196} 200}
197EXPORT_SYMBOL_GPL(pm_qos_requirement); 201EXPORT_SYMBOL_GPL(pm_qos_request);
198 202
199/** 203/**
200 * pm_qos_add_requirement - inserts new qos request into the list 204 * pm_qos_add_request - inserts new qos request into the list
201 * @pm_qos_class: identifies which list of qos request to us 205 * @pm_qos_class: identifies which list of qos request to us
202 * @name: identifies the request
203 * @value: defines the qos request 206 * @value: defines the qos request
204 * 207 *
205 * This function inserts a new entry in the pm_qos_class list of requested qos 208 * This function inserts a new entry in the pm_qos_class list of requested qos
206 * performance characteristics. It recomputes the aggregate QoS expectations 209 * performance characteristics. It recomputes the aggregate QoS expectations
207 * for the pm_qos_class of parameters. 210 * for the pm_qos_class of parameters, and returns the pm_qos_request list
211 * element as a handle for use in updating and removal. Call needs to save
212 * this handle for later use.
208 */ 213 */
209int pm_qos_add_requirement(int pm_qos_class, char *name, s32 value) 214struct pm_qos_request_list *pm_qos_add_request(int pm_qos_class, s32 value)
210{ 215{
211 struct requirement_list *dep; 216 struct pm_qos_request_list *dep;
212 unsigned long flags; 217 unsigned long flags;
213 218
214 dep = kzalloc(sizeof(struct requirement_list), GFP_KERNEL); 219 dep = kzalloc(sizeof(struct pm_qos_request_list), GFP_KERNEL);
215 if (dep) { 220 if (dep) {
216 if (value == PM_QOS_DEFAULT_VALUE) 221 if (value == PM_QOS_DEFAULT_VALUE)
217 dep->value = pm_qos_array[pm_qos_class]->default_value; 222 dep->value = pm_qos_array[pm_qos_class]->default_value;
218 else 223 else
219 dep->value = value; 224 dep->value = value;
220 dep->name = kstrdup(name, GFP_KERNEL); 225 dep->pm_qos_class = pm_qos_class;
221 if (!dep->name)
222 goto cleanup;
223 226
224 spin_lock_irqsave(&pm_qos_lock, flags); 227 spin_lock_irqsave(&pm_qos_lock, flags);
225 list_add(&dep->list, 228 list_add(&dep->list,
226 &pm_qos_array[pm_qos_class]->requirements.list); 229 &pm_qos_array[pm_qos_class]->requests.list);
227 spin_unlock_irqrestore(&pm_qos_lock, flags); 230 spin_unlock_irqrestore(&pm_qos_lock, flags);
228 update_target(pm_qos_class); 231 update_target(pm_qos_class);
229
230 return 0;
231 } 232 }
232 233
233cleanup: 234 return dep;
234 kfree(dep);
235 return -ENOMEM;
236} 235}
237EXPORT_SYMBOL_GPL(pm_qos_add_requirement); 236EXPORT_SYMBOL_GPL(pm_qos_add_request);
238 237
239/** 238/**
240 * pm_qos_update_requirement - modifies an existing qos request 239 * pm_qos_update_request - modifies an existing qos request
241 * @pm_qos_class: identifies which list of qos request to us 240 * @pm_qos_req : handle to list element holding a pm_qos request to use
242 * @name: identifies the request
243 * @value: defines the qos request 241 * @value: defines the qos request
244 * 242 *
245 * Updates an existing qos requirement for the pm_qos_class of parameters along 243 * Updates an existing qos request for the pm_qos_class of parameters along
246 * with updating the target pm_qos_class value. 244 * with updating the target pm_qos_class value.
247 * 245 *
248 * If the named request isn't in the list then no change is made. 246 * Attempts are made to make this code callable on hot code paths.
249 */ 247 */
250int pm_qos_update_requirement(int pm_qos_class, char *name, s32 new_value) 248void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req,
249 s32 new_value)
251{ 250{
252 unsigned long flags; 251 unsigned long flags;
253 struct requirement_list *node;
254 int pending_update = 0; 252 int pending_update = 0;
253 s32 temp;
255 254
256 spin_lock_irqsave(&pm_qos_lock, flags); 255 if (pm_qos_req) { /*guard against callers passing in null */
257 list_for_each_entry(node, 256 spin_lock_irqsave(&pm_qos_lock, flags);
258 &pm_qos_array[pm_qos_class]->requirements.list, list) { 257 if (new_value == PM_QOS_DEFAULT_VALUE)
259 if (strcmp(node->name, name) == 0) { 258 temp = pm_qos_array[pm_qos_req->pm_qos_class]->default_value;
260 if (new_value == PM_QOS_DEFAULT_VALUE) 259 else
261 node->value = 260 temp = new_value;
262 pm_qos_array[pm_qos_class]->default_value; 261
263 else 262 if (temp != pm_qos_req->value) {
264 node->value = new_value;
265 pending_update = 1; 263 pending_update = 1;
266 break; 264 pm_qos_req->value = temp;
267 } 265 }
266 spin_unlock_irqrestore(&pm_qos_lock, flags);
267 if (pending_update)
268 update_target(pm_qos_req->pm_qos_class);
268 } 269 }
269 spin_unlock_irqrestore(&pm_qos_lock, flags);
270 if (pending_update)
271 update_target(pm_qos_class);
272
273 return 0;
274} 270}
275EXPORT_SYMBOL_GPL(pm_qos_update_requirement); 271EXPORT_SYMBOL_GPL(pm_qos_update_request);
276 272
277/** 273/**
278 * pm_qos_remove_requirement - modifies an existing qos request 274 * pm_qos_remove_request - modifies an existing qos request
279 * @pm_qos_class: identifies which list of qos request to us 275 * @pm_qos_req: handle to request list element
280 * @name: identifies the request
281 * 276 *
282 * Will remove named qos request from pm_qos_class list of parameters and 277 * Will remove pm qos request from the list of requests and
283 * recompute the current target value for the pm_qos_class. 278 * recompute the current target value for the pm_qos_class. Call this
279 * on slow code paths.
284 */ 280 */
285void pm_qos_remove_requirement(int pm_qos_class, char *name) 281void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req)
286{ 282{
287 unsigned long flags; 283 unsigned long flags;
288 struct requirement_list *node; 284 int qos_class;
289 int pending_update = 0;
290 285
286 if (pm_qos_req == NULL)
287 return;
288 /* silent return to keep pcm code cleaner */
289
290 qos_class = pm_qos_req->pm_qos_class;
291 spin_lock_irqsave(&pm_qos_lock, flags); 291 spin_lock_irqsave(&pm_qos_lock, flags);
292 list_for_each_entry(node, 292 list_del(&pm_qos_req->list);
293 &pm_qos_array[pm_qos_class]->requirements.list, list) { 293 kfree(pm_qos_req);
294 if (strcmp(node->name, name) == 0) {
295 kfree(node->name);
296 list_del(&node->list);
297 kfree(node);
298 pending_update = 1;
299 break;
300 }
301 }
302 spin_unlock_irqrestore(&pm_qos_lock, flags); 294 spin_unlock_irqrestore(&pm_qos_lock, flags);
303 if (pending_update) 295 update_target(qos_class);
304 update_target(pm_qos_class);
305} 296}
306EXPORT_SYMBOL_GPL(pm_qos_remove_requirement); 297EXPORT_SYMBOL_GPL(pm_qos_remove_request);
307 298
308/** 299/**
309 * pm_qos_add_notifier - sets notification entry for changes to target value 300 * pm_qos_add_notifier - sets notification entry for changes to target value
@@ -313,7 +304,7 @@ EXPORT_SYMBOL_GPL(pm_qos_remove_requirement);
313 * will register the notifier into a notification chain that gets called 304 * will register the notifier into a notification chain that gets called
314 * upon changes to the pm_qos_class target value. 305 * upon changes to the pm_qos_class target value.
315 */ 306 */
316 int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier) 307int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
317{ 308{
318 int retval; 309 int retval;
319 310
@@ -343,21 +334,16 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
343} 334}
344EXPORT_SYMBOL_GPL(pm_qos_remove_notifier); 335EXPORT_SYMBOL_GPL(pm_qos_remove_notifier);
345 336
346#define PID_NAME_LEN 32
347
348static int pm_qos_power_open(struct inode *inode, struct file *filp) 337static int pm_qos_power_open(struct inode *inode, struct file *filp)
349{ 338{
350 int ret;
351 long pm_qos_class; 339 long pm_qos_class;
352 char name[PID_NAME_LEN];
353 340
354 pm_qos_class = find_pm_qos_object_by_minor(iminor(inode)); 341 pm_qos_class = find_pm_qos_object_by_minor(iminor(inode));
355 if (pm_qos_class >= 0) { 342 if (pm_qos_class >= 0) {
356 filp->private_data = (void *)pm_qos_class; 343 filp->private_data = (void *) pm_qos_add_request(pm_qos_class,
357 snprintf(name, PID_NAME_LEN, "process_%d", current->pid); 344 PM_QOS_DEFAULT_VALUE);
358 ret = pm_qos_add_requirement(pm_qos_class, name, 345
359 PM_QOS_DEFAULT_VALUE); 346 if (filp->private_data)
360 if (ret >= 0)
361 return 0; 347 return 0;
362 } 348 }
363 return -EPERM; 349 return -EPERM;
@@ -365,32 +351,40 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp)
365 351
366static int pm_qos_power_release(struct inode *inode, struct file *filp) 352static int pm_qos_power_release(struct inode *inode, struct file *filp)
367{ 353{
368 int pm_qos_class; 354 struct pm_qos_request_list *req;
369 char name[PID_NAME_LEN];
370 355
371 pm_qos_class = (long)filp->private_data; 356 req = (struct pm_qos_request_list *)filp->private_data;
372 snprintf(name, PID_NAME_LEN, "process_%d", current->pid); 357 pm_qos_remove_request(req);
373 pm_qos_remove_requirement(pm_qos_class, name);
374 358
375 return 0; 359 return 0;
376} 360}
377 361
362
378static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, 363static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
379 size_t count, loff_t *f_pos) 364 size_t count, loff_t *f_pos)
380{ 365{
381 s32 value; 366 s32 value;
382 int pm_qos_class; 367 int x;
383 char name[PID_NAME_LEN]; 368 char ascii_value[11];
384 369 struct pm_qos_request_list *pm_qos_req;
385 pm_qos_class = (long)filp->private_data; 370
386 if (count != sizeof(s32)) 371 if (count == sizeof(s32)) {
372 if (copy_from_user(&value, buf, sizeof(s32)))
373 return -EFAULT;
374 } else if (count == 11) { /* len('0x12345678/0') */
375 if (copy_from_user(ascii_value, buf, 11))
376 return -EFAULT;
377 x = sscanf(ascii_value, "%x", &value);
378 if (x != 1)
379 return -EINVAL;
380 pr_debug(KERN_ERR "%s, %d, 0x%x\n", ascii_value, x, value);
381 } else
387 return -EINVAL; 382 return -EINVAL;
388 if (copy_from_user(&value, buf, sizeof(s32)))
389 return -EFAULT;
390 snprintf(name, PID_NAME_LEN, "process_%d", current->pid);
391 pm_qos_update_requirement(pm_qos_class, name, value);
392 383
393 return sizeof(s32); 384 pm_qos_req = (struct pm_qos_request_list *)filp->private_data;
385 pm_qos_update_request(pm_qos_req, value);
386
387 return count;
394} 388}
395 389
396 390
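
The net effect is a handle-based kernel API: pm_qos_add_request() hands back the list element that later update and remove calls take, replacing the old name-string lookups. A minimal driver-side sketch using the cpu_dma_latency class; the example_* names are illustrative, not from this patch:

static struct pm_qos_request_list *example_qos;

static int example_start(void)
{
        /* start unconstrained; the returned handle is what we keep */
        example_qos = pm_qos_add_request(PM_QOS_CPU_DMA_LATENCY,
                                         PM_QOS_DEFAULT_VALUE);
        return example_qos ? 0 : -ENOMEM;
}

static void example_stream(void)
{
        /* cheap enough for hot paths: ask for <= 50 usec DMA latency */
        pm_qos_update_request(example_qos, 50);
}

static void example_stop(void)
{
        /* drops the request and recomputes the aggregate target */
        pm_qos_remove_request(example_qos);
        example_qos = NULL;
}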
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index bc7704b3a443..00bb252f29a2 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -11,19 +11,18 @@
11#include <trace/events/timer.h> 11#include <trace/events/timer.h>
12 12
13/* 13/*
14 * Called after updating RLIMIT_CPU to set timer expiration if necessary. 14 * Called after updating RLIMIT_CPU to run cpu timer and update
15 * tsk->signal->cputime_expires expiration cache if necessary. Needs
16 * siglock protection since other code may update expiration cache as
17 * well.
15 */ 18 */
16void update_rlimit_cpu(unsigned long rlim_new) 19void update_rlimit_cpu(unsigned long rlim_new)
17{ 20{
18 cputime_t cputime = secs_to_cputime(rlim_new); 21 cputime_t cputime = secs_to_cputime(rlim_new);
19 struct signal_struct *const sig = current->signal;
20 22
21 if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) || 23 spin_lock_irq(&current->sighand->siglock);
22 cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) { 24 set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
23 spin_lock_irq(&current->sighand->siglock); 25 spin_unlock_irq(&current->sighand->siglock);
24 set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
25 spin_unlock_irq(&current->sighand->siglock);
26 }
27} 26}
28 27
29static int check_clock(const clockid_t which_clock) 28static int check_clock(const clockid_t which_clock)
@@ -548,111 +547,62 @@ static inline int expires_gt(cputime_t expires, cputime_t new_exp)
548 cputime_gt(expires, new_exp); 547 cputime_gt(expires, new_exp);
549} 548}
550 549
551static inline int expires_le(cputime_t expires, cputime_t new_exp)
552{
553 return !cputime_eq(expires, cputime_zero) &&
554 cputime_le(expires, new_exp);
555}
556/* 550/*
557 * Insert the timer on the appropriate list before any timers that 551 * Insert the timer on the appropriate list before any timers that
558 * expire later. This must be called with the tasklist_lock held 552 * expire later. This must be called with the tasklist_lock held
559 * for reading, and interrupts disabled. 553 * for reading, interrupts disabled and p->sighand->siglock taken.
560 */ 554 */
561static void arm_timer(struct k_itimer *timer, union cpu_time_count now) 555static void arm_timer(struct k_itimer *timer)
562{ 556{
563 struct task_struct *p = timer->it.cpu.task; 557 struct task_struct *p = timer->it.cpu.task;
564 struct list_head *head, *listpos; 558 struct list_head *head, *listpos;
559 struct task_cputime *cputime_expires;
565 struct cpu_timer_list *const nt = &timer->it.cpu; 560 struct cpu_timer_list *const nt = &timer->it.cpu;
566 struct cpu_timer_list *next; 561 struct cpu_timer_list *next;
567 unsigned long i;
568 562
569 head = (CPUCLOCK_PERTHREAD(timer->it_clock) ? 563 if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
570 p->cpu_timers : p->signal->cpu_timers); 564 head = p->cpu_timers;
565 cputime_expires = &p->cputime_expires;
566 } else {
567 head = p->signal->cpu_timers;
568 cputime_expires = &p->signal->cputime_expires;
569 }
571 head += CPUCLOCK_WHICH(timer->it_clock); 570 head += CPUCLOCK_WHICH(timer->it_clock);
572 571
573 BUG_ON(!irqs_disabled());
574 spin_lock(&p->sighand->siglock);
575
576 listpos = head; 572 listpos = head;
577 if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { 573 list_for_each_entry(next, head, entry) {
578 list_for_each_entry(next, head, entry) { 574 if (cpu_time_before(timer->it_clock, nt->expires, next->expires))
579 if (next->expires.sched > nt->expires.sched) 575 break;
580 break; 576 listpos = &next->entry;
581 listpos = &next->entry;
582 }
583 } else {
584 list_for_each_entry(next, head, entry) {
585 if (cputime_gt(next->expires.cpu, nt->expires.cpu))
586 break;
587 listpos = &next->entry;
588 }
589 } 577 }
590 list_add(&nt->entry, listpos); 578 list_add(&nt->entry, listpos);
591 579
592 if (listpos == head) { 580 if (listpos == head) {
581 union cpu_time_count *exp = &nt->expires;
582
593 /* 583 /*
594 * We are the new earliest-expiring timer. 584 * We are the new earliest-expiring POSIX 1.b timer, hence
595 * If we are a thread timer, there can always 585 * need to update expiration cache. Take into account that
596 * be a process timer telling us to stop earlier. 586 * for process timers we share expiration cache with itimers
587 * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
597 */ 588 */
598 589
599 if (CPUCLOCK_PERTHREAD(timer->it_clock)) { 590 switch (CPUCLOCK_WHICH(timer->it_clock)) {
600 union cpu_time_count *exp = &nt->expires; 591 case CPUCLOCK_PROF:
601 592 if (expires_gt(cputime_expires->prof_exp, exp->cpu))
602 switch (CPUCLOCK_WHICH(timer->it_clock)) { 593 cputime_expires->prof_exp = exp->cpu;
603 default: 594 break;
604 BUG(); 595 case CPUCLOCK_VIRT:
605 case CPUCLOCK_PROF: 596 if (expires_gt(cputime_expires->virt_exp, exp->cpu))
606 if (expires_gt(p->cputime_expires.prof_exp, 597 cputime_expires->virt_exp = exp->cpu;
607 exp->cpu)) 598 break;
608 p->cputime_expires.prof_exp = exp->cpu; 599 case CPUCLOCK_SCHED:
609 break; 600 if (cputime_expires->sched_exp == 0 ||
610 case CPUCLOCK_VIRT: 601 cputime_expires->sched_exp > exp->sched)
611 if (expires_gt(p->cputime_expires.virt_exp, 602 cputime_expires->sched_exp = exp->sched;
612 exp->cpu)) 603 break;
613 p->cputime_expires.virt_exp = exp->cpu;
614 break;
615 case CPUCLOCK_SCHED:
616 if (p->cputime_expires.sched_exp == 0 ||
617 p->cputime_expires.sched_exp > exp->sched)
618 p->cputime_expires.sched_exp =
619 exp->sched;
620 break;
621 }
622 } else {
623 struct signal_struct *const sig = p->signal;
624 union cpu_time_count *exp = &timer->it.cpu.expires;
625
626 /*
627 * For a process timer, set the cached expiration time.
628 */
629 switch (CPUCLOCK_WHICH(timer->it_clock)) {
630 default:
631 BUG();
632 case CPUCLOCK_VIRT:
633 if (expires_le(sig->it[CPUCLOCK_VIRT].expires,
634 exp->cpu))
635 break;
636 sig->cputime_expires.virt_exp = exp->cpu;
637 break;
638 case CPUCLOCK_PROF:
639 if (expires_le(sig->it[CPUCLOCK_PROF].expires,
640 exp->cpu))
641 break;
642 i = sig->rlim[RLIMIT_CPU].rlim_cur;
643 if (i != RLIM_INFINITY &&
644 i <= cputime_to_secs(exp->cpu))
645 break;
646 sig->cputime_expires.prof_exp = exp->cpu;
647 break;
648 case CPUCLOCK_SCHED:
649 sig->cputime_expires.sched_exp = exp->sched;
650 break;
651 }
652 } 604 }
653 } 605 }
654
655 spin_unlock(&p->sighand->siglock);
656} 606}
657 607
658/* 608/*
@@ -660,7 +610,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
660 */ 610 */
661static void cpu_timer_fire(struct k_itimer *timer) 611static void cpu_timer_fire(struct k_itimer *timer)
662{ 612{
663 if (unlikely(timer->sigq == NULL)) { 613 if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
614 /*
615 * User don't want any signal.
616 */
617 timer->it.cpu.expires.sched = 0;
618 } else if (unlikely(timer->sigq == NULL)) {
664 /* 619 /*
665 * This a special case for clock_nanosleep, 620 * This a special case for clock_nanosleep,
666 * not a normal timer from sys_timer_create. 621 * not a normal timer from sys_timer_create.
@@ -721,7 +676,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
721 struct itimerspec *new, struct itimerspec *old) 676 struct itimerspec *new, struct itimerspec *old)
722{ 677{
723 struct task_struct *p = timer->it.cpu.task; 678 struct task_struct *p = timer->it.cpu.task;
724 union cpu_time_count old_expires, new_expires, val; 679 union cpu_time_count old_expires, new_expires, old_incr, val;
725 int ret; 680 int ret;
726 681
727 if (unlikely(p == NULL)) { 682 if (unlikely(p == NULL)) {
@@ -752,6 +707,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
752 BUG_ON(!irqs_disabled()); 707 BUG_ON(!irqs_disabled());
753 708
754 ret = 0; 709 ret = 0;
710 old_incr = timer->it.cpu.incr;
755 spin_lock(&p->sighand->siglock); 711 spin_lock(&p->sighand->siglock);
756 old_expires = timer->it.cpu.expires; 712 old_expires = timer->it.cpu.expires;
757 if (unlikely(timer->it.cpu.firing)) { 713 if (unlikely(timer->it.cpu.firing)) {
@@ -759,7 +715,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
759 ret = TIMER_RETRY; 715 ret = TIMER_RETRY;
760 } else 716 } else
761 list_del_init(&timer->it.cpu.entry); 717 list_del_init(&timer->it.cpu.entry);
762 spin_unlock(&p->sighand->siglock);
763 718
764 /* 719 /*
765 * We need to sample the current value to convert the new 720 * We need to sample the current value to convert the new
@@ -813,6 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
813 * disable this firing since we are already reporting 768 * disable this firing since we are already reporting
814 * it as an overrun (thanks to bump_cpu_timer above). 769 * it as an overrun (thanks to bump_cpu_timer above).
815 */ 770 */
771 spin_unlock(&p->sighand->siglock);
816 read_unlock(&tasklist_lock); 772 read_unlock(&tasklist_lock);
817 goto out; 773 goto out;
818 } 774 }
@@ -828,11 +784,11 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
828 */ 784 */
829 timer->it.cpu.expires = new_expires; 785 timer->it.cpu.expires = new_expires;
830 if (new_expires.sched != 0 && 786 if (new_expires.sched != 0 &&
831 (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
832 cpu_time_before(timer->it_clock, val, new_expires)) { 787 cpu_time_before(timer->it_clock, val, new_expires)) {
833 arm_timer(timer, val); 788 arm_timer(timer);
834 } 789 }
835 790
791 spin_unlock(&p->sighand->siglock);
836 read_unlock(&tasklist_lock); 792 read_unlock(&tasklist_lock);
837 793
838 /* 794 /*
@@ -853,7 +809,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
853 timer->it_overrun = -1; 809 timer->it_overrun = -1;
854 810
855 if (new_expires.sched != 0 && 811 if (new_expires.sched != 0 &&
856 (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
857 !cpu_time_before(timer->it_clock, val, new_expires)) { 812 !cpu_time_before(timer->it_clock, val, new_expires)) {
858 /* 813 /*
859 * The designated time already passed, so we notify 814 * The designated time already passed, so we notify
@@ -867,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
867 out: 822 out:
868 if (old) { 823 if (old) {
869 sample_to_timespec(timer->it_clock, 824 sample_to_timespec(timer->it_clock,
870 timer->it.cpu.incr, &old->it_interval); 825 old_incr, &old->it_interval);
871 } 826 }
872 return ret; 827 return ret;
873} 828}
@@ -927,25 +882,6 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
927 read_unlock(&tasklist_lock); 882 read_unlock(&tasklist_lock);
928 } 883 }
929 884
930 if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
931 if (timer->it.cpu.incr.sched == 0 &&
932 cpu_time_before(timer->it_clock,
933 timer->it.cpu.expires, now)) {
934 /*
935 * Do-nothing timer expired and has no reload,
936 * so it's as if it was never set.
937 */
938 timer->it.cpu.expires.sched = 0;
939 itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
940 return;
941 }
942 /*
943 * Account for any expirations and reloads that should
944 * have happened.
945 */
946 bump_cpu_timer(timer, now);
947 }
948
949 if (unlikely(clear_dead)) { 885 if (unlikely(clear_dead)) {
950 /* 886 /*
951 * We've noticed that the thread is dead, but 887 * We've noticed that the thread is dead, but
@@ -1066,16 +1002,9 @@ static void stop_process_timers(struct signal_struct *sig)
1066 struct thread_group_cputimer *cputimer = &sig->cputimer; 1002 struct thread_group_cputimer *cputimer = &sig->cputimer;
1067 unsigned long flags; 1003 unsigned long flags;
1068 1004
1069 if (!cputimer->running)
1070 return;
1071
1072 spin_lock_irqsave(&cputimer->lock, flags); 1005 spin_lock_irqsave(&cputimer->lock, flags);
1073 cputimer->running = 0; 1006 cputimer->running = 0;
1074 spin_unlock_irqrestore(&cputimer->lock, flags); 1007 spin_unlock_irqrestore(&cputimer->lock, flags);
1075
1076 sig->cputime_expires.prof_exp = cputime_zero;
1077 sig->cputime_expires.virt_exp = cputime_zero;
1078 sig->cputime_expires.sched_exp = 0;
1079} 1008}
1080 1009
1081static u32 onecputick; 1010static u32 onecputick;
@@ -1112,6 +1041,23 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
1112 } 1041 }
1113} 1042}
1114 1043
1044/**
1045 * task_cputime_zero - Check a task_cputime struct for all zero fields.
1046 *
1047 * @cputime: The struct to compare.
1048 *
1049 * Checks @cputime to see if all fields are zero. Returns true if all fields
1050 * are zero, false if any field is nonzero.
1051 */
1052static inline int task_cputime_zero(const struct task_cputime *cputime)
1053{
1054 if (cputime_eq(cputime->utime, cputime_zero) &&
1055 cputime_eq(cputime->stime, cputime_zero) &&
1056 cputime->sum_exec_runtime == 0)
1057 return 1;
1058 return 0;
1059}
1060
1115/* 1061/*
1116 * Check for any per-thread CPU timers that have fired and move them 1062 * Check for any per-thread CPU timers that have fired and move them
1117 * off the tsk->*_timers list onto the firing list. Per-thread timers 1063 * off the tsk->*_timers list onto the firing list. Per-thread timers
@@ -1129,19 +1075,6 @@ static void check_process_timers(struct task_struct *tsk,
1129 unsigned long soft; 1075 unsigned long soft;
1130 1076
1131 /* 1077 /*
1132 * Don't sample the current process CPU clocks if there are no timers.
1133 */
1134 if (list_empty(&timers[CPUCLOCK_PROF]) &&
1135 cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) &&
1136 sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
1137 list_empty(&timers[CPUCLOCK_VIRT]) &&
1138 cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) &&
1139 list_empty(&timers[CPUCLOCK_SCHED])) {
1140 stop_process_timers(sig);
1141 return;
1142 }
1143
1144 /*
1145 * Collect the current process totals. 1078 * Collect the current process totals.
1146 */ 1079 */
1147 thread_group_cputimer(tsk, &cputime); 1080 thread_group_cputimer(tsk, &cputime);
@@ -1230,18 +1163,11 @@ static void check_process_timers(struct task_struct *tsk,
1230 } 1163 }
1231 } 1164 }
1232 1165
1233 if (!cputime_eq(prof_expires, cputime_zero) && 1166 sig->cputime_expires.prof_exp = prof_expires;
1234 (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) || 1167 sig->cputime_expires.virt_exp = virt_expires;
1235 cputime_gt(sig->cputime_expires.prof_exp, prof_expires))) 1168 sig->cputime_expires.sched_exp = sched_expires;
1236 sig->cputime_expires.prof_exp = prof_expires; 1169 if (task_cputime_zero(&sig->cputime_expires))
1237 if (!cputime_eq(virt_expires, cputime_zero) && 1170 stop_process_timers(sig);
1238 (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) ||
1239 cputime_gt(sig->cputime_expires.virt_exp, virt_expires)))
1240 sig->cputime_expires.virt_exp = virt_expires;
1241 if (sched_expires != 0 &&
1242 (sig->cputime_expires.sched_exp == 0 ||
1243 sig->cputime_expires.sched_exp > sched_expires))
1244 sig->cputime_expires.sched_exp = sched_expires;
1245} 1171}
1246 1172
1247/* 1173/*
@@ -1270,6 +1196,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
1270 goto out; 1196 goto out;
1271 } 1197 }
1272 read_lock(&tasklist_lock); /* arm_timer needs it. */ 1198 read_lock(&tasklist_lock); /* arm_timer needs it. */
1199 spin_lock(&p->sighand->siglock);
1273 } else { 1200 } else {
1274 read_lock(&tasklist_lock); 1201 read_lock(&tasklist_lock);
1275 if (unlikely(p->signal == NULL)) { 1202 if (unlikely(p->signal == NULL)) {
@@ -1290,6 +1217,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
1290 clear_dead_task(timer, now); 1217 clear_dead_task(timer, now);
1291 goto out_unlock; 1218 goto out_unlock;
1292 } 1219 }
1220 spin_lock(&p->sighand->siglock);
1293 cpu_timer_sample_group(timer->it_clock, p, &now); 1221 cpu_timer_sample_group(timer->it_clock, p, &now);
1294 bump_cpu_timer(timer, now); 1222 bump_cpu_timer(timer, now);
1295 /* Leave the tasklist_lock locked for the call below. */ 1223 /* Leave the tasklist_lock locked for the call below. */
@@ -1298,7 +1226,9 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
1298 /* 1226 /*
1299 * Now re-arm for the new expiry time. 1227 * Now re-arm for the new expiry time.
1300 */ 1228 */
1301 arm_timer(timer, now); 1229 BUG_ON(!irqs_disabled());
1230 arm_timer(timer);
1231 spin_unlock(&p->sighand->siglock);
1302 1232
1303out_unlock: 1233out_unlock:
1304 read_unlock(&tasklist_lock); 1234 read_unlock(&tasklist_lock);
@@ -1310,23 +1240,6 @@ out:
1310} 1240}
1311 1241
1312/** 1242/**
1313 * task_cputime_zero - Check a task_cputime struct for all zero fields.
1314 *
1315 * @cputime: The struct to compare.
1316 *
1317 * Checks @cputime to see if all fields are zero. Returns true if all fields
1318 * are zero, false if any field is nonzero.
1319 */
1320static inline int task_cputime_zero(const struct task_cputime *cputime)
1321{
1322 if (cputime_eq(cputime->utime, cputime_zero) &&
1323 cputime_eq(cputime->stime, cputime_zero) &&
1324 cputime->sum_exec_runtime == 0)
1325 return 1;
1326 return 0;
1327}
1328
1329/**
1330 * task_cputime_expired - Compare two task_cputime entities. 1243 * task_cputime_expired - Compare two task_cputime entities.
1331 * 1244 *
1332 * @sample: The task_cputime structure to be checked for expiration. 1245 * @sample: The task_cputime structure to be checked for expiration.
@@ -1382,7 +1295,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
1382 } 1295 }
1383 1296
1384 sig = tsk->signal; 1297 sig = tsk->signal;
1385 if (!task_cputime_zero(&sig->cputime_expires)) { 1298 if (sig->cputimer.running) {
1386 struct task_cputime group_sample; 1299 struct task_cputime group_sample;
1387 1300
1388 thread_group_cputimer(tsk, &group_sample); 1301 thread_group_cputimer(tsk, &group_sample);
@@ -1390,7 +1303,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
1390 return 1; 1303 return 1;
1391 } 1304 }
1392 1305
1393 return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY; 1306 return 0;
1394} 1307}
1395 1308
1396/* 1309/*
@@ -1419,7 +1332,12 @@ void run_posix_cpu_timers(struct task_struct *tsk)
1419 * put them on the firing list. 1332 * put them on the firing list.
1420 */ 1333 */
1421 check_thread_timers(tsk, &firing); 1334 check_thread_timers(tsk, &firing);
1422 check_process_timers(tsk, &firing); 1335 /*
 1336 * If there are any active process-wide timers (POSIX.1b, itimers,
 1337 * RLIMIT_CPU), the cputimer must be running.
1338 */
1339 if (tsk->signal->cputimer.running)
1340 check_process_timers(tsk, &firing);
1423 1341
1424 /* 1342 /*
1425 * We must release these locks before taking any timer's lock. 1343 * We must release these locks before taking any timer's lock.
@@ -1456,21 +1374,23 @@ void run_posix_cpu_timers(struct task_struct *tsk)
1456} 1374}
1457 1375
1458/* 1376/*
1459 * Set one of the process-wide special case CPU timers. 1377 * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
1460 * The tsk->sighand->siglock must be held by the caller. 1378 * The tsk->sighand->siglock must be held by the caller.
1461 * The *newval argument is relative and we update it to be absolute, *oldval
1462 * is absolute and we update it to be relative.
1463 */ 1379 */
1464void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, 1380void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1465 cputime_t *newval, cputime_t *oldval) 1381 cputime_t *newval, cputime_t *oldval)
1466{ 1382{
1467 union cpu_time_count now; 1383 union cpu_time_count now;
1468 struct list_head *head;
1469 1384
1470 BUG_ON(clock_idx == CPUCLOCK_SCHED); 1385 BUG_ON(clock_idx == CPUCLOCK_SCHED);
1471 cpu_timer_sample_group(clock_idx, tsk, &now); 1386 cpu_timer_sample_group(clock_idx, tsk, &now);
1472 1387
1473 if (oldval) { 1388 if (oldval) {
1389 /*
 1390 * We are setting an itimer. The *oldval is absolute and we update
 1391 * it to be relative; the *newval argument is relative and we update
 1392 * it to be absolute.
1393 */
1474 if (!cputime_eq(*oldval, cputime_zero)) { 1394 if (!cputime_eq(*oldval, cputime_zero)) {
1475 if (cputime_le(*oldval, now.cpu)) { 1395 if (cputime_le(*oldval, now.cpu)) {
1476 /* Just about to fire. */ 1396 /* Just about to fire. */
@@ -1483,33 +1403,21 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
1483 if (cputime_eq(*newval, cputime_zero)) 1403 if (cputime_eq(*newval, cputime_zero))
1484 return; 1404 return;
1485 *newval = cputime_add(*newval, now.cpu); 1405 *newval = cputime_add(*newval, now.cpu);
1486
1487 /*
1488 * If the RLIMIT_CPU timer will expire before the
1489 * ITIMER_PROF timer, we have nothing else to do.
1490 */
1491 if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur
1492 < cputime_to_secs(*newval))
1493 return;
1494 } 1406 }
1495 1407
1496 /* 1408 /*
 1497 * Check whether there are any process timers already set to fire 1409 * Update expiration cache if we are the earliest timer, or when the
 1498 * before this one. If so, we don't have anything more to do. 1410 * RLIMIT_CPU limit is earlier than the prof_exp cpu timer expiry.
1499 */ 1411 */
1500 head = &tsk->signal->cpu_timers[clock_idx]; 1412 switch (clock_idx) {
1501 if (list_empty(head) || 1413 case CPUCLOCK_PROF:
1502 cputime_ge(list_first_entry(head, 1414 if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
1503 struct cpu_timer_list, entry)->expires.cpu,
1504 *newval)) {
1505 switch (clock_idx) {
1506 case CPUCLOCK_PROF:
1507 tsk->signal->cputime_expires.prof_exp = *newval; 1415 tsk->signal->cputime_expires.prof_exp = *newval;
1508 break; 1416 break;
1509 case CPUCLOCK_VIRT: 1417 case CPUCLOCK_VIRT:
1418 if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
1510 tsk->signal->cputime_expires.virt_exp = *newval; 1419 tsk->signal->cputime_expires.virt_exp = *newval;
1511 break; 1420 break;
1512 }
1513 } 1421 }
1514} 1422}
1515 1423
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 43191815f874..524e058dcf06 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -8,7 +8,8 @@ obj-$(CONFIG_PM_SLEEP) += console.o
8obj-$(CONFIG_FREEZER) += process.o 8obj-$(CONFIG_FREEZER) += process.o
9obj-$(CONFIG_SUSPEND) += suspend.o 9obj-$(CONFIG_SUSPEND) += suspend.o
10obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o 10obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
11obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o 11obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \
12 block_io.o
12obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o 13obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o
13 14
14obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o 15obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c
new file mode 100644
index 000000000000..97024fd40cd5
--- /dev/null
+++ b/kernel/power/block_io.c
@@ -0,0 +1,103 @@
1/*
2 * This file provides functions for block I/O operations on swap/file.
3 *
4 * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
5 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
6 *
7 * This file is released under the GPLv2.
8 */
9
10#include <linux/bio.h>
11#include <linux/kernel.h>
12#include <linux/pagemap.h>
13#include <linux/swap.h>
14
15#include "power.h"
16
17/**
18 * submit - submit BIO request.
19 * @rw: READ or WRITE.
 20 * @sector: physical offset (sector) of the page to read or write.
21 * @page: page we're reading or writing.
 22 * @bio_chain: list of pending bios (for async reading)
23 *
24 * Straight from the textbook - allocate and initialize the bio.
25 * If we're reading, make sure the page is marked as dirty.
26 * Then submit it and, if @bio_chain == NULL, wait.
27 */
28static int submit(int rw, struct block_device *bdev, sector_t sector,
29 struct page *page, struct bio **bio_chain)
30{
31 const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
32 struct bio *bio;
33
34 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
35 bio->bi_sector = sector;
36 bio->bi_bdev = bdev;
37 bio->bi_end_io = end_swap_bio_read;
38
39 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
40 printk(KERN_ERR "PM: Adding page to bio failed at %llu\n",
41 (unsigned long long)sector);
42 bio_put(bio);
43 return -EFAULT;
44 }
45
46 lock_page(page);
47 bio_get(bio);
48
49 if (bio_chain == NULL) {
50 submit_bio(bio_rw, bio);
51 wait_on_page_locked(page);
52 if (rw == READ)
53 bio_set_pages_dirty(bio);
54 bio_put(bio);
55 } else {
56 if (rw == READ)
57 get_page(page); /* These pages are freed later */
58 bio->bi_private = *bio_chain;
59 *bio_chain = bio;
60 submit_bio(bio_rw, bio);
61 }
62 return 0;
63}
64
65int hib_bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
66{
67 return submit(READ, hib_resume_bdev, page_off * (PAGE_SIZE >> 9),
68 virt_to_page(addr), bio_chain);
69}
70
71int hib_bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
72{
73 return submit(WRITE, hib_resume_bdev, page_off * (PAGE_SIZE >> 9),
74 virt_to_page(addr), bio_chain);
75}
76
77int hib_wait_on_bio_chain(struct bio **bio_chain)
78{
79 struct bio *bio;
80 struct bio *next_bio;
81 int ret = 0;
82
83 if (bio_chain == NULL)
84 return 0;
85
86 bio = *bio_chain;
87 if (bio == NULL)
88 return 0;
89 while (bio) {
90 struct page *page;
91
92 next_bio = bio->bi_private;
93 page = bio->bi_io_vec[0].bv_page;
94 wait_on_page_locked(page);
95 if (!PageUptodate(page) || PageError(page))
96 ret = -EIO;
97 put_page(page);
98 bio_put(bio);
99 bio = next_bio;
100 }
101 *bio_chain = NULL;
102 return ret;
103}
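
The new block_io.c keeps the chained asynchronous I/O pattern of the old swap.c code: when a bio_chain is supplied, each submitted bio is pushed onto a singly linked chain (bi_private is reused as the link), and hib_wait_on_bio_chain() later drains the whole chain once and reports the first error. Below is a small userspace sketch of that pattern, assuming an invented request type; it is an analogue, not the kernel implementation.

#include <stdio.h>
#include <stdlib.h>

struct request {
	int page;		/* which page this request covers */
	int error;		/* filled in on "completion" */
	struct request *next;	/* plays the role of bio->bi_private */
};

/* Analogue of submit() with a non-NULL bio_chain: queue and return. */
static int submit_async(int page, struct request **chain)
{
	struct request *req = malloc(sizeof(*req));

	if (!req)
		return -1;
	req->page = page;
	req->error = 0;		/* pretend the I/O always succeeds */
	req->next = *chain;
	*chain = req;
	return 0;
}

/* Analogue of hib_wait_on_bio_chain(): drain everything, keep one error. */
static int wait_on_chain(struct request **chain)
{
	struct request *req = *chain;
	int ret = 0;

	while (req) {
		struct request *next = req->next;

		if (req->error)
			ret = req->error;
		free(req);
		req = next;
	}
	*chain = NULL;
	return ret;
}

int main(void)
{
	struct request *chain = NULL;
	int i;

	for (i = 0; i < 8; i++)
		submit_async(i, &chain);
	printf("chain drained, status %d\n", wait_on_chain(&chain));
	return 0;
}
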
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 46c5a26630a3..006270fe382d 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -97,24 +97,12 @@ extern int hibernate_preallocate_memory(void);
97 */ 97 */
98 98
99struct snapshot_handle { 99struct snapshot_handle {
100 loff_t offset; /* number of the last byte ready for reading
101 * or writing in the sequence
102 */
103 unsigned int cur; /* number of the block of PAGE_SIZE bytes the 100 unsigned int cur; /* number of the block of PAGE_SIZE bytes the
104 * next operation will refer to (ie. current) 101 * next operation will refer to (ie. current)
105 */ 102 */
106 unsigned int cur_offset; /* offset with respect to the current
107 * block (for the next operation)
108 */
109 unsigned int prev; /* number of the block of PAGE_SIZE bytes that
110 * was the current one previously
111 */
112 void *buffer; /* address of the block to read from 103 void *buffer; /* address of the block to read from
113 * or write to 104 * or write to
114 */ 105 */
115 unsigned int buf_offset; /* location to read from or write to,
116 * given as a displacement from 'buffer'
117 */
118 int sync_read; /* Set to one to notify the caller of 106 int sync_read; /* Set to one to notify the caller of
119 * snapshot_write_next() that it may 107 * snapshot_write_next() that it may
120 * need to call wait_on_bio_chain() 108 * need to call wait_on_bio_chain()
@@ -125,12 +113,12 @@ struct snapshot_handle {
125 * snapshot_read_next()/snapshot_write_next() is allowed to 113 * snapshot_read_next()/snapshot_write_next() is allowed to
126 * read/write data after the function returns 114 * read/write data after the function returns
127 */ 115 */
128#define data_of(handle) ((handle).buffer + (handle).buf_offset) 116#define data_of(handle) ((handle).buffer)
129 117
130extern unsigned int snapshot_additional_pages(struct zone *zone); 118extern unsigned int snapshot_additional_pages(struct zone *zone);
131extern unsigned long snapshot_get_image_size(void); 119extern unsigned long snapshot_get_image_size(void);
132extern int snapshot_read_next(struct snapshot_handle *handle, size_t count); 120extern int snapshot_read_next(struct snapshot_handle *handle);
133extern int snapshot_write_next(struct snapshot_handle *handle, size_t count); 121extern int snapshot_write_next(struct snapshot_handle *handle);
134extern void snapshot_write_finalize(struct snapshot_handle *handle); 122extern void snapshot_write_finalize(struct snapshot_handle *handle);
135extern int snapshot_image_loaded(struct snapshot_handle *handle); 123extern int snapshot_image_loaded(struct snapshot_handle *handle);
136 124
@@ -154,6 +142,15 @@ extern int swsusp_read(unsigned int *flags_p);
154extern int swsusp_write(unsigned int flags); 142extern int swsusp_write(unsigned int flags);
155extern void swsusp_close(fmode_t); 143extern void swsusp_close(fmode_t);
156 144
145/* kernel/power/block_io.c */
146extern struct block_device *hib_resume_bdev;
147
148extern int hib_bio_read_page(pgoff_t page_off, void *addr,
149 struct bio **bio_chain);
150extern int hib_bio_write_page(pgoff_t page_off, void *addr,
151 struct bio **bio_chain);
152extern int hib_wait_on_bio_chain(struct bio **bio_chain);
153
157struct timeval; 154struct timeval;
158/* kernel/power/swsusp.c */ 155/* kernel/power/swsusp.c */
159extern void swsusp_show_speed(struct timeval *, struct timeval *, 156extern void swsusp_show_speed(struct timeval *, struct timeval *,
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index be861c26dda7..25ce010e9f8b 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1604,14 +1604,9 @@ pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
1604 * snapshot_handle structure. The structure gets updated and a pointer 1604 * snapshot_handle structure. The structure gets updated and a pointer
1605 * to it should be passed to this function every next time. 1605 * to it should be passed to this function every next time.
1606 * 1606 *
1607 * The @count parameter should contain the number of bytes the caller
1608 * wants to read from the snapshot. It must not be zero.
1609 *
1610 * On success the function returns a positive number. Then, the caller 1607 * On success the function returns a positive number. Then, the caller
1611 * is allowed to read up to the returned number of bytes from the memory 1608 * is allowed to read up to the returned number of bytes from the memory
1612 * location computed by the data_of() macro. The number returned 1609 * location computed by the data_of() macro.
1613 * may be smaller than @count, but this only happens if the read would
1614 * cross a page boundary otherwise.
1615 * 1610 *
1616 * The function returns 0 to indicate the end of data stream condition, 1611 * The function returns 0 to indicate the end of data stream condition,
1617 * and a negative number is returned on error. In such cases the 1612 * and a negative number is returned on error. In such cases the
@@ -1619,7 +1614,7 @@ pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
1619 * any more. 1614 * any more.
1620 */ 1615 */
1621 1616
1622int snapshot_read_next(struct snapshot_handle *handle, size_t count) 1617int snapshot_read_next(struct snapshot_handle *handle)
1623{ 1618{
1624 if (handle->cur > nr_meta_pages + nr_copy_pages) 1619 if (handle->cur > nr_meta_pages + nr_copy_pages)
1625 return 0; 1620 return 0;
@@ -1630,7 +1625,7 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
1630 if (!buffer) 1625 if (!buffer)
1631 return -ENOMEM; 1626 return -ENOMEM;
1632 } 1627 }
1633 if (!handle->offset) { 1628 if (!handle->cur) {
1634 int error; 1629 int error;
1635 1630
1636 error = init_header((struct swsusp_info *)buffer); 1631 error = init_header((struct swsusp_info *)buffer);
@@ -1639,42 +1634,30 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count)
1639 handle->buffer = buffer; 1634 handle->buffer = buffer;
1640 memory_bm_position_reset(&orig_bm); 1635 memory_bm_position_reset(&orig_bm);
1641 memory_bm_position_reset(&copy_bm); 1636 memory_bm_position_reset(&copy_bm);
1642 } 1637 } else if (handle->cur <= nr_meta_pages) {
1643 if (handle->prev < handle->cur) { 1638 memset(buffer, 0, PAGE_SIZE);
1644 if (handle->cur <= nr_meta_pages) { 1639 pack_pfns(buffer, &orig_bm);
1645 memset(buffer, 0, PAGE_SIZE); 1640 } else {
1646 pack_pfns(buffer, &orig_bm); 1641 struct page *page;
1647 } else {
1648 struct page *page;
1649 1642
1650 page = pfn_to_page(memory_bm_next_pfn(&copy_bm)); 1643 page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
1651 if (PageHighMem(page)) { 1644 if (PageHighMem(page)) {
1652 /* Highmem pages are copied to the buffer, 1645 /* Highmem pages are copied to the buffer,
1653 * because we can't return with a kmapped 1646 * because we can't return with a kmapped
1654 * highmem page (we may not be called again). 1647 * highmem page (we may not be called again).
1655 */ 1648 */
1656 void *kaddr; 1649 void *kaddr;
1657 1650
1658 kaddr = kmap_atomic(page, KM_USER0); 1651 kaddr = kmap_atomic(page, KM_USER0);
1659 memcpy(buffer, kaddr, PAGE_SIZE); 1652 memcpy(buffer, kaddr, PAGE_SIZE);
1660 kunmap_atomic(kaddr, KM_USER0); 1653 kunmap_atomic(kaddr, KM_USER0);
1661 handle->buffer = buffer; 1654 handle->buffer = buffer;
1662 } else { 1655 } else {
1663 handle->buffer = page_address(page); 1656 handle->buffer = page_address(page);
1664 }
1665 } 1657 }
1666 handle->prev = handle->cur;
1667 }
1668 handle->buf_offset = handle->cur_offset;
1669 if (handle->cur_offset + count >= PAGE_SIZE) {
1670 count = PAGE_SIZE - handle->cur_offset;
1671 handle->cur_offset = 0;
1672 handle->cur++;
1673 } else {
1674 handle->cur_offset += count;
1675 } 1658 }
1676 handle->offset += count; 1659 handle->cur++;
1677 return count; 1660 return PAGE_SIZE;
1678} 1661}
1679 1662
1680/** 1663/**
@@ -2133,14 +2116,9 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
2133 * snapshot_handle structure. The structure gets updated and a pointer 2116 * snapshot_handle structure. The structure gets updated and a pointer
2134 * to it should be passed to this function every next time. 2117 * to it should be passed to this function every next time.
2135 * 2118 *
2136 * The @count parameter should contain the number of bytes the caller
2137 * wants to write to the image. It must not be zero.
2138 *
2139 * On success the function returns a positive number. Then, the caller 2119 * On success the function returns a positive number. Then, the caller
2140 * is allowed to write up to the returned number of bytes to the memory 2120 * is allowed to write up to the returned number of bytes to the memory
2141 * location computed by the data_of() macro. The number returned 2121 * location computed by the data_of() macro.
2142 * may be smaller than @count, but this only happens if the write would
2143 * cross a page boundary otherwise.
2144 * 2122 *
2145 * The function returns 0 to indicate the "end of file" condition, 2123 * The function returns 0 to indicate the "end of file" condition,
2146 * and a negative number is returned on error. In such cases the 2124 * and a negative number is returned on error. In such cases the
@@ -2148,16 +2126,18 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
2148 * any more. 2126 * any more.
2149 */ 2127 */
2150 2128
2151int snapshot_write_next(struct snapshot_handle *handle, size_t count) 2129int snapshot_write_next(struct snapshot_handle *handle)
2152{ 2130{
2153 static struct chain_allocator ca; 2131 static struct chain_allocator ca;
2154 int error = 0; 2132 int error = 0;
2155 2133
2156 /* Check if we have already loaded the entire image */ 2134 /* Check if we have already loaded the entire image */
2157 if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) 2135 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
2158 return 0; 2136 return 0;
2159 2137
2160 if (handle->offset == 0) { 2138 handle->sync_read = 1;
2139
2140 if (!handle->cur) {
2161 if (!buffer) 2141 if (!buffer)
2162 /* This makes the buffer be freed by swsusp_free() */ 2142 /* This makes the buffer be freed by swsusp_free() */
2163 buffer = get_image_page(GFP_ATOMIC, PG_ANY); 2143 buffer = get_image_page(GFP_ATOMIC, PG_ANY);
@@ -2166,56 +2146,43 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
2166 return -ENOMEM; 2146 return -ENOMEM;
2167 2147
2168 handle->buffer = buffer; 2148 handle->buffer = buffer;
2169 } 2149 } else if (handle->cur == 1) {
2170 handle->sync_read = 1; 2150 error = load_header(buffer);
2171 if (handle->prev < handle->cur) { 2151 if (error)
2172 if (handle->prev == 0) { 2152 return error;
2173 error = load_header(buffer);
2174 if (error)
2175 return error;
2176 2153
2177 error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY); 2154 error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
2178 if (error) 2155 if (error)
2179 return error; 2156 return error;
2157
2158 } else if (handle->cur <= nr_meta_pages + 1) {
2159 error = unpack_orig_pfns(buffer, &copy_bm);
2160 if (error)
2161 return error;
2180 2162
2181 } else if (handle->prev <= nr_meta_pages) { 2163 if (handle->cur == nr_meta_pages + 1) {
2182 error = unpack_orig_pfns(buffer, &copy_bm); 2164 error = prepare_image(&orig_bm, &copy_bm);
2183 if (error) 2165 if (error)
2184 return error; 2166 return error;
2185 2167
2186 if (handle->prev == nr_meta_pages) { 2168 chain_init(&ca, GFP_ATOMIC, PG_SAFE);
2187 error = prepare_image(&orig_bm, &copy_bm); 2169 memory_bm_position_reset(&orig_bm);
2188 if (error) 2170 restore_pblist = NULL;
2189 return error;
2190
2191 chain_init(&ca, GFP_ATOMIC, PG_SAFE);
2192 memory_bm_position_reset(&orig_bm);
2193 restore_pblist = NULL;
2194 handle->buffer = get_buffer(&orig_bm, &ca);
2195 handle->sync_read = 0;
2196 if (IS_ERR(handle->buffer))
2197 return PTR_ERR(handle->buffer);
2198 }
2199 } else {
2200 copy_last_highmem_page();
2201 handle->buffer = get_buffer(&orig_bm, &ca); 2171 handle->buffer = get_buffer(&orig_bm, &ca);
2172 handle->sync_read = 0;
2202 if (IS_ERR(handle->buffer)) 2173 if (IS_ERR(handle->buffer))
2203 return PTR_ERR(handle->buffer); 2174 return PTR_ERR(handle->buffer);
2204 if (handle->buffer != buffer)
2205 handle->sync_read = 0;
2206 } 2175 }
2207 handle->prev = handle->cur;
2208 }
2209 handle->buf_offset = handle->cur_offset;
2210 if (handle->cur_offset + count >= PAGE_SIZE) {
2211 count = PAGE_SIZE - handle->cur_offset;
2212 handle->cur_offset = 0;
2213 handle->cur++;
2214 } else { 2176 } else {
2215 handle->cur_offset += count; 2177 copy_last_highmem_page();
2178 handle->buffer = get_buffer(&orig_bm, &ca);
2179 if (IS_ERR(handle->buffer))
2180 return PTR_ERR(handle->buffer);
2181 if (handle->buffer != buffer)
2182 handle->sync_read = 0;
2216 } 2183 }
2217 handle->offset += count; 2184 handle->cur++;
2218 return count; 2185 return PAGE_SIZE;
2219} 2186}
2220 2187
2221/** 2188/**
@@ -2230,7 +2197,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle)
2230{ 2197{
2231 copy_last_highmem_page(); 2198 copy_last_highmem_page();
2232 /* Free only if we have loaded the image entirely */ 2199 /* Free only if we have loaded the image entirely */
2233 if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) { 2200 if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
2234 memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); 2201 memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
2235 free_highmem_data(); 2202 free_highmem_data();
2236 } 2203 }
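
After this snapshot.c change, snapshot_read_next() and snapshot_write_next() always hand over exactly one page per call, so handle->cur alone encodes the progress: block 0 is the image header, blocks 1..nr_meta_pages carry packed PFNs, and the remaining blocks carry data pages (the write side is shifted by one because its first call only sets up the buffer). The following userspace sketch models just that counter-driven protocol; the enum and helper names are invented for illustration.

#include <stdio.h>

enum block_kind { BLK_HEADER, BLK_METADATA, BLK_DATA, BLK_DONE };

struct handle {
	unsigned int cur;	/* number of the next PAGE_SIZE block */
};

/* Decide what the next block holds, then advance the counter. */
static enum block_kind next_block(struct handle *h, unsigned int nr_meta_pages,
				  unsigned int nr_copy_pages)
{
	enum block_kind kind;

	if (h->cur > nr_meta_pages + nr_copy_pages)
		return BLK_DONE;
	if (h->cur == 0)
		kind = BLK_HEADER;
	else if (h->cur <= nr_meta_pages)
		kind = BLK_METADATA;
	else
		kind = BLK_DATA;
	h->cur++;
	return kind;
}

int main(void)
{
	static const char *names[] = { "header", "metadata", "data", "done" };
	struct handle h = { 0 };
	enum block_kind k;

	/* Walk a toy image with 2 metadata pages and 3 data pages. */
	while ((k = next_block(&h, 2, 3)) != BLK_DONE)
		printf("block %u: %s\n", h.cur - 1, names[k]);
	return 0;
}
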
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 66824d71983a..b0bb21778391 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -29,6 +29,40 @@
29 29
30#define SWSUSP_SIG "S1SUSPEND" 30#define SWSUSP_SIG "S1SUSPEND"
31 31
32/*
33 * The swap map is a data structure used for keeping track of each page
34 * written to a swap partition. It consists of many swap_map_page
 35 * structures that each contain an array of MAP_PAGE_ENTRIES swap entries.
36 * These structures are stored on the swap and linked together with the
37 * help of the .next_swap member.
38 *
39 * The swap map is created during suspend. The swap map pages are
40 * allocated and populated one at a time, so we only need one memory
41 * page to set up the entire structure.
42 *
43 * During resume we also only need to use one swap_map_page structure
44 * at a time.
45 */
46
47#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1)
48
49struct swap_map_page {
50 sector_t entries[MAP_PAGE_ENTRIES];
51 sector_t next_swap;
52};
53
54/**
55 * The swap_map_handle structure is used for handling swap in
 56 * a file-like way
57 */
58
59struct swap_map_handle {
60 struct swap_map_page *cur;
61 sector_t cur_swap;
62 sector_t first_sector;
63 unsigned int k;
64};
65
32struct swsusp_header { 66struct swsusp_header {
33 char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)]; 67 char reserved[PAGE_SIZE - 20 - sizeof(sector_t) - sizeof(int)];
34 sector_t image; 68 sector_t image;
@@ -145,110 +179,24 @@ int swsusp_swap_in_use(void)
145 */ 179 */
146 180
147static unsigned short root_swap = 0xffff; 181static unsigned short root_swap = 0xffff;
148static struct block_device *resume_bdev; 182struct block_device *hib_resume_bdev;
149
150/**
151 * submit - submit BIO request.
152 * @rw: READ or WRITE.
153 * @off physical offset of page.
154 * @page: page we're reading or writing.
155 * @bio_chain: list of pending biod (for async reading)
156 *
157 * Straight from the textbook - allocate and initialize the bio.
158 * If we're reading, make sure the page is marked as dirty.
159 * Then submit it and, if @bio_chain == NULL, wait.
160 */
161static int submit(int rw, pgoff_t page_off, struct page *page,
162 struct bio **bio_chain)
163{
164 const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
165 struct bio *bio;
166
167 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
168 bio->bi_sector = page_off * (PAGE_SIZE >> 9);
169 bio->bi_bdev = resume_bdev;
170 bio->bi_end_io = end_swap_bio_read;
171
172 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
173 printk(KERN_ERR "PM: Adding page to bio failed at %ld\n",
174 page_off);
175 bio_put(bio);
176 return -EFAULT;
177 }
178
179 lock_page(page);
180 bio_get(bio);
181
182 if (bio_chain == NULL) {
183 submit_bio(bio_rw, bio);
184 wait_on_page_locked(page);
185 if (rw == READ)
186 bio_set_pages_dirty(bio);
187 bio_put(bio);
188 } else {
189 if (rw == READ)
190 get_page(page); /* These pages are freed later */
191 bio->bi_private = *bio_chain;
192 *bio_chain = bio;
193 submit_bio(bio_rw, bio);
194 }
195 return 0;
196}
197
198static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
199{
200 return submit(READ, page_off, virt_to_page(addr), bio_chain);
201}
202
203static int bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
204{
205 return submit(WRITE, page_off, virt_to_page(addr), bio_chain);
206}
207
208static int wait_on_bio_chain(struct bio **bio_chain)
209{
210 struct bio *bio;
211 struct bio *next_bio;
212 int ret = 0;
213
214 if (bio_chain == NULL)
215 return 0;
216
217 bio = *bio_chain;
218 if (bio == NULL)
219 return 0;
220 while (bio) {
221 struct page *page;
222
223 next_bio = bio->bi_private;
224 page = bio->bi_io_vec[0].bv_page;
225 wait_on_page_locked(page);
226 if (!PageUptodate(page) || PageError(page))
227 ret = -EIO;
228 put_page(page);
229 bio_put(bio);
230 bio = next_bio;
231 }
232 *bio_chain = NULL;
233 return ret;
234}
235 183
236/* 184/*
237 * Saving part 185 * Saving part
238 */ 186 */
239 187
240static int mark_swapfiles(sector_t start, unsigned int flags) 188static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
241{ 189{
242 int error; 190 int error;
243 191
244 bio_read_page(swsusp_resume_block, swsusp_header, NULL); 192 hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL);
245 if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || 193 if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
246 !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { 194 !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
247 memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); 195 memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
248 memcpy(swsusp_header->sig,SWSUSP_SIG, 10); 196 memcpy(swsusp_header->sig,SWSUSP_SIG, 10);
249 swsusp_header->image = start; 197 swsusp_header->image = handle->first_sector;
250 swsusp_header->flags = flags; 198 swsusp_header->flags = flags;
251 error = bio_write_page(swsusp_resume_block, 199 error = hib_bio_write_page(swsusp_resume_block,
252 swsusp_header, NULL); 200 swsusp_header, NULL);
253 } else { 201 } else {
254 printk(KERN_ERR "PM: Swap header not found!\n"); 202 printk(KERN_ERR "PM: Swap header not found!\n");
@@ -260,25 +208,26 @@ static int mark_swapfiles(sector_t start, unsigned int flags)
260/** 208/**
261 * swsusp_swap_check - check if the resume device is a swap device 209 * swsusp_swap_check - check if the resume device is a swap device
262 * and get its index (if so) 210 * and get its index (if so)
211 *
 212 * This is called before saving the image
263 */ 213 */
264 214static int swsusp_swap_check(void)
265static int swsusp_swap_check(void) /* This is called before saving image */
266{ 215{
267 int res; 216 int res;
268 217
269 res = swap_type_of(swsusp_resume_device, swsusp_resume_block, 218 res = swap_type_of(swsusp_resume_device, swsusp_resume_block,
270 &resume_bdev); 219 &hib_resume_bdev);
271 if (res < 0) 220 if (res < 0)
272 return res; 221 return res;
273 222
274 root_swap = res; 223 root_swap = res;
275 res = blkdev_get(resume_bdev, FMODE_WRITE); 224 res = blkdev_get(hib_resume_bdev, FMODE_WRITE);
276 if (res) 225 if (res)
277 return res; 226 return res;
278 227
279 res = set_blocksize(resume_bdev, PAGE_SIZE); 228 res = set_blocksize(hib_resume_bdev, PAGE_SIZE);
280 if (res < 0) 229 if (res < 0)
281 blkdev_put(resume_bdev, FMODE_WRITE); 230 blkdev_put(hib_resume_bdev, FMODE_WRITE);
282 231
283 return res; 232 return res;
284} 233}
@@ -309,42 +258,9 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
309 } else { 258 } else {
310 src = buf; 259 src = buf;
311 } 260 }
312 return bio_write_page(offset, src, bio_chain); 261 return hib_bio_write_page(offset, src, bio_chain);
313} 262}
314 263
315/*
316 * The swap map is a data structure used for keeping track of each page
317 * written to a swap partition. It consists of many swap_map_page
318 * structures that contain each an array of MAP_PAGE_SIZE swap entries.
319 * These structures are stored on the swap and linked together with the
320 * help of the .next_swap member.
321 *
322 * The swap map is created during suspend. The swap map pages are
323 * allocated and populated one at a time, so we only need one memory
324 * page to set up the entire structure.
325 *
326 * During resume we also only need to use one swap_map_page structure
327 * at a time.
328 */
329
330#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1)
331
332struct swap_map_page {
333 sector_t entries[MAP_PAGE_ENTRIES];
334 sector_t next_swap;
335};
336
337/**
338 * The swap_map_handle structure is used for handling swap in
339 * a file-alike way
340 */
341
342struct swap_map_handle {
343 struct swap_map_page *cur;
344 sector_t cur_swap;
345 unsigned int k;
346};
347
348static void release_swap_writer(struct swap_map_handle *handle) 264static void release_swap_writer(struct swap_map_handle *handle)
349{ 265{
350 if (handle->cur) 266 if (handle->cur)
@@ -354,16 +270,33 @@ static void release_swap_writer(struct swap_map_handle *handle)
354 270
355static int get_swap_writer(struct swap_map_handle *handle) 271static int get_swap_writer(struct swap_map_handle *handle)
356{ 272{
273 int ret;
274
275 ret = swsusp_swap_check();
276 if (ret) {
277 if (ret != -ENOSPC)
278 printk(KERN_ERR "PM: Cannot find swap device, try "
279 "swapon -a.\n");
280 return ret;
281 }
357 handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL); 282 handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_KERNEL);
358 if (!handle->cur) 283 if (!handle->cur) {
359 return -ENOMEM; 284 ret = -ENOMEM;
285 goto err_close;
286 }
360 handle->cur_swap = alloc_swapdev_block(root_swap); 287 handle->cur_swap = alloc_swapdev_block(root_swap);
361 if (!handle->cur_swap) { 288 if (!handle->cur_swap) {
362 release_swap_writer(handle); 289 ret = -ENOSPC;
363 return -ENOSPC; 290 goto err_rel;
364 } 291 }
365 handle->k = 0; 292 handle->k = 0;
293 handle->first_sector = handle->cur_swap;
366 return 0; 294 return 0;
295err_rel:
296 release_swap_writer(handle);
297err_close:
298 swsusp_close(FMODE_WRITE);
299 return ret;
367} 300}
368 301
369static int swap_write_page(struct swap_map_handle *handle, void *buf, 302static int swap_write_page(struct swap_map_handle *handle, void *buf,
@@ -380,7 +313,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
380 return error; 313 return error;
381 handle->cur->entries[handle->k++] = offset; 314 handle->cur->entries[handle->k++] = offset;
382 if (handle->k >= MAP_PAGE_ENTRIES) { 315 if (handle->k >= MAP_PAGE_ENTRIES) {
383 error = wait_on_bio_chain(bio_chain); 316 error = hib_wait_on_bio_chain(bio_chain);
384 if (error) 317 if (error)
385 goto out; 318 goto out;
386 offset = alloc_swapdev_block(root_swap); 319 offset = alloc_swapdev_block(root_swap);
@@ -406,6 +339,24 @@ static int flush_swap_writer(struct swap_map_handle *handle)
406 return -EINVAL; 339 return -EINVAL;
407} 340}
408 341
342static int swap_writer_finish(struct swap_map_handle *handle,
343 unsigned int flags, int error)
344{
345 if (!error) {
346 flush_swap_writer(handle);
347 printk(KERN_INFO "PM: S");
348 error = mark_swapfiles(handle, flags);
349 printk("|\n");
350 }
351
352 if (error)
353 free_all_swap_pages(root_swap);
354 release_swap_writer(handle);
355 swsusp_close(FMODE_WRITE);
356
357 return error;
358}
359
409/** 360/**
410 * save_image - save the suspend image data 361 * save_image - save the suspend image data
411 */ 362 */
@@ -431,7 +382,7 @@ static int save_image(struct swap_map_handle *handle,
431 bio = NULL; 382 bio = NULL;
432 do_gettimeofday(&start); 383 do_gettimeofday(&start);
433 while (1) { 384 while (1) {
434 ret = snapshot_read_next(snapshot, PAGE_SIZE); 385 ret = snapshot_read_next(snapshot);
435 if (ret <= 0) 386 if (ret <= 0)
436 break; 387 break;
437 ret = swap_write_page(handle, data_of(*snapshot), &bio); 388 ret = swap_write_page(handle, data_of(*snapshot), &bio);
@@ -441,7 +392,7 @@ static int save_image(struct swap_map_handle *handle,
441 printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m); 392 printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
442 nr_pages++; 393 nr_pages++;
443 } 394 }
444 err2 = wait_on_bio_chain(&bio); 395 err2 = hib_wait_on_bio_chain(&bio);
445 do_gettimeofday(&stop); 396 do_gettimeofday(&stop);
446 if (!ret) 397 if (!ret)
447 ret = err2; 398 ret = err2;
@@ -483,50 +434,34 @@ int swsusp_write(unsigned int flags)
483 struct swap_map_handle handle; 434 struct swap_map_handle handle;
484 struct snapshot_handle snapshot; 435 struct snapshot_handle snapshot;
485 struct swsusp_info *header; 436 struct swsusp_info *header;
437 unsigned long pages;
486 int error; 438 int error;
487 439
488 error = swsusp_swap_check(); 440 pages = snapshot_get_image_size();
441 error = get_swap_writer(&handle);
489 if (error) { 442 if (error) {
490 printk(KERN_ERR "PM: Cannot find swap device, try " 443 printk(KERN_ERR "PM: Cannot get swap writer\n");
491 "swapon -a.\n");
492 return error; 444 return error;
493 } 445 }
446 if (!enough_swap(pages)) {
447 printk(KERN_ERR "PM: Not enough free swap\n");
448 error = -ENOSPC;
449 goto out_finish;
450 }
494 memset(&snapshot, 0, sizeof(struct snapshot_handle)); 451 memset(&snapshot, 0, sizeof(struct snapshot_handle));
495 error = snapshot_read_next(&snapshot, PAGE_SIZE); 452 error = snapshot_read_next(&snapshot);
496 if (error < PAGE_SIZE) { 453 if (error < PAGE_SIZE) {
497 if (error >= 0) 454 if (error >= 0)
498 error = -EFAULT; 455 error = -EFAULT;
499 456
500 goto out; 457 goto out_finish;
501 } 458 }
502 header = (struct swsusp_info *)data_of(snapshot); 459 header = (struct swsusp_info *)data_of(snapshot);
503 if (!enough_swap(header->pages)) { 460 error = swap_write_page(&handle, header, NULL);
504 printk(KERN_ERR "PM: Not enough free swap\n"); 461 if (!error)
505 error = -ENOSPC; 462 error = save_image(&handle, &snapshot, pages - 1);
506 goto out; 463out_finish:
507 } 464 error = swap_writer_finish(&handle, flags, error);
508 error = get_swap_writer(&handle);
509 if (!error) {
510 sector_t start = handle.cur_swap;
511
512 error = swap_write_page(&handle, header, NULL);
513 if (!error)
514 error = save_image(&handle, &snapshot,
515 header->pages - 1);
516
517 if (!error) {
518 flush_swap_writer(&handle);
519 printk(KERN_INFO "PM: S");
520 error = mark_swapfiles(start, flags);
521 printk("|\n");
522 }
523 }
524 if (error)
525 free_all_swap_pages(root_swap);
526
527 release_swap_writer(&handle);
528 out:
529 swsusp_close(FMODE_WRITE);
530 return error; 465 return error;
531} 466}
532 467
@@ -542,18 +477,21 @@ static void release_swap_reader(struct swap_map_handle *handle)
542 handle->cur = NULL; 477 handle->cur = NULL;
543} 478}
544 479
545static int get_swap_reader(struct swap_map_handle *handle, sector_t start) 480static int get_swap_reader(struct swap_map_handle *handle,
481 unsigned int *flags_p)
546{ 482{
547 int error; 483 int error;
548 484
549 if (!start) 485 *flags_p = swsusp_header->flags;
486
487 if (!swsusp_header->image) /* how can this happen? */
550 return -EINVAL; 488 return -EINVAL;
551 489
552 handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH); 490 handle->cur = (struct swap_map_page *)get_zeroed_page(__GFP_WAIT | __GFP_HIGH);
553 if (!handle->cur) 491 if (!handle->cur)
554 return -ENOMEM; 492 return -ENOMEM;
555 493
556 error = bio_read_page(start, handle->cur, NULL); 494 error = hib_bio_read_page(swsusp_header->image, handle->cur, NULL);
557 if (error) { 495 if (error) {
558 release_swap_reader(handle); 496 release_swap_reader(handle);
559 return error; 497 return error;
@@ -573,21 +511,28 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
573 offset = handle->cur->entries[handle->k]; 511 offset = handle->cur->entries[handle->k];
574 if (!offset) 512 if (!offset)
575 return -EFAULT; 513 return -EFAULT;
576 error = bio_read_page(offset, buf, bio_chain); 514 error = hib_bio_read_page(offset, buf, bio_chain);
577 if (error) 515 if (error)
578 return error; 516 return error;
579 if (++handle->k >= MAP_PAGE_ENTRIES) { 517 if (++handle->k >= MAP_PAGE_ENTRIES) {
580 error = wait_on_bio_chain(bio_chain); 518 error = hib_wait_on_bio_chain(bio_chain);
581 handle->k = 0; 519 handle->k = 0;
582 offset = handle->cur->next_swap; 520 offset = handle->cur->next_swap;
583 if (!offset) 521 if (!offset)
584 release_swap_reader(handle); 522 release_swap_reader(handle);
585 else if (!error) 523 else if (!error)
586 error = bio_read_page(offset, handle->cur, NULL); 524 error = hib_bio_read_page(offset, handle->cur, NULL);
587 } 525 }
588 return error; 526 return error;
589} 527}
590 528
529static int swap_reader_finish(struct swap_map_handle *handle)
530{
531 release_swap_reader(handle);
532
533 return 0;
534}
535
591/** 536/**
592 * load_image - load the image using the swap map handle 537 * load_image - load the image using the swap map handle
593 * @handle and the snapshot handle @snapshot 538 * @handle and the snapshot handle @snapshot
@@ -615,21 +560,21 @@ static int load_image(struct swap_map_handle *handle,
615 bio = NULL; 560 bio = NULL;
616 do_gettimeofday(&start); 561 do_gettimeofday(&start);
617 for ( ; ; ) { 562 for ( ; ; ) {
618 error = snapshot_write_next(snapshot, PAGE_SIZE); 563 error = snapshot_write_next(snapshot);
619 if (error <= 0) 564 if (error <= 0)
620 break; 565 break;
621 error = swap_read_page(handle, data_of(*snapshot), &bio); 566 error = swap_read_page(handle, data_of(*snapshot), &bio);
622 if (error) 567 if (error)
623 break; 568 break;
624 if (snapshot->sync_read) 569 if (snapshot->sync_read)
625 error = wait_on_bio_chain(&bio); 570 error = hib_wait_on_bio_chain(&bio);
626 if (error) 571 if (error)
627 break; 572 break;
628 if (!(nr_pages % m)) 573 if (!(nr_pages % m))
629 printk("\b\b\b\b%3d%%", nr_pages / m); 574 printk("\b\b\b\b%3d%%", nr_pages / m);
630 nr_pages++; 575 nr_pages++;
631 } 576 }
632 err2 = wait_on_bio_chain(&bio); 577 err2 = hib_wait_on_bio_chain(&bio);
633 do_gettimeofday(&stop); 578 do_gettimeofday(&stop);
634 if (!error) 579 if (!error)
635 error = err2; 580 error = err2;
@@ -657,20 +602,20 @@ int swsusp_read(unsigned int *flags_p)
657 struct snapshot_handle snapshot; 602 struct snapshot_handle snapshot;
658 struct swsusp_info *header; 603 struct swsusp_info *header;
659 604
660 *flags_p = swsusp_header->flags;
661
662 memset(&snapshot, 0, sizeof(struct snapshot_handle)); 605 memset(&snapshot, 0, sizeof(struct snapshot_handle));
663 error = snapshot_write_next(&snapshot, PAGE_SIZE); 606 error = snapshot_write_next(&snapshot);
664 if (error < PAGE_SIZE) 607 if (error < PAGE_SIZE)
665 return error < 0 ? error : -EFAULT; 608 return error < 0 ? error : -EFAULT;
666 header = (struct swsusp_info *)data_of(snapshot); 609 header = (struct swsusp_info *)data_of(snapshot);
667 error = get_swap_reader(&handle, swsusp_header->image); 610 error = get_swap_reader(&handle, flags_p);
611 if (error)
612 goto end;
668 if (!error) 613 if (!error)
669 error = swap_read_page(&handle, header, NULL); 614 error = swap_read_page(&handle, header, NULL);
670 if (!error) 615 if (!error)
671 error = load_image(&handle, &snapshot, header->pages - 1); 616 error = load_image(&handle, &snapshot, header->pages - 1);
672 release_swap_reader(&handle); 617 swap_reader_finish(&handle);
673 618end:
674 if (!error) 619 if (!error)
675 pr_debug("PM: Image successfully loaded\n"); 620 pr_debug("PM: Image successfully loaded\n");
676 else 621 else
@@ -686,11 +631,11 @@ int swsusp_check(void)
686{ 631{
687 int error; 632 int error;
688 633
689 resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); 634 hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
690 if (!IS_ERR(resume_bdev)) { 635 if (!IS_ERR(hib_resume_bdev)) {
691 set_blocksize(resume_bdev, PAGE_SIZE); 636 set_blocksize(hib_resume_bdev, PAGE_SIZE);
692 memset(swsusp_header, 0, PAGE_SIZE); 637 memset(swsusp_header, 0, PAGE_SIZE);
693 error = bio_read_page(swsusp_resume_block, 638 error = hib_bio_read_page(swsusp_resume_block,
694 swsusp_header, NULL); 639 swsusp_header, NULL);
695 if (error) 640 if (error)
696 goto put; 641 goto put;
@@ -698,7 +643,7 @@ int swsusp_check(void)
698 if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) { 643 if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) {
699 memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); 644 memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
700 /* Reset swap signature now */ 645 /* Reset swap signature now */
701 error = bio_write_page(swsusp_resume_block, 646 error = hib_bio_write_page(swsusp_resume_block,
702 swsusp_header, NULL); 647 swsusp_header, NULL);
703 } else { 648 } else {
704 error = -EINVAL; 649 error = -EINVAL;
@@ -706,11 +651,11 @@ int swsusp_check(void)
706 651
707put: 652put:
708 if (error) 653 if (error)
709 blkdev_put(resume_bdev, FMODE_READ); 654 blkdev_put(hib_resume_bdev, FMODE_READ);
710 else 655 else
711 pr_debug("PM: Signature found, resuming\n"); 656 pr_debug("PM: Signature found, resuming\n");
712 } else { 657 } else {
713 error = PTR_ERR(resume_bdev); 658 error = PTR_ERR(hib_resume_bdev);
714 } 659 }
715 660
716 if (error) 661 if (error)
@@ -725,12 +670,12 @@ put:
725 670
726void swsusp_close(fmode_t mode) 671void swsusp_close(fmode_t mode)
727{ 672{
728 if (IS_ERR(resume_bdev)) { 673 if (IS_ERR(hib_resume_bdev)) {
729 pr_debug("PM: Image device not initialised\n"); 674 pr_debug("PM: Image device not initialised\n");
730 return; 675 return;
731 } 676 }
732 677
733 blkdev_put(resume_bdev, mode); 678 blkdev_put(hib_resume_bdev, mode);
734} 679}
735 680
736static int swsusp_header_init(void) 681static int swsusp_header_init(void)
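
The swap_map_page comment moved to the top of swap.c describes a simple on-disk layout: each map page records MAP_PAGE_ENTRIES sector numbers plus a link to the next map page, so the whole image can be walked from the single starting sector stored in the header (handle->first_sector in this patch). Here is a userspace sketch of that chaining, with a tiny made-up MAP_PAGE_ENTRIES and an in-memory pointer standing in for the .next_swap sector; it illustrates the structure only.

#include <stdio.h>
#include <stdlib.h>

#define MAP_PAGE_ENTRIES 4	/* tiny on purpose; the kernel derives this
				 * from PAGE_SIZE / sizeof(sector_t) - 1 */

struct swap_map_page {
	unsigned long entries[MAP_PAGE_ENTRIES];
	struct swap_map_page *next;	/* stands in for .next_swap */
};

struct swap_map_handle {
	struct swap_map_page *cur;	/* map page being filled or read */
	unsigned int k;			/* index within cur->entries */
};

/* Analogue of swap_write_page(): record one data page's sector. */
static int map_add(struct swap_map_handle *h, unsigned long sector)
{
	h->cur->entries[h->k++] = sector;
	if (h->k >= MAP_PAGE_ENTRIES) {
		struct swap_map_page *next = calloc(1, sizeof(*next));

		if (!next)
			return -1;
		h->cur->next = next;	/* link the chain */
		h->cur = next;
		h->k = 0;
	}
	return 0;
}

int main(void)
{
	struct swap_map_page *first = calloc(1, sizeof(*first));
	struct swap_map_handle h = { .cur = first, .k = 0 };
	unsigned long sector;
	unsigned int i;

	for (sector = 100; sector < 110; sector++)
		map_add(&h, sector);

	/* Walk the chain the way the resume path does. */
	for (; first; first = first->next)
		for (i = 0; i < MAP_PAGE_ENTRIES && first->entries[i]; i++)
			printf("data page at sector %lu\n", first->entries[i]);
	return 0;
}
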
diff --git a/kernel/power/user.c b/kernel/power/user.c
index a8c96212bc1b..e819e17877ca 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -151,6 +151,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
151{ 151{
152 struct snapshot_data *data; 152 struct snapshot_data *data;
153 ssize_t res; 153 ssize_t res;
154 loff_t pg_offp = *offp & ~PAGE_MASK;
154 155
155 mutex_lock(&pm_mutex); 156 mutex_lock(&pm_mutex);
156 157
@@ -159,14 +160,19 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
159 res = -ENODATA; 160 res = -ENODATA;
160 goto Unlock; 161 goto Unlock;
161 } 162 }
162 res = snapshot_read_next(&data->handle, count); 163 if (!pg_offp) { /* on page boundary? */
163 if (res > 0) { 164 res = snapshot_read_next(&data->handle);
164 if (copy_to_user(buf, data_of(data->handle), res)) 165 if (res <= 0)
165 res = -EFAULT; 166 goto Unlock;
166 else 167 } else {
167 *offp = data->handle.offset; 168 res = PAGE_SIZE - pg_offp;
168 } 169 }
169 170
171 res = simple_read_from_buffer(buf, count, &pg_offp,
172 data_of(data->handle), res);
173 if (res > 0)
174 *offp += res;
175
170 Unlock: 176 Unlock:
171 mutex_unlock(&pm_mutex); 177 mutex_unlock(&pm_mutex);
172 178
@@ -178,18 +184,25 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
178{ 184{
179 struct snapshot_data *data; 185 struct snapshot_data *data;
180 ssize_t res; 186 ssize_t res;
187 loff_t pg_offp = *offp & ~PAGE_MASK;
181 188
182 mutex_lock(&pm_mutex); 189 mutex_lock(&pm_mutex);
183 190
184 data = filp->private_data; 191 data = filp->private_data;
185 res = snapshot_write_next(&data->handle, count); 192
186 if (res > 0) { 193 if (!pg_offp) {
187 if (copy_from_user(data_of(data->handle), buf, res)) 194 res = snapshot_write_next(&data->handle);
188 res = -EFAULT; 195 if (res <= 0)
189 else 196 goto unlock;
190 *offp = data->handle.offset; 197 } else {
198 res = PAGE_SIZE - pg_offp;
191 } 199 }
192 200
201 res = simple_write_to_buffer(data_of(data->handle), res, &pg_offp,
202 buf, count);
203 if (res > 0)
204 *offp += res;
205unlock:
193 mutex_unlock(&pm_mutex); 206 mutex_unlock(&pm_mutex);
194 207
195 return res; 208 return res;
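
The user.c change above makes snapshot_read()/snapshot_write() honour partial-page file offsets: a fresh snapshot page is fetched only when *offp sits on a page boundary, otherwise the remainder of the current page is served, and the actual copy is delegated to simple_read_from_buffer()/simple_write_to_buffer(). The sketch below models only the offset arithmetic in plain userspace C, assuming a 4096-byte page for the illustration.

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

/*
 * Return how many bytes may be copied at file offset offp without crossing
 * into the next snapshot page, and report whether a fresh page has to be
 * produced first (the snapshot_read_next()/snapshot_write_next() call).
 */
static unsigned long chunk_at(unsigned long offp, int *need_new_page)
{
	unsigned long pg_offp = offp & ~PAGE_MASK;

	*need_new_page = (pg_offp == 0);
	return PAGE_SIZE - pg_offp;
}

int main(void)
{
	unsigned long offsets[] = { 0, 100, 4095, 4096, 5000 };
	unsigned long i;

	for (i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++) {
		int fresh;
		unsigned long n = chunk_at(offsets[i], &fresh);

		printf("offset %5lu: %s, up to %lu bytes this call\n",
		       offsets[i], fresh ? "fetch new page" : "reuse page", n);
	}
	return 0;
}
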
diff --git a/kernel/sched.c b/kernel/sched.c
index 1d93cd0ae4d3..d9c0368eeb21 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3851,6 +3851,7 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
3851{ 3851{
3852 __wake_up_common(q, mode, 1, 0, NULL); 3852 __wake_up_common(q, mode, 1, 0, NULL);
3853} 3853}
3854EXPORT_SYMBOL_GPL(__wake_up_locked);
3854 3855
3855void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) 3856void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
3856{ 3857{
diff --git a/kernel/sys.c b/kernel/sys.c
index 7cb426a58965..0d36d889c74d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -492,10 +492,6 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
492 return -ENOMEM; 492 return -ENOMEM;
493 old = current_cred(); 493 old = current_cred();
494 494
495 retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE);
496 if (retval)
497 goto error;
498
499 retval = -EPERM; 495 retval = -EPERM;
500 if (rgid != (gid_t) -1) { 496 if (rgid != (gid_t) -1) {
501 if (old->gid == rgid || 497 if (old->gid == rgid ||
@@ -543,10 +539,6 @@ SYSCALL_DEFINE1(setgid, gid_t, gid)
543 return -ENOMEM; 539 return -ENOMEM;
544 old = current_cred(); 540 old = current_cred();
545 541
546 retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID);
547 if (retval)
548 goto error;
549
550 retval = -EPERM; 542 retval = -EPERM;
551 if (capable(CAP_SETGID)) 543 if (capable(CAP_SETGID))
552 new->gid = new->egid = new->sgid = new->fsgid = gid; 544 new->gid = new->egid = new->sgid = new->fsgid = gid;
@@ -610,10 +602,6 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
610 return -ENOMEM; 602 return -ENOMEM;
611 old = current_cred(); 603 old = current_cred();
612 604
613 retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE);
614 if (retval)
615 goto error;
616
617 retval = -EPERM; 605 retval = -EPERM;
618 if (ruid != (uid_t) -1) { 606 if (ruid != (uid_t) -1) {
619 new->uid = ruid; 607 new->uid = ruid;
@@ -675,10 +663,6 @@ SYSCALL_DEFINE1(setuid, uid_t, uid)
675 return -ENOMEM; 663 return -ENOMEM;
676 old = current_cred(); 664 old = current_cred();
677 665
678 retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID);
679 if (retval)
680 goto error;
681
682 retval = -EPERM; 666 retval = -EPERM;
683 if (capable(CAP_SETUID)) { 667 if (capable(CAP_SETUID)) {
684 new->suid = new->uid = uid; 668 new->suid = new->uid = uid;
@@ -719,9 +703,6 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
719 if (!new) 703 if (!new)
720 return -ENOMEM; 704 return -ENOMEM;
721 705
722 retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES);
723 if (retval)
724 goto error;
725 old = current_cred(); 706 old = current_cred();
726 707
727 retval = -EPERM; 708 retval = -EPERM;
@@ -788,10 +769,6 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
788 return -ENOMEM; 769 return -ENOMEM;
789 old = current_cred(); 770 old = current_cred();
790 771
791 retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES);
792 if (retval)
793 goto error;
794
795 retval = -EPERM; 772 retval = -EPERM;
796 if (!capable(CAP_SETGID)) { 773 if (!capable(CAP_SETGID)) {
797 if (rgid != (gid_t) -1 && rgid != old->gid && 774 if (rgid != (gid_t) -1 && rgid != old->gid &&
@@ -851,9 +828,6 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid)
851 old = current_cred(); 828 old = current_cred();
852 old_fsuid = old->fsuid; 829 old_fsuid = old->fsuid;
853 830
854 if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS) < 0)
855 goto error;
856
857 if (uid == old->uid || uid == old->euid || 831 if (uid == old->uid || uid == old->euid ||
858 uid == old->suid || uid == old->fsuid || 832 uid == old->suid || uid == old->fsuid ||
859 capable(CAP_SETUID)) { 833 capable(CAP_SETUID)) {
@@ -864,7 +838,6 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid)
864 } 838 }
865 } 839 }
866 840
867error:
868 abort_creds(new); 841 abort_creds(new);
869 return old_fsuid; 842 return old_fsuid;
870 843
@@ -888,9 +861,6 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid)
888 old = current_cred(); 861 old = current_cred();
889 old_fsgid = old->fsgid; 862 old_fsgid = old->fsgid;
890 863
891 if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS))
892 goto error;
893
894 if (gid == old->gid || gid == old->egid || 864 if (gid == old->gid || gid == old->egid ||
895 gid == old->sgid || gid == old->fsgid || 865 gid == old->sgid || gid == old->fsgid ||
896 capable(CAP_SETGID)) { 866 capable(CAP_SETGID)) {
@@ -900,7 +870,6 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid)
900 } 870 }
901 } 871 }
902 872
903error:
904 abort_creds(new); 873 abort_creds(new);
905 return old_fsgid; 874 return old_fsgid;
906 875
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8686b0f5fc12..b12583047757 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -163,6 +163,27 @@ static int proc_taint(struct ctl_table *table, int write,
163 void __user *buffer, size_t *lenp, loff_t *ppos); 163 void __user *buffer, size_t *lenp, loff_t *ppos);
164#endif 164#endif
165 165
166#ifdef CONFIG_MAGIC_SYSRQ
 167static int __sysrq_enabled; /* Note: sysrq code uses its own private copy */
168
169static int sysrq_sysctl_handler(ctl_table *table, int write,
170 void __user *buffer, size_t *lenp,
171 loff_t *ppos)
172{
173 int error;
174
175 error = proc_dointvec(table, write, buffer, lenp, ppos);
176 if (error)
177 return error;
178
179 if (write)
180 sysrq_toggle_support(__sysrq_enabled);
181
182 return 0;
183}
184
185#endif
186
166static struct ctl_table root_table[]; 187static struct ctl_table root_table[];
167static struct ctl_table_root sysctl_table_root; 188static struct ctl_table_root sysctl_table_root;
168static struct ctl_table_header root_table_header = { 189static struct ctl_table_header root_table_header = {
@@ -567,7 +588,7 @@ static struct ctl_table kern_table[] = {
567 .data = &__sysrq_enabled, 588 .data = &__sysrq_enabled,
568 .maxlen = sizeof (int), 589 .maxlen = sizeof (int),
569 .mode = 0644, 590 .mode = 0644,
570 .proc_handler = proc_dointvec, 591 .proc_handler = sysrq_sysctl_handler,
571 }, 592 },
572#endif 593#endif
573#ifdef CONFIG_PROC_SYSCTL 594#ifdef CONFIG_PROC_SYSCTL
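
The two sysctl.c hunks above follow a common proc handler pattern: the generic integer parser (proc_dointvec() in the kernel) fills a private copy of the value, and the side effect (sysrq_toggle_support()) is applied only when the access was a write. Below is a userspace sketch of that shape; parse_int() and toggle_support() are simplified stand-ins, and only the write path is modelled.

#include <stdio.h>
#include <stdlib.h>

static int sysrq_enabled;	/* private copy, like __sysrq_enabled */

/* Stand-in for proc_dointvec(): parse a decimal string into *valp. */
static int parse_int(const char *buf, int *valp)
{
	char *end;
	long v = strtol(buf, &end, 10);

	if (end == buf)
		return -1;	/* nothing parsed */
	*valp = (int)v;
	return 0;
}

/* Stand-in for sysrq_toggle_support(): apply the new value somewhere. */
static void toggle_support(int enable)
{
	printf("sysrq support now %s\n", enable ? "enabled" : "disabled");
}

/* Shape of sysrq_sysctl_handler(): parse first, act only on writes. */
static int sysrq_handler(int write, const char *buf)
{
	int error = parse_int(buf, &sysrq_enabled);

	if (error)
		return error;
	if (write)
		toggle_support(sysrq_enabled);
	return 0;
}

int main(void)
{
	sysrq_handler(1, "1");	/* like: echo 1 > /proc/sys/kernel/sysrq */
	sysrq_handler(1, "0");	/* like: echo 0 > /proc/sys/kernel/sysrq */
	return 0;
}
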
@@ -621,7 +642,7 @@ static struct ctl_table kern_table[] = {
621#endif 642#endif
622 { 643 {
623 .procname = "userprocess_debug", 644 .procname = "userprocess_debug",
624 .data = &sysctl_userprocess_debug, 645 .data = &show_unhandled_signals,
625 .maxlen = sizeof(int), 646 .maxlen = sizeof(int),
626 .mode = 0644, 647 .mode = 0644,
627 .proc_handler = proc_dointvec, 648 .proc_handler = proc_dointvec,
@@ -1431,7 +1452,8 @@ static struct ctl_table fs_table[] = {
1431}; 1452};
1432 1453
1433static struct ctl_table debug_table[] = { 1454static struct ctl_table debug_table[] = {
1434#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) 1455#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \
1456 defined(CONFIG_S390)
1435 { 1457 {
1436 .procname = "exception-trace", 1458 .procname = "exception-trace",
1437 .data = &show_unhandled_signals, 1459 .data = &show_unhandled_signals,
@@ -2040,8 +2062,132 @@ int proc_dostring(struct ctl_table *table, int write,
2040 buffer, lenp, ppos); 2062 buffer, lenp, ppos);
2041} 2063}
2042 2064
2065static size_t proc_skip_spaces(char **buf)
2066{
2067 size_t ret;
2068 char *tmp = skip_spaces(*buf);
2069 ret = tmp - *buf;
2070 *buf = tmp;
2071 return ret;
2072}
2073
2074static void proc_skip_char(char **buf, size_t *size, const char v)
2075{
2076 while (*size) {
2077 if (**buf != v)
2078 break;
2079 (*size)--;
2080 (*buf)++;
2081 }
2082}
2083
2084#define TMPBUFLEN 22
2085/**
2086 * proc_get_long - reads an ASCII formatted integer from a kernel buffer
2087 *
2088 * @buf: a kernel buffer
2089 * @size: size of the kernel buffer
2090 * @val: this is where the number will be stored
2091 * @neg: set to %TRUE if number is negative
2092 * @perm_tr: a vector which contains the allowed trailers
2093 * @perm_tr_len: size of the perm_tr vector
2094 * @tr: pointer to store the trailer character
2095 *
2096 * In case of success 0 is returned and buf and size are updated with
2097 * the number of bytes read. If tr is non-NULL and a trailing
2098 * character exists (size is non-zero after returning from this
2099 * function), tr is updated with the trailing character.
2100 */
2101static int proc_get_long(char **buf, size_t *size,
2102 unsigned long *val, bool *neg,
2103 const char *perm_tr, unsigned perm_tr_len, char *tr)
2104{
2105 int len;
2106 char *p, tmp[TMPBUFLEN];
2107
2108 if (!*size)
2109 return -EINVAL;
2110
2111 len = *size;
2112 if (len > TMPBUFLEN - 1)
2113 len = TMPBUFLEN - 1;
2114
2115 memcpy(tmp, *buf, len);
2116
2117 tmp[len] = 0;
2118 p = tmp;
2119 if (*p == '-' && *size > 1) {
2120 *neg = true;
2121 p++;
2122 } else
2123 *neg = false;
2124 if (!isdigit(*p))
2125 return -EINVAL;
2126
2127 *val = simple_strtoul(p, &p, 0);
2128
2129 len = p - tmp;
2130
2131	 /* We don't know if the next char is whitespace, thus we may accept
2132	 * invalid integers (e.g. 1234...a) or two integers instead of one
2133	 * (e.g. 123...1). So let's not allow such large numbers. */
2134 if (len == TMPBUFLEN - 1)
2135 return -EINVAL;
2136
2137 if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
2138 return -EINVAL;
2139
2140 if (tr && (len < *size))
2141 *tr = *p;
2142
2143 *buf += len;
2144 *size -= len;
2145
2146 return 0;
2147}
2148
2149/**
2150 * proc_put_long - converts an integer to a decimal ASCII formatted string
2151 *
2152 * @buf: the user buffer
2153 * @size: the size of the user buffer
2154 * @val: the integer to be converted
2155 * @neg: sign of the number, %TRUE for negative
2156 *
2157 * In case of success 0 is returned and buf and size are updated with
2158 * the number of bytes written.
2159 */
2160static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2161 bool neg)
2162{
2163 int len;
2164 char tmp[TMPBUFLEN], *p = tmp;
2165
2166 sprintf(p, "%s%lu", neg ? "-" : "", val);
2167 len = strlen(tmp);
2168 if (len > *size)
2169 len = *size;
2170 if (copy_to_user(*buf, tmp, len))
2171 return -EFAULT;
2172 *size -= len;
2173 *buf += len;
2174 return 0;
2175}
2176#undef TMPBUFLEN
2043 2177
2044static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, 2178static int proc_put_char(void __user **buf, size_t *size, char c)
2179{
2180 if (*size) {
2181 char __user **buffer = (char __user **)buf;
2182 if (put_user(c, *buffer))
2183 return -EFAULT;
2184 (*size)--, (*buffer)++;
2185 *buf = *buffer;
2186 }
2187 return 0;
2188}
2189
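
The helpers above establish a small convention: an optional leading '-', decimal digits, and at most one trailing character drawn from a caller-supplied set. A minimal userspace sketch of that convention follows; strtoul() stands in for simple_strtoul(), the get_long() name is made up, and this is an illustration rather than the kernel code.

/* Userspace sketch of the proc_get_long()/proc_put_long() conventions. */
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int get_long(const char **buf, size_t *size, unsigned long *val,
		    bool *neg, const char *perm_tr, size_t perm_tr_len)
{
	const char *p = *buf;
	char *end;

	if (!*size)
		return -1;
	*neg = (*p == '-' && *size > 1);
	if (*neg)
		p++;
	if (!isdigit((unsigned char)*p))
		return -1;
	*val = strtoul(p, &end, 0);
	/* a trailing char, if any, must come from the permitted trailer set */
	if ((size_t)(end - *buf) < *size && *end &&
	    !memchr(perm_tr, *end, perm_tr_len))
		return -1;
	*size -= end - *buf;
	*buf = end;
	return 0;
}

int main(void)
{
	const char *buf = "-42\n";
	size_t left = strlen(buf);
	unsigned long val;
	bool neg;
	const char wspace[] = { ' ', '\t', '\n' };

	if (!get_long(&buf, &left, &val, &neg, wspace, sizeof(wspace)))
		printf("%s%lu (left=%zu)\n", neg ? "-" : "", val, left);
	return 0;
}
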
2190static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
2045 int *valp, 2191 int *valp,
2046 int write, void *data) 2192 int write, void *data)
2047{ 2193{
@@ -2050,33 +2196,31 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
2050 } else { 2196 } else {
2051 int val = *valp; 2197 int val = *valp;
2052 if (val < 0) { 2198 if (val < 0) {
2053 *negp = -1; 2199 *negp = true;
2054 *lvalp = (unsigned long)-val; 2200 *lvalp = (unsigned long)-val;
2055 } else { 2201 } else {
2056 *negp = 0; 2202 *negp = false;
2057 *lvalp = (unsigned long)val; 2203 *lvalp = (unsigned long)val;
2058 } 2204 }
2059 } 2205 }
2060 return 0; 2206 return 0;
2061} 2207}
2062 2208
2209static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
2210
2063static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, 2211static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2064 int write, void __user *buffer, 2212 int write, void __user *buffer,
2065 size_t *lenp, loff_t *ppos, 2213 size_t *lenp, loff_t *ppos,
2066 int (*conv)(int *negp, unsigned long *lvalp, int *valp, 2214 int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2067 int write, void *data), 2215 int write, void *data),
2068 void *data) 2216 void *data)
2069{ 2217{
2070#define TMPBUFLEN 21 2218 int *i, vleft, first = 1, err = 0;
2071 int *i, vleft, first = 1, neg; 2219 unsigned long page = 0;
2072 unsigned long lval; 2220 size_t left;
2073 size_t left, len; 2221 char *kbuf;
2074 2222
2075 char buf[TMPBUFLEN], *p; 2223 if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2076 char __user *s = buffer;
2077
2078 if (!tbl_data || !table->maxlen || !*lenp ||
2079 (*ppos && !write)) {
2080 *lenp = 0; 2224 *lenp = 0;
2081 return 0; 2225 return 0;
2082 } 2226 }
@@ -2088,89 +2232,69 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2088 if (!conv) 2232 if (!conv)
2089 conv = do_proc_dointvec_conv; 2233 conv = do_proc_dointvec_conv;
2090 2234
2235 if (write) {
2236 if (left > PAGE_SIZE - 1)
2237 left = PAGE_SIZE - 1;
2238 page = __get_free_page(GFP_TEMPORARY);
2239 kbuf = (char *) page;
2240 if (!kbuf)
2241 return -ENOMEM;
2242 if (copy_from_user(kbuf, buffer, left)) {
2243 err = -EFAULT;
2244 goto free;
2245 }
2246 kbuf[left] = 0;
2247 }
2248
2091 for (; left && vleft--; i++, first=0) { 2249 for (; left && vleft--; i++, first=0) {
2092 if (write) { 2250 unsigned long lval;
2093 while (left) { 2251 bool neg;
2094 char c;
2095 if (get_user(c, s))
2096 return -EFAULT;
2097 if (!isspace(c))
2098 break;
2099 left--;
2100 s++;
2101 }
2102 if (!left)
2103 break;
2104 neg = 0;
2105 len = left;
2106 if (len > sizeof(buf) - 1)
2107 len = sizeof(buf) - 1;
2108 if (copy_from_user(buf, s, len))
2109 return -EFAULT;
2110 buf[len] = 0;
2111 p = buf;
2112 if (*p == '-' && left > 1) {
2113 neg = 1;
2114 p++;
2115 }
2116 if (*p < '0' || *p > '9')
2117 break;
2118 2252
2119 lval = simple_strtoul(p, &p, 0); 2253 if (write) {
2254 left -= proc_skip_spaces(&kbuf);
2120 2255
2121 len = p-buf; 2256 err = proc_get_long(&kbuf, &left, &lval, &neg,
2122 if ((len < left) && *p && !isspace(*p)) 2257 proc_wspace_sep,
2258 sizeof(proc_wspace_sep), NULL);
2259 if (err)
2123 break; 2260 break;
2124 s += len; 2261 if (conv(&neg, &lval, i, 1, data)) {
2125 left -= len; 2262 err = -EINVAL;
2126
2127 if (conv(&neg, &lval, i, 1, data))
2128 break; 2263 break;
2264 }
2129 } else { 2265 } else {
2130 p = buf; 2266 if (conv(&neg, &lval, i, 0, data)) {
2267 err = -EINVAL;
2268 break;
2269 }
2131 if (!first) 2270 if (!first)
2132 *p++ = '\t'; 2271 err = proc_put_char(&buffer, &left, '\t');
2133 2272 if (err)
2134 if (conv(&neg, &lval, i, 0, data)) 2273 break;
2274 err = proc_put_long(&buffer, &left, lval, neg);
2275 if (err)
2135 break; 2276 break;
2136
2137 sprintf(p, "%s%lu", neg ? "-" : "", lval);
2138 len = strlen(buf);
2139 if (len > left)
2140 len = left;
2141 if(copy_to_user(s, buf, len))
2142 return -EFAULT;
2143 left -= len;
2144 s += len;
2145 } 2277 }
2146 } 2278 }
2147 2279
2148 if (!write && !first && left) { 2280 if (!write && !first && left && !err)
2149 if(put_user('\n', s)) 2281 err = proc_put_char(&buffer, &left, '\n');
2150 return -EFAULT; 2282 if (write && !err)
2151 left--, s++; 2283 left -= proc_skip_spaces(&kbuf);
2152 } 2284free:
2153 if (write) { 2285 if (write) {
2154 while (left) { 2286 free_page(page);
2155 char c; 2287 if (first)
2156 if (get_user(c, s++)) 2288 return err ? : -EINVAL;
2157 return -EFAULT;
2158 if (!isspace(c))
2159 break;
2160 left--;
2161 }
2162 } 2289 }
2163 if (write && first)
2164 return -EINVAL;
2165 *lenp -= left; 2290 *lenp -= left;
2166 *ppos += *lenp; 2291 *ppos += *lenp;
2167 return 0; 2292 return err;
2168#undef TMPBUFLEN
2169} 2293}
2170 2294
2171static int do_proc_dointvec(struct ctl_table *table, int write, 2295static int do_proc_dointvec(struct ctl_table *table, int write,
2172 void __user *buffer, size_t *lenp, loff_t *ppos, 2296 void __user *buffer, size_t *lenp, loff_t *ppos,
2173 int (*conv)(int *negp, unsigned long *lvalp, int *valp, 2297 int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2174 int write, void *data), 2298 int write, void *data),
2175 void *data) 2299 void *data)
2176{ 2300{
@@ -2238,8 +2362,8 @@ struct do_proc_dointvec_minmax_conv_param {
2238 int *max; 2362 int *max;
2239}; 2363};
2240 2364
2241static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp, 2365static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
2242 int *valp, 2366 int *valp,
2243 int write, void *data) 2367 int write, void *data)
2244{ 2368{
2245 struct do_proc_dointvec_minmax_conv_param *param = data; 2369 struct do_proc_dointvec_minmax_conv_param *param = data;
@@ -2252,10 +2376,10 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
2252 } else { 2376 } else {
2253 int val = *valp; 2377 int val = *valp;
2254 if (val < 0) { 2378 if (val < 0) {
2255 *negp = -1; 2379 *negp = true;
2256 *lvalp = (unsigned long)-val; 2380 *lvalp = (unsigned long)-val;
2257 } else { 2381 } else {
2258 *negp = 0; 2382 *negp = false;
2259 *lvalp = (unsigned long)val; 2383 *lvalp = (unsigned long)val;
2260 } 2384 }
2261 } 2385 }
@@ -2295,102 +2419,78 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
2295 unsigned long convmul, 2419 unsigned long convmul,
2296 unsigned long convdiv) 2420 unsigned long convdiv)
2297{ 2421{
2298#define TMPBUFLEN 21 2422 unsigned long *i, *min, *max;
2299 unsigned long *i, *min, *max, val; 2423 int vleft, first = 1, err = 0;
2300 int vleft, first=1, neg; 2424 unsigned long page = 0;
2301 size_t len, left; 2425 size_t left;
2302 char buf[TMPBUFLEN], *p; 2426 char *kbuf;
2303 char __user *s = buffer; 2427
2304 2428 if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2305 if (!data || !table->maxlen || !*lenp ||
2306 (*ppos && !write)) {
2307 *lenp = 0; 2429 *lenp = 0;
2308 return 0; 2430 return 0;
2309 } 2431 }
2310 2432
2311 i = (unsigned long *) data; 2433 i = (unsigned long *) data;
2312 min = (unsigned long *) table->extra1; 2434 min = (unsigned long *) table->extra1;
2313 max = (unsigned long *) table->extra2; 2435 max = (unsigned long *) table->extra2;
2314 vleft = table->maxlen / sizeof(unsigned long); 2436 vleft = table->maxlen / sizeof(unsigned long);
2315 left = *lenp; 2437 left = *lenp;
2316 2438
2439 if (write) {
2440 if (left > PAGE_SIZE - 1)
2441 left = PAGE_SIZE - 1;
2442 page = __get_free_page(GFP_TEMPORARY);
2443 kbuf = (char *) page;
2444 if (!kbuf)
2445 return -ENOMEM;
2446 if (copy_from_user(kbuf, buffer, left)) {
2447 err = -EFAULT;
2448 goto free;
2449 }
2450 kbuf[left] = 0;
2451 }
2452
2317 for (; left && vleft--; i++, min++, max++, first=0) { 2453 for (; left && vleft--; i++, min++, max++, first=0) {
2454 unsigned long val;
2455
2318 if (write) { 2456 if (write) {
2319 while (left) { 2457 bool neg;
2320 char c; 2458
2321 if (get_user(c, s)) 2459 left -= proc_skip_spaces(&kbuf);
2322 return -EFAULT; 2460
2323 if (!isspace(c)) 2461 err = proc_get_long(&kbuf, &left, &val, &neg,
2324 break; 2462 proc_wspace_sep,
2325 left--; 2463 sizeof(proc_wspace_sep), NULL);
2326 s++; 2464 if (err)
2327 }
2328 if (!left)
2329 break;
2330 neg = 0;
2331 len = left;
2332 if (len > TMPBUFLEN-1)
2333 len = TMPBUFLEN-1;
2334 if (copy_from_user(buf, s, len))
2335 return -EFAULT;
2336 buf[len] = 0;
2337 p = buf;
2338 if (*p == '-' && left > 1) {
2339 neg = 1;
2340 p++;
2341 }
2342 if (*p < '0' || *p > '9')
2343 break;
2344 val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2345 len = p-buf;
2346 if ((len < left) && *p && !isspace(*p))
2347 break; 2465 break;
2348 if (neg) 2466 if (neg)
2349 val = -val;
2350 s += len;
2351 left -= len;
2352
2353 if(neg)
2354 continue; 2467 continue;
2355 if ((min && val < *min) || (max && val > *max)) 2468 if ((min && val < *min) || (max && val > *max))
2356 continue; 2469 continue;
2357 *i = val; 2470 *i = val;
2358 } else { 2471 } else {
2359 p = buf; 2472 val = convdiv * (*i) / convmul;
2360 if (!first) 2473 if (!first)
2361 *p++ = '\t'; 2474 err = proc_put_char(&buffer, &left, '\t');
2362 sprintf(p, "%lu", convdiv * (*i) / convmul); 2475 err = proc_put_long(&buffer, &left, val, false);
2363 len = strlen(buf); 2476 if (err)
2364 if (len > left) 2477 break;
2365 len = left;
2366 if(copy_to_user(s, buf, len))
2367 return -EFAULT;
2368 left -= len;
2369 s += len;
2370 } 2478 }
2371 } 2479 }
2372 2480
2373 if (!write && !first && left) { 2481 if (!write && !first && left && !err)
2374 if(put_user('\n', s)) 2482 err = proc_put_char(&buffer, &left, '\n');
2375 return -EFAULT; 2483 if (write && !err)
2376 left--, s++; 2484 left -= proc_skip_spaces(&kbuf);
2377 } 2485free:
2378 if (write) { 2486 if (write) {
2379 while (left) { 2487 free_page(page);
2380 char c; 2488 if (first)
2381 if (get_user(c, s++)) 2489 return err ? : -EINVAL;
2382 return -EFAULT;
2383 if (!isspace(c))
2384 break;
2385 left--;
2386 }
2387 } 2490 }
2388 if (write && first)
2389 return -EINVAL;
2390 *lenp -= left; 2491 *lenp -= left;
2391 *ppos += *lenp; 2492 *ppos += *lenp;
2392 return 0; 2493 return err;
2393#undef TMPBUFLEN
2394} 2494}
2395 2495
2396static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, 2496static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
@@ -2451,7 +2551,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2451} 2551}
2452 2552
2453 2553
2454static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp, 2554static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2455 int *valp, 2555 int *valp,
2456 int write, void *data) 2556 int write, void *data)
2457{ 2557{
@@ -2463,10 +2563,10 @@ static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2463 int val = *valp; 2563 int val = *valp;
2464 unsigned long lval; 2564 unsigned long lval;
2465 if (val < 0) { 2565 if (val < 0) {
2466 *negp = -1; 2566 *negp = true;
2467 lval = (unsigned long)-val; 2567 lval = (unsigned long)-val;
2468 } else { 2568 } else {
2469 *negp = 0; 2569 *negp = false;
2470 lval = (unsigned long)val; 2570 lval = (unsigned long)val;
2471 } 2571 }
2472 *lvalp = lval / HZ; 2572 *lvalp = lval / HZ;
@@ -2474,7 +2574,7 @@ static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2474 return 0; 2574 return 0;
2475} 2575}
2476 2576
2477static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp, 2577static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2478 int *valp, 2578 int *valp,
2479 int write, void *data) 2579 int write, void *data)
2480{ 2580{
@@ -2486,10 +2586,10 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2486 int val = *valp; 2586 int val = *valp;
2487 unsigned long lval; 2587 unsigned long lval;
2488 if (val < 0) { 2588 if (val < 0) {
2489 *negp = -1; 2589 *negp = true;
2490 lval = (unsigned long)-val; 2590 lval = (unsigned long)-val;
2491 } else { 2591 } else {
2492 *negp = 0; 2592 *negp = false;
2493 lval = (unsigned long)val; 2593 lval = (unsigned long)val;
2494 } 2594 }
2495 *lvalp = jiffies_to_clock_t(lval); 2595 *lvalp = jiffies_to_clock_t(lval);
@@ -2497,7 +2597,7 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2497 return 0; 2597 return 0;
2498} 2598}
2499 2599
2500static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp, 2600static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
2501 int *valp, 2601 int *valp,
2502 int write, void *data) 2602 int write, void *data)
2503{ 2603{
@@ -2507,10 +2607,10 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
2507 int val = *valp; 2607 int val = *valp;
2508 unsigned long lval; 2608 unsigned long lval;
2509 if (val < 0) { 2609 if (val < 0) {
2510 *negp = -1; 2610 *negp = true;
2511 lval = (unsigned long)-val; 2611 lval = (unsigned long)-val;
2512 } else { 2612 } else {
2513 *negp = 0; 2613 *negp = false;
2514 lval = (unsigned long)val; 2614 lval = (unsigned long)val;
2515 } 2615 }
2516 *lvalp = jiffies_to_msecs(lval); 2616 *lvalp = jiffies_to_msecs(lval);
@@ -2607,6 +2707,157 @@ static int proc_do_cad_pid(struct ctl_table *table, int write,
2607 return 0; 2707 return 0;
2608} 2708}
2609 2709
2710/**
2711 * proc_do_large_bitmap - read/write from/to a large bitmap
2712 * @table: the sysctl table
2713 * @write: %TRUE if this is a write to the sysctl file
2714 * @buffer: the user buffer
2715 * @lenp: the size of the user buffer
2716 * @ppos: file position
2717 *
2718 * The bitmap is stored at table->data and the bitmap length (in bits)
2719 * in table->maxlen.
2720 *
2721 * We use a comma-separated range format (e.g. 1,3-4,10-10) so that
2722 * large bitmaps may be represented in a compact manner. Writing into
2723 * the file will clear the bitmap and then update it with the given input.
2724 *
2725 * Returns 0 on success.
2726 */
2727int proc_do_large_bitmap(struct ctl_table *table, int write,
2728 void __user *buffer, size_t *lenp, loff_t *ppos)
2729{
2730 int err = 0;
2731 bool first = 1;
2732 size_t left = *lenp;
2733 unsigned long bitmap_len = table->maxlen;
2734 unsigned long *bitmap = (unsigned long *) table->data;
2735 unsigned long *tmp_bitmap = NULL;
2736 char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
2737
2738 if (!bitmap_len || !left || (*ppos && !write)) {
2739 *lenp = 0;
2740 return 0;
2741 }
2742
2743 if (write) {
2744 unsigned long page = 0;
2745 char *kbuf;
2746
2747 if (left > PAGE_SIZE - 1)
2748 left = PAGE_SIZE - 1;
2749
2750 page = __get_free_page(GFP_TEMPORARY);
2751 kbuf = (char *) page;
2752 if (!kbuf)
2753 return -ENOMEM;
2754 if (copy_from_user(kbuf, buffer, left)) {
2755 free_page(page);
2756 return -EFAULT;
2757 }
2758 kbuf[left] = 0;
2759
2760 tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long),
2761 GFP_KERNEL);
2762 if (!tmp_bitmap) {
2763 free_page(page);
2764 return -ENOMEM;
2765 }
2766 proc_skip_char(&kbuf, &left, '\n');
2767 while (!err && left) {
2768 unsigned long val_a, val_b;
2769 bool neg;
2770
2771 err = proc_get_long(&kbuf, &left, &val_a, &neg, tr_a,
2772 sizeof(tr_a), &c);
2773 if (err)
2774 break;
2775 if (val_a >= bitmap_len || neg) {
2776 err = -EINVAL;
2777 break;
2778 }
2779
2780 val_b = val_a;
2781 if (left) {
2782 kbuf++;
2783 left--;
2784 }
2785
2786 if (c == '-') {
2787 err = proc_get_long(&kbuf, &left, &val_b,
2788 &neg, tr_b, sizeof(tr_b),
2789 &c);
2790 if (err)
2791 break;
2792 if (val_b >= bitmap_len || neg ||
2793 val_a > val_b) {
2794 err = -EINVAL;
2795 break;
2796 }
2797 if (left) {
2798 kbuf++;
2799 left--;
2800 }
2801 }
2802
2803 while (val_a <= val_b)
2804 set_bit(val_a++, tmp_bitmap);
2805
2806 first = 0;
2807 proc_skip_char(&kbuf, &left, '\n');
2808 }
2809 free_page(page);
2810 } else {
2811 unsigned long bit_a, bit_b = 0;
2812
2813 while (left) {
2814 bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
2815 if (bit_a >= bitmap_len)
2816 break;
2817 bit_b = find_next_zero_bit(bitmap, bitmap_len,
2818 bit_a + 1) - 1;
2819
2820 if (!first) {
2821 err = proc_put_char(&buffer, &left, ',');
2822 if (err)
2823 break;
2824 }
2825 err = proc_put_long(&buffer, &left, bit_a, false);
2826 if (err)
2827 break;
2828 if (bit_a != bit_b) {
2829 err = proc_put_char(&buffer, &left, '-');
2830 if (err)
2831 break;
2832 err = proc_put_long(&buffer, &left, bit_b, false);
2833 if (err)
2834 break;
2835 }
2836
2837 first = 0; bit_b++;
2838 }
2839 if (!err)
2840 err = proc_put_char(&buffer, &left, '\n');
2841 }
2842
2843 if (!err) {
2844 if (write) {
2845 if (*ppos)
2846 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
2847 else
2848 memcpy(bitmap, tmp_bitmap,
2849 BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long));
2850 }
2851 kfree(tmp_bitmap);
2852 *lenp -= left;
2853 *ppos += *lenp;
2854 return 0;
2855 } else {
2856 kfree(tmp_bitmap);
2857 return err;
2858 }
2859}
2860
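
To make the comma-separated range format concrete, here is a userspace sketch that parses "1,3-4,10-10" into a toy 16-bit map and prints it back in the compact form the read path above produces; strtok() and plain arrays are assumptions standing in for proc_get_long() and the kernel bitmap helpers.

/* Userspace illustration of the "1,3-4,10-10" range format. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NBITS 16

int main(void)
{
	const char *input = "1,3-4,10-10\n";
	unsigned char bits[NBITS] = { 0 };
	char *s = strdup(input), *tok;
	unsigned long a, b, i, j;
	int first = 1;

	for (tok = strtok(s, ",\n"); tok; tok = strtok(NULL, ",\n")) {
		char *dash = strchr(tok, '-');

		a = strtoul(tok, NULL, 10);
		b = dash ? strtoul(dash + 1, NULL, 10) : a;
		if (a >= NBITS || b >= NBITS || a > b)
			continue;	/* proc_do_large_bitmap() returns -EINVAL here */
		for (i = a; i <= b; i++)
			bits[i] = 1;
	}

	/* print back in the compact range form the read path produces */
	for (i = 0; i < NBITS; i++) {
		if (!bits[i])
			continue;
		for (j = i; j + 1 < NBITS && bits[j + 1]; j++)
			;
		printf("%s%lu", first ? "" : ",", i);
		if (j != i)
			printf("-%lu", j);
		first = 0;
		i = j;
	}
	printf("\n");	/* -> "1,3-4,10" */
	free(s);
	return 0;
}
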
2610#else /* CONFIG_PROC_FS */ 2861#else /* CONFIG_PROC_FS */
2611 2862
2612int proc_dostring(struct ctl_table *table, int write, 2863int proc_dostring(struct ctl_table *table, int write,
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 59030570f5ca..937d31dc8566 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -224,7 +224,6 @@ static const struct bin_table bin_net_ipv4_route_table[] = {
224 { CTL_INT, NET_IPV4_ROUTE_MTU_EXPIRES, "mtu_expires" }, 224 { CTL_INT, NET_IPV4_ROUTE_MTU_EXPIRES, "mtu_expires" },
225 { CTL_INT, NET_IPV4_ROUTE_MIN_PMTU, "min_pmtu" }, 225 { CTL_INT, NET_IPV4_ROUTE_MIN_PMTU, "min_pmtu" },
226 { CTL_INT, NET_IPV4_ROUTE_MIN_ADVMSS, "min_adv_mss" }, 226 { CTL_INT, NET_IPV4_ROUTE_MIN_ADVMSS, "min_adv_mss" },
227 { CTL_INT, NET_IPV4_ROUTE_SECRET_INTERVAL, "secret_interval" },
228 {} 227 {}
229}; 228};
230 229
diff --git a/kernel/time.c b/kernel/time.c
index 656dccfe1cbb..50612faa9baf 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -132,12 +132,11 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
132 */ 132 */
133static inline void warp_clock(void) 133static inline void warp_clock(void)
134{ 134{
135 write_seqlock_irq(&xtime_lock); 135 struct timespec delta, adjust;
136 wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; 136 delta.tv_sec = sys_tz.tz_minuteswest * 60;
137 xtime.tv_sec += sys_tz.tz_minuteswest * 60; 137 delta.tv_nsec = 0;
138 update_xtime_cache(0); 138 adjust = timespec_add_safe(current_kernel_time(), delta);
139 write_sequnlock_irq(&xtime_lock); 139 do_settimeofday(&adjust);
140 clock_was_set();
141} 140}
142 141
143/* 142/*
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 1f5dde637457..f08e99c1d561 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -625,6 +625,54 @@ static void clocksource_enqueue(struct clocksource *cs)
625 list_add(&cs->list, entry); 625 list_add(&cs->list, entry);
626} 626}
627 627
628
629/*
630 * Maximum time we expect to go between ticks. This includes idle
631 * tickless time. It provides the trade-off between selecting a
632 * mult/shift pair that is very precise but can only handle a short
633 * period of time, vs. a mult/shift pair that can handle long periods
634 * of time but isn't as precise.
635 *
636 * This is a subsystem constant, and actual hardware limitations
637 * may override it (i.e. clocksources that wrap every 3 seconds).
638 */
639#define MAX_UPDATE_LENGTH 5 /* Seconds */
640
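
For reference, the mult/shift pair computed by clocks_calc_mult_shift() is applied as ns = (cycles * mult) >> shift; a larger shift buys precision but overflows over long intervals, which is the trade-off MAX_UPDATE_LENGTH bounds. A toy example with made-up numbers (a 10 MHz counter, i.e. 100 ns per cycle):

/* Illustration only, not kernel code: one valid mult/shift pair for a
 * 10 MHz clock is mult = 100 << 24, shift = 24. */
#include <stdio.h>

int main(void)
{
	unsigned long long cycles = 123456;
	unsigned int shift = 24;
	unsigned long long mult = 100ULL << shift;	/* 100 ns per cycle */

	printf("%llu ns\n", (cycles * mult) >> shift);	/* prints 12345600 ns */
	return 0;
}
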
641/**
642 * __clocksource_register_scale - Used to install new clocksources
643 * @cs: clocksource to be registered
644 * @scale: Scale factor multiplied against freq to get clocksource hz
645 * @freq: clocksource frequency (cycles per second) divided by scale
646 *
647 * Returns -EBUSY if registration fails, zero otherwise.
648 *
649 * This *SHOULD NOT* be called directly! Please use the
650 * clocksource_register_hz() or clocksource_register_khz() helper functions.
651 */
652int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
653{
654
655 /*
656 * Ideally we want to use some of the limits used in
657 * clocksource_max_deferment, to provide a more informed
658 * MAX_UPDATE_LENGTH. But for now this just gets the
659 * register interface working properly.
660 */
661 clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
662 NSEC_PER_SEC/scale,
663 MAX_UPDATE_LENGTH*scale);
664 cs->max_idle_ns = clocksource_max_deferment(cs);
665
666 mutex_lock(&clocksource_mutex);
667 clocksource_enqueue(cs);
668 clocksource_select();
669 clocksource_enqueue_watchdog(cs);
670 mutex_unlock(&clocksource_mutex);
671 return 0;
672}
673EXPORT_SYMBOL_GPL(__clocksource_register_scale);
674
675
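
As the comment says, drivers should go through the wrappers. A hedged sketch of what that might look like for a fictitious 10 MHz clocksource (the foo_* names, rating and frequency are assumptions; mult and shift are deliberately left unset so the new registration path computes them):

/* Hedged sketch: registering a hypothetical 10 MHz clocksource through
 * clocksource_register_hz(), which ends up in
 * __clocksource_register_scale(cs, 1, hz). */
#include <linux/clocksource.h>
#include <linux/init.h>

static cycle_t foo_read(struct clocksource *cs)
{
	return 0;	/* would read the hardware counter here */
}

static struct clocksource foo_clocksource = {
	.name	= "foo",
	.rating	= 300,
	.read	= foo_read,
	.mask	= CLOCKSOURCE_MASK(32),
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
};

static int __init foo_clocksource_init(void)
{
	/* mult/shift are computed for us from the frequency */
	return clocksource_register_hz(&foo_clocksource, 10000000);
}
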
628/** 676/**
629 * clocksource_register - Used to install new clocksources 677 * clocksource_register - Used to install new clocksources
630 * @t: clocksource to be registered 678 * @t: clocksource to be registered
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 7c0f180d6e9d..c63116863a80 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -69,7 +69,7 @@ static s64 time_freq;
69/* time at last adjustment (secs): */ 69/* time at last adjustment (secs): */
70static long time_reftime; 70static long time_reftime;
71 71
72long time_adjust; 72static long time_adjust;
73 73
74/* constant (boot-param configurable) NTP tick adjustment (upscaled) */ 74/* constant (boot-param configurable) NTP tick adjustment (upscaled) */
75static s64 ntp_tick_adj; 75static s64 ntp_tick_adj;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 39f6177fafac..caf8d4d4f5c8 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -165,13 +165,6 @@ struct timespec raw_time;
165/* flag for if timekeeping is suspended */ 165/* flag for if timekeeping is suspended */
166int __read_mostly timekeeping_suspended; 166int __read_mostly timekeeping_suspended;
167 167
168static struct timespec xtime_cache __attribute__ ((aligned (16)));
169void update_xtime_cache(u64 nsec)
170{
171 xtime_cache = xtime;
172 timespec_add_ns(&xtime_cache, nsec);
173}
174
175/* must hold xtime_lock */ 168/* must hold xtime_lock */
176void timekeeping_leap_insert(int leapsecond) 169void timekeeping_leap_insert(int leapsecond)
177{ 170{
@@ -332,8 +325,6 @@ int do_settimeofday(struct timespec *tv)
332 325
333 xtime = *tv; 326 xtime = *tv;
334 327
335 update_xtime_cache(0);
336
337 timekeeper.ntp_error = 0; 328 timekeeper.ntp_error = 0;
338 ntp_clear(); 329 ntp_clear();
339 330
@@ -559,7 +550,6 @@ void __init timekeeping_init(void)
559 } 550 }
560 set_normalized_timespec(&wall_to_monotonic, 551 set_normalized_timespec(&wall_to_monotonic,
561 -boot.tv_sec, -boot.tv_nsec); 552 -boot.tv_sec, -boot.tv_nsec);
562 update_xtime_cache(0);
563 total_sleep_time.tv_sec = 0; 553 total_sleep_time.tv_sec = 0;
564 total_sleep_time.tv_nsec = 0; 554 total_sleep_time.tv_nsec = 0;
565 write_sequnlock_irqrestore(&xtime_lock, flags); 555 write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -593,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev)
593 wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); 583 wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
594 total_sleep_time = timespec_add_safe(total_sleep_time, ts); 584 total_sleep_time = timespec_add_safe(total_sleep_time, ts);
595 } 585 }
596 update_xtime_cache(0);
597 /* re-base the last cycle value */ 586 /* re-base the last cycle value */
598 timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); 587 timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
599 timekeeper.ntp_error = 0; 588 timekeeper.ntp_error = 0;
@@ -788,7 +777,6 @@ void update_wall_time(void)
788{ 777{
789 struct clocksource *clock; 778 struct clocksource *clock;
790 cycle_t offset; 779 cycle_t offset;
791 u64 nsecs;
792 int shift = 0, maxshift; 780 int shift = 0, maxshift;
793 781
794 /* Make sure we're fully resumed: */ 782 /* Make sure we're fully resumed: */
@@ -847,7 +835,9 @@ void update_wall_time(void)
847 timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; 835 timekeeper.ntp_error += neg << timekeeper.ntp_error_shift;
848 } 836 }
849 837
850 /* store full nanoseconds into xtime after rounding it up and 838
839 /*
840 * Store full nanoseconds into xtime after rounding it up and
851 * add the remainder to the error difference. 841 * add the remainder to the error difference.
852 */ 842 */
853 xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1; 843 xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1;
@@ -855,8 +845,15 @@ void update_wall_time(void)
855 timekeeper.ntp_error += timekeeper.xtime_nsec << 845 timekeeper.ntp_error += timekeeper.xtime_nsec <<
856 timekeeper.ntp_error_shift; 846 timekeeper.ntp_error_shift;
857 847
858 nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); 848 /*
859 update_xtime_cache(nsecs); 849 * Finally, make sure that after the rounding
 850 * xtime.tv_nsec isn't larger than NSEC_PER_SEC
851 */
852 if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) {
853 xtime.tv_nsec -= NSEC_PER_SEC;
854 xtime.tv_sec++;
855 second_overflow();
856 }
860 857
861 /* check to see if there is a new clocksource to use */ 858 /* check to see if there is a new clocksource to use */
862 update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); 859 update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
@@ -896,13 +893,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
896 893
897unsigned long get_seconds(void) 894unsigned long get_seconds(void)
898{ 895{
899 return xtime_cache.tv_sec; 896 return xtime.tv_sec;
900} 897}
901EXPORT_SYMBOL(get_seconds); 898EXPORT_SYMBOL(get_seconds);
902 899
903struct timespec __current_kernel_time(void) 900struct timespec __current_kernel_time(void)
904{ 901{
905 return xtime_cache; 902 return xtime;
906} 903}
907 904
908struct timespec current_kernel_time(void) 905struct timespec current_kernel_time(void)
@@ -913,7 +910,7 @@ struct timespec current_kernel_time(void)
913 do { 910 do {
914 seq = read_seqbegin(&xtime_lock); 911 seq = read_seqbegin(&xtime_lock);
915 912
916 now = xtime_cache; 913 now = xtime;
917 } while (read_seqretry(&xtime_lock, seq)); 914 } while (read_seqretry(&xtime_lock, seq));
918 915
919 return now; 916 return now;
@@ -928,7 +925,7 @@ struct timespec get_monotonic_coarse(void)
928 do { 925 do {
929 seq = read_seqbegin(&xtime_lock); 926 seq = read_seqbegin(&xtime_lock);
930 927
931 now = xtime_cache; 928 now = xtime;
932 mono = wall_to_monotonic; 929 mono = wall_to_monotonic;
933 } while (read_seqretry(&xtime_lock, seq)); 930 } while (read_seqretry(&xtime_lock, seq));
934 931
diff --git a/kernel/timer.c b/kernel/timer.c
index aeb6a54f2771..9199f3c52215 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -319,6 +319,24 @@ unsigned long round_jiffies_up_relative(unsigned long j)
319} 319}
320EXPORT_SYMBOL_GPL(round_jiffies_up_relative); 320EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
321 321
322/**
323 * set_timer_slack - set the allowed slack for a timer
324 * @slack_hz: the amount of time (in jiffies) allowed for rounding
325 *
326 * Set the amount of time, in jiffies, that a certain timer has
327 * in terms of slack. By setting this value, the timer subsystem
328 * will schedule the actual timer somewhere between
329 * the time mod_timer() asks for, and that time plus the slack.
330 *
331 * By setting the slack to -1, a percentage of the delay is used
332 * instead.
333 */
334void set_timer_slack(struct timer_list *timer, int slack_hz)
335{
336 timer->slack = slack_hz;
337}
338EXPORT_SYMBOL_GPL(set_timer_slack);
339
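
A hedged sketch of how a driver might use the new hook: the foo_* timer and the one-second period are invented, and HZ/10 of slack simply tells the core that firing up to roughly 100 ms late is acceptable, so the timer can be coalesced with its neighbours.

/* Hedged sketch: giving a hypothetical housekeeping timer ~100 ms of slack. */
#include <linux/timer.h>
#include <linux/jiffies.h>

static void foo_timeout(unsigned long data)
{
	/* periodic housekeeping would go here */
}

static DEFINE_TIMER(foo_timer, foo_timeout, 0, 0);

static void foo_arm_timer(void)
{
	set_timer_slack(&foo_timer, HZ / 10);	/* up to ~100 ms late is fine */
	mod_timer(&foo_timer, jiffies + HZ);	/* nominally one second out */
}
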
322 340
323static inline void set_running_timer(struct tvec_base *base, 341static inline void set_running_timer(struct tvec_base *base,
324 struct timer_list *timer) 342 struct timer_list *timer)
@@ -550,6 +568,7 @@ static void __init_timer(struct timer_list *timer,
550{ 568{
551 timer->entry.next = NULL; 569 timer->entry.next = NULL;
552 timer->base = __raw_get_cpu_var(tvec_bases); 570 timer->base = __raw_get_cpu_var(tvec_bases);
571 timer->slack = -1;
553#ifdef CONFIG_TIMER_STATS 572#ifdef CONFIG_TIMER_STATS
554 timer->start_site = NULL; 573 timer->start_site = NULL;
555 timer->start_pid = -1; 574 timer->start_pid = -1;
@@ -715,6 +734,41 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires)
715} 734}
716EXPORT_SYMBOL(mod_timer_pending); 735EXPORT_SYMBOL(mod_timer_pending);
717 736
737/*
738 * Decide where to put the timer while taking the slack into account
739 *
740 * Algorithm:
741 * 1) calculate the maximum (absolute) time
742 * 2) calculate the highest bit where the expires and new max are different
743 * 3) use this bit to make a mask
744 * 4) use the bitmask to round down the maximum time, so that all last
745 * bits are zeros
746 */
747static inline
748unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
749{
750 unsigned long expires_limit, mask;
751 int bit;
752
753 expires_limit = expires + timer->slack;
754
755 if (timer->slack < 0) /* auto slack: use 0.4% */
756 expires_limit = expires + (expires - jiffies)/256;
757
758 mask = expires ^ expires_limit;
759
760 if (mask == 0)
761 return expires;
762
763 bit = find_last_bit(&mask, BITS_PER_LONG);
764
765 mask = (1 << bit) - 1;
766
767 expires_limit = expires_limit & ~(mask);
768
769 return expires_limit;
770}
771
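
A userspace analogue with arbitrary numbers shows the effect: for expires = 10000, jiffies = 9000 and 40 jiffies of slack, the limit is 10040, the highest bit differing between 10000 and 10040 is bit 5, and the expiry is rounded down to a multiple of 32, i.e. 10016.

/* Userspace analogue of apply_slack(); find_last_bit() is replaced by a
 * plain loop and the numbers are arbitrary. */
#include <stdio.h>

static unsigned long apply_slack_demo(unsigned long expires,
				      unsigned long jiffies_now, long slack)
{
	unsigned long expires_limit, mask;
	int bit;

	expires_limit = expires + slack;
	if (slack < 0)			/* auto slack: ~0.4% of the delay */
		expires_limit = expires + (expires - jiffies_now) / 256;

	mask = expires ^ expires_limit;
	if (mask == 0)
		return expires;

	for (bit = 0; mask >> bit; bit++)	/* find the highest differing bit */
		;
	bit--;

	mask = (1UL << bit) - 1;
	return expires_limit & ~mask;
}

int main(void)
{
	/* expires = 10000, slack = 40 -> limit 10040; highest differing bit
	 * is bit 5, so the result is rounded down to a multiple of 32: 10016 */
	printf("%lu\n", apply_slack_demo(10000, 9000, 40));
	return 0;
}
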
718/** 772/**
719 * mod_timer - modify a timer's timeout 773 * mod_timer - modify a timer's timeout
720 * @timer: the timer to be modified 774 * @timer: the timer to be modified
@@ -745,6 +799,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
745 if (timer_pending(timer) && timer->expires == expires) 799 if (timer_pending(timer) && timer->expires == expires)
746 return 1; 800 return 1;
747 801
802 expires = apply_slack(timer, expires);
803
748 return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); 804 return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
749} 805}
750EXPORT_SYMBOL(mod_timer); 806EXPORT_SYMBOL(mod_timer);
@@ -955,6 +1011,47 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
955 return index; 1011 return index;
956} 1012}
957 1013
1014static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
1015 unsigned long data)
1016{
1017 int preempt_count = preempt_count();
1018
1019#ifdef CONFIG_LOCKDEP
1020 /*
1021	 * It is permissible to free the timer from inside the
1022	 * function that is called from it; we need to take that into
1023	 * account for lockdep too. To avoid bogus "held lock freed"
1024 * warnings as well as problems when looking into
1025 * timer->lockdep_map, make a copy and use that here.
1026 */
1027 struct lockdep_map lockdep_map = timer->lockdep_map;
1028#endif
1029 /*
1030 * Couple the lock chain with the lock chain at
1031 * del_timer_sync() by acquiring the lock_map around the fn()
1032 * call here and in del_timer_sync().
1033 */
1034 lock_map_acquire(&lockdep_map);
1035
1036 trace_timer_expire_entry(timer);
1037 fn(data);
1038 trace_timer_expire_exit(timer);
1039
1040 lock_map_release(&lockdep_map);
1041
1042 if (preempt_count != preempt_count()) {
1043 WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
1044 fn, preempt_count, preempt_count());
1045 /*
1046 * Restore the preempt count. That gives us a decent
1047 * chance to survive and extract information. If the
1048 * callback kept a lock held, bad luck, but not worse
1049 * than the BUG() we had.
1050 */
1051 preempt_count() = preempt_count;
1052 }
1053}
1054
958#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) 1055#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
959 1056
960/** 1057/**
@@ -998,45 +1095,7 @@ static inline void __run_timers(struct tvec_base *base)
998 detach_timer(timer, 1); 1095 detach_timer(timer, 1);
999 1096
1000 spin_unlock_irq(&base->lock); 1097 spin_unlock_irq(&base->lock);
1001 { 1098 call_timer_fn(timer, fn, data);
1002 int preempt_count = preempt_count();
1003
1004#ifdef CONFIG_LOCKDEP
1005 /*
1006 * It is permissible to free the timer from
1007 * inside the function that is called from
1008 * it, this we need to take into account for
1009 * lockdep too. To avoid bogus "held lock
1010 * freed" warnings as well as problems when
1011 * looking into timer->lockdep_map, make a
1012 * copy and use that here.
1013 */
1014 struct lockdep_map lockdep_map =
1015 timer->lockdep_map;
1016#endif
1017 /*
1018 * Couple the lock chain with the lock chain at
1019 * del_timer_sync() by acquiring the lock_map
1020 * around the fn() call here and in
1021 * del_timer_sync().
1022 */
1023 lock_map_acquire(&lockdep_map);
1024
1025 trace_timer_expire_entry(timer);
1026 fn(data);
1027 trace_timer_expire_exit(timer);
1028
1029 lock_map_release(&lockdep_map);
1030
1031 if (preempt_count != preempt_count()) {
1032 printk(KERN_ERR "huh, entered %p "
1033 "with preempt_count %08x, exited"
1034 " with %08x?\n",
1035 fn, preempt_count,
1036 preempt_count());
1037 BUG();
1038 }
1039 }
1040 spin_lock_irq(&base->lock); 1099 spin_lock_irq(&base->lock);
1041 } 1100 }
1042 } 1101 }
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 2404c129a8c9..ab13d7008061 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -209,6 +209,7 @@ int trace_seq_putc(struct trace_seq *s, unsigned char c)
209 209
210 return 1; 210 return 1;
211} 211}
212EXPORT_SYMBOL(trace_seq_putc);
212 213
213int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) 214int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
214{ 215{
@@ -355,6 +356,21 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
355} 356}
356EXPORT_SYMBOL(ftrace_print_symbols_seq); 357EXPORT_SYMBOL(ftrace_print_symbols_seq);
357 358
359const char *
360ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
361{
362 int i;
363 const char *ret = p->buffer + p->len;
364
365 for (i = 0; i < buf_len; i++)
366 trace_seq_printf(p, "%s%2.2x", i == 0 ? "" : " ", buf[i]);
367
368 trace_seq_putc(p, 0);
369
370 return ret;
371}
372EXPORT_SYMBOL(ftrace_print_hex_seq);
373
358#ifdef CONFIG_KRETPROBES 374#ifdef CONFIG_KRETPROBES
359static inline const char *kretprobed(const char *name) 375static inline const char *kretprobed(const char *name)
360{ 376{
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 076c7c8215b0..b2d70d38dff4 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -54,8 +54,8 @@ int create_user_ns(struct cred *new)
54#endif 54#endif
55 /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ 55 /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
56 56
57 /* alloc_uid() incremented the userns refcount. Just set it to 1 */ 57 /* root_user holds a reference to ns, our reference can be dropped */
58 kref_set(&ns->kref, 1); 58 put_user_ns(ns);
59 59
60 return 0; 60 return 0;
61} 61}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5bfb213984b2..77dabbf64b8f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -229,6 +229,16 @@ static inline void set_wq_data(struct work_struct *work,
229 atomic_long_set(&work->data, new); 229 atomic_long_set(&work->data, new);
230} 230}
231 231
232/*
233 * Clear WORK_STRUCT_PENDING and the workqueue on which it was queued.
234 */
235static inline void clear_wq_data(struct work_struct *work)
236{
237 unsigned long flags = *work_data_bits(work) &
238 (1UL << WORK_STRUCT_STATIC);
239 atomic_long_set(&work->data, flags);
240}
241
232static inline 242static inline
233struct cpu_workqueue_struct *get_wq_data(struct work_struct *work) 243struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
234{ 244{
@@ -671,7 +681,7 @@ static int __cancel_work_timer(struct work_struct *work,
671 wait_on_work(work); 681 wait_on_work(work);
672 } while (unlikely(ret < 0)); 682 } while (unlikely(ret < 0));
673 683
674 work_clear_pending(work); 684 clear_wq_data(work);
675 return ret; 685 return ret;
676} 686}
677 687
@@ -845,6 +855,30 @@ int schedule_on_each_cpu(work_func_t func)
845 return 0; 855 return 0;
846} 856}
847 857
858/**
859 * flush_scheduled_work - ensure that any scheduled work has run to completion.
860 *
861 * Forces execution of the kernel-global workqueue and blocks until its
862 * completion.
863 *
864 * Think twice before calling this function! It's very easy to get into
865 * trouble if you don't take great care. Either of the following situations
866 * will lead to deadlock:
867 *
868 * One of the work items currently on the workqueue needs to acquire
869 * a lock held by your code or its caller.
870 *
871 * Your code is running in the context of a work routine.
872 *
873 * They will be detected by lockdep when they occur, but the first might not
874 * occur very often. It depends on what work items are on the workqueue and
875 * what locks they need, which you have no control over.
876 *
877 * In most situations flushing the entire workqueue is overkill; you merely
878 * need to know that a particular work item isn't queued and isn't running.
879 * In such cases you should use cancel_delayed_work_sync() or
880 * cancel_work_sync() instead.
881 */
848void flush_scheduled_work(void) 882void flush_scheduled_work(void)
849{ 883{
850 flush_workqueue(keventd_wq); 884 flush_workqueue(keventd_wq);