Diffstat (limited to 'kernel')
-rw-r--r--	kernel/bounds.c           |  23
-rw-r--r--	kernel/cpuset.c           |  25
-rw-r--r--	kernel/exit.c             |   8
-rw-r--r--	kernel/fork.c             |  66
-rw-r--r--	kernel/hrtimer.c          |  34
-rw-r--r--	kernel/kexec.c            |   3
-rw-r--r--	kernel/kprobes.c          | 349
-rw-r--r--	kernel/pid_namespace.c    |   2
-rw-r--r--	kernel/power/Kconfig      |  10
-rw-r--r--	kernel/power/Makefile     |   1
-rw-r--r--	kernel/power/console.c    |  27
-rw-r--r--	kernel/power/pm.c         | 205
-rw-r--r--	kernel/ptrace.c           |   7
-rw-r--r--	kernel/sched.c            |  54
-rw-r--r--	kernel/sys.c              |  27
-rw-r--r--	kernel/time/tick-sched.c  |   1
16 files changed, 414 insertions(+), 428 deletions(-)
diff --git a/kernel/bounds.c b/kernel/bounds.c
new file mode 100644
index 000000000000..c3c55544db2f
--- /dev/null
+++ b/kernel/bounds.c
@@ -0,0 +1,23 @@
+/*
+ * Generate definitions needed by the preprocessor.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ */
+
+#define __GENERATING_BOUNDS_H
+/* Include headers that define the enum constants of interest */
+#include <linux/page-flags.h>
+#include <linux/mmzone.h>
+
+#define DEFINE(sym, val) \
+	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->" : : )
+
+void foo(void)
+{
+	/* The enum constants to put into include/linux/bounds.h */
+	DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
+	DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
+	/* End of constants */
+}
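
Note: bounds.c borrows the asm-offsets.c trick: the file is only ever compiled to assembly, and the build post-processes the "->" markers into a generated header. A sketch of what the generated include/linux/bounds.h presumably ends up containing (guard name and values below are assumptions; the values depend on the configuration):

	#ifndef __LINUX_BOUNDS_H__		/* assumed guard name */
	#define __LINUX_BOUNDS_H__
	/* auto-generated from kernel/bounds.c; do not edit */
	#define NR_PAGEFLAGS 20			/* __NR_PAGEFLAGS, illustrative */
	#define MAX_NR_ZONES 4			/* __MAX_NR_ZONES, illustrative */
	#endif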
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8b35fbd8292f..48a976c52cf5 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -941,7 +941,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 	cs->mems_generation = cpuset_mems_generation++;
 	mutex_unlock(&callback_mutex);
 
-	cpuset_being_rebound = cs;		/* causes mpol_copy() rebind */
+	cpuset_being_rebound = cs;		/* causes mpol_dup() rebind */
 
 	fudge = 10;				/* spare mmarray[] slots */
 	fudge += cpus_weight(cs->cpus_allowed);	/* imagine one fork-bomb/cpu */
@@ -992,7 +992,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 	 * rebind the vma mempolicies of each mm in mmarray[] to their
 	 * new cpuset, and release that mm.  The mpol_rebind_mm()
 	 * call takes mmap_sem, which we couldn't take while holding
-	 * tasklist_lock.  Forks can happen again now - the mpol_copy()
+	 * tasklist_lock.  Forks can happen again now - the mpol_dup()
 	 * cpuset_being_rebound check will catch such forks, and rebind
 	 * their vma mempolicies too.  Because we still hold the global
 	 * cgroup_mutex, we know that no other rebind effort will
@@ -1265,7 +1265,8 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont,
 		return -E2BIG;
 
 	/* +1 for nul-terminator */
-	if ((buffer = kmalloc(nbytes + 1, GFP_KERNEL)) == 0)
+	buffer = kmalloc(nbytes + 1, GFP_KERNEL);
+	if (!buffer)
 		return -ENOMEM;
 
 	if (copy_from_user(buffer, userbuf, nbytes)) {
@@ -1958,22 +1959,14 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
 }
 
 /**
- * cpuset_zonelist_valid_mems_allowed - check zonelist vs. current mems_allowed
- * @zl: the zonelist to be checked
+ * cpuset_nodemask_valid_mems_allowed - check nodemask vs. current mems_allowed
+ * @nodemask: the nodemask to be checked
  *
- * Are any of the nodes on zonelist zl allowed in current->mems_allowed?
+ * Are any of the nodes in the nodemask allowed in current->mems_allowed?
  */
-int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
+int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
 {
-	int i;
-
-	for (i = 0; zl->zones[i]; i++) {
-		int nid = zone_to_nid(zl->zones[i]);
-
-		if (node_isset(nid, current->mems_allowed))
-			return 1;
-	}
-	return 0;
+	return nodes_intersects(*nodemask, current->mems_allowed);
 }
 
 /*
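
Note: callers now pass a nodemask instead of walking a zonelist. A minimal sketch of the new calling convention (the caller and input mask are hypothetical, not part of this patch):

	nodemask_t candidates = NODE_MASK_ALL;	/* hypothetical input mask */

	if (!cpuset_nodemask_valid_mems_allowed(&candidates))
		return;	/* nothing here overlaps current->mems_allowed */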
diff --git a/kernel/exit.c b/kernel/exit.c
index cece89f80ab4..2a9d98c641ac 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -507,10 +507,9 @@ void put_files_struct(struct files_struct *files)
 	}
 }
 
-EXPORT_SYMBOL(put_files_struct);
-
-void reset_files_struct(struct task_struct *tsk, struct files_struct *files)
+void reset_files_struct(struct files_struct *files)
 {
+	struct task_struct *tsk = current;
 	struct files_struct *old;
 
 	old = tsk->files;
@@ -519,7 +518,6 @@ void reset_files_struct(struct task_struct *tsk, struct files_struct *files)
 	task_unlock(tsk);
 	put_files_struct(old);
 }
-EXPORT_SYMBOL(reset_files_struct);
 
 void exit_files(struct task_struct *tsk)
 {
@@ -969,7 +967,7 @@ NORET_TYPE void do_exit(long code)
 	proc_exit_connector(tsk);
 	exit_notify(tsk, group_dead);
 #ifdef CONFIG_NUMA
-	mpol_free(tsk->mempolicy);
+	mpol_put(tsk->mempolicy);
 	tsk->mempolicy = NULL;
 #endif
 #ifdef CONFIG_FUTEX
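
Note: the mpol_copy()/mpol_free() to mpol_dup()/mpol_put() renames align mempolicy with the kernel's usual naming conventions (an assumed reading, based only on the call sites in this patch):

	/* mpol_dup(pol) - allocate a duplicate of @pol, like kstrdup()
	 * mpol_put(pol) - drop a reference; presumably NULL-safe, since
	 *                 tsk->mempolicy may be NULL at this call site
	 */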
diff --git a/kernel/fork.c b/kernel/fork.c
index 89fe414645e9..6067e429f281 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -279,7 +279,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		if (!tmp)
 			goto fail_nomem;
 		*tmp = *mpnt;
-		pol = mpol_copy(vma_policy(mpnt));
+		pol = mpol_dup(vma_policy(mpnt));
 		retval = PTR_ERR(pol);
 		if (IS_ERR(pol))
 			goto fail_nomem_policy;
@@ -521,7 +521,7 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
  * Allocate a new mm structure and copy contents from the
  * mm structure of the passed in task structure.
  */
-static struct mm_struct *dup_mm(struct task_struct *tsk)
+struct mm_struct *dup_mm(struct task_struct *tsk)
 {
 	struct mm_struct *mm, *oldmm = current->mm;
 	int err;
@@ -805,12 +805,6 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
 		goto out;
 	}
 
-	/*
-	 * Note: we may be using current for both targets (See exec.c)
-	 * This works because we cache current->files (old) as oldf. Don't
-	 * break this.
-	 */
-	tsk->files = NULL;
 	newf = dup_fd(oldf, &error);
 	if (!newf)
 		goto out;
@@ -846,34 +840,6 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
 	return 0;
 }
 
-/*
- * Helper to unshare the files of the current task.
- * We don't want to expose copy_files internals to
- * the exec layer of the kernel.
- */
-
-int unshare_files(void)
-{
-	struct files_struct *files = current->files;
-	int rc;
-
-	BUG_ON(!files);
-
-	/* This can race but the race causes us to copy when we don't
-	   need to and drop the copy */
-	if (atomic_read(&files->count) == 1)
-	{
-		atomic_inc(&files->count);
-		return 0;
-	}
-	rc = copy_files(0, current);
-	if (rc)
-		current->files = files;
-	return rc;
-}
-
-EXPORT_SYMBOL(unshare_files);
-
 static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
 {
 	struct sighand_struct *sig;
@@ -1150,7 +1116,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->audit_context = NULL;
 	cgroup_fork(p);
 #ifdef CONFIG_NUMA
-	p->mempolicy = mpol_copy(p->mempolicy);
+	p->mempolicy = mpol_dup(p->mempolicy);
 	if (IS_ERR(p->mempolicy)) {
 		retval = PTR_ERR(p->mempolicy);
 		p->mempolicy = NULL;
@@ -1408,7 +1374,7 @@ bad_fork_cleanup_security:
 	security_task_free(p);
 bad_fork_cleanup_policy:
 #ifdef CONFIG_NUMA
-	mpol_free(p->mempolicy);
+	mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:
 #endif
 	cgroup_exit(p, cgroup_callbacks_done);
@@ -1811,3 +1777,27 @@ bad_unshare_cleanup_thread:
 bad_unshare_out:
 	return err;
 }
+
+/*
+ * Helper to unshare the files of the current task.
+ * We don't want to expose copy_files internals to
+ * the exec layer of the kernel.
+ */
+
+int unshare_files(struct files_struct **displaced)
+{
+	struct task_struct *task = current;
+	struct files_struct *copy = NULL;
+	int error;
+
+	error = unshare_fd(CLONE_FILES, &copy);
+	if (error || !copy) {
+		*displaced = NULL;
+		return error;
+	}
+	*displaced = task->files;
+	task_lock(task);
+	task->files = copy;
+	task_unlock(task);
+	return 0;
+}
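
Note: unshare_files() now hands the old table back to the caller instead of second-guessing it. A sketch of the intended call pattern in the exec path, under the assumption that the caller drops the displaced table with put_files_struct() (as the exit.c hunk above suggests) once it no longer needs it:

	struct files_struct *displaced;
	int retval;

	retval = unshare_files(&displaced);	/* current->files now unshared */
	if (retval)
		return retval;
	/* ... load the new executable ... */
	if (displaced)
		put_files_struct(displaced);	/* release the old shared table */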
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f78777abe769..dea4c9124ac8 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -590,7 +590,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
 		list_add_tail(&timer->cb_entry,
 			      &base->cpu_base->cb_pending);
 		timer->state = HRTIMER_STATE_PENDING;
-		raise_softirq(HRTIMER_SOFTIRQ);
 		return 1;
 	default:
 		BUG();
@@ -633,6 +632,11 @@ static int hrtimer_switch_to_hres(void)
 	return 1;
 }
 
+static inline void hrtimer_raise_softirq(void)
+{
+	raise_softirq(HRTIMER_SOFTIRQ);
+}
+
 #else
 
 static inline int hrtimer_hres_active(void) { return 0; }
@@ -651,6 +655,7 @@ static inline int hrtimer_reprogram(struct hrtimer *timer,
 {
 	return 0;
 }
+static inline void hrtimer_raise_softirq(void) { }
 
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
@@ -850,7 +855,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 {
 	struct hrtimer_clock_base *base, *new_base;
 	unsigned long flags;
-	int ret;
+	int ret, raise;
 
 	base = lock_hrtimer_base(timer, &flags);
 
@@ -884,8 +889,18 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 	enqueue_hrtimer(timer, new_base,
 			new_base->cpu_base == &__get_cpu_var(hrtimer_bases));
 
+	/*
+	 * The timer may be expired and moved to the cb_pending
+	 * list. We can not raise the softirq with base lock held due
+	 * to a possible deadlock with runqueue lock.
+	 */
+	raise = timer->state == HRTIMER_STATE_PENDING;
+
 	unlock_hrtimer_base(timer, &flags);
 
+	if (raise)
+		hrtimer_raise_softirq();
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(hrtimer_start);
@@ -1080,8 +1095,19 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
 			 * If the timer was rearmed on another CPU, reprogram
 			 * the event device.
 			 */
-			if (timer->base->first == &timer->node)
-				hrtimer_reprogram(timer, timer->base);
+			struct hrtimer_clock_base *base = timer->base;
+
+			if (base->first == &timer->node &&
+			    hrtimer_reprogram(timer, base)) {
+				/*
+				 * Timer is expired. Thus move it from tree to
+				 * pending list again.
+				 */
+				__remove_hrtimer(timer, base,
+						 HRTIMER_STATE_PENDING, 0);
+				list_add_tail(&timer->cb_entry,
+					      &base->cpu_base->cb_pending);
+			}
 		}
 	}
 	spin_unlock_irq(&cpu_base->lock);
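
Note: the ordering that the hrtimer_start() hunk enforces, schematically (pseudocode, not kernel code):

	lock(cpu_base->lock);
	enqueue_hrtimer(timer, ...);		/* may leave timer PENDING */
	raise = (timer->state == HRTIMER_STATE_PENDING);
	unlock(cpu_base->lock);
	if (raise)
		raise_softirq(HRTIMER_SOFTIRQ);	/* base lock no longer held */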
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 6782dce93d01..cb85c79989b4 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1405,6 +1405,9 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
 	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
 	VMCOREINFO_NUMBER(NR_FREE_PAGES);
+	VMCOREINFO_NUMBER(PG_lru);
+	VMCOREINFO_NUMBER(PG_private);
+	VMCOREINFO_NUMBER(PG_swapcache);
 
 	arch_crash_save_vmcoreinfo();
 
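
Note: exporting the PG_* values through vmcoreinfo lets a dump filter resolve flag bits at run time instead of hard-coding per-kernel constants. Hypothetical user-space pseudocode (the helper names are made up for illustration):

	unsigned long pg_lru = vmcoreinfo_number("PG_lru");	/* assumed helper */

	if (page_flags & (1UL << pg_lru))
		exclude_page();		/* page cache; can be dropped from the dump */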
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index fcfb580c3afc..1e0250cb9486 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -72,6 +72,18 @@ DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
 DEFINE_SPINLOCK(kretprobe_lock);	/* Protects kretprobe_inst_table */
 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
 
+/*
+ * Normally, functions that we'd want to prohibit kprobes in, are marked
+ * __kprobes. But, there are cases where such functions already belong to
+ * a different section (__sched for preempt_schedule)
+ *
+ * For such cases, we now have a blacklist
+ */
+struct kprobe_blackpoint kprobe_blacklist[] = {
+	{"preempt_schedule",},
+	{NULL}    /* Terminator */
+};
+
 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
 /*
  * kprobe->ainsn.insn points to the copy of the instruction to be
@@ -417,6 +429,21 @@ static inline void free_rp_inst(struct kretprobe *rp)
 	}
 }
 
+static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
+{
+	unsigned long flags;
+	struct kretprobe_instance *ri;
+	struct hlist_node *pos, *next;
+	/* No race here */
+	spin_lock_irqsave(&kretprobe_lock, flags);
+	hlist_for_each_entry_safe(ri, pos, next, &rp->used_instances, uflist) {
+		ri->rp = NULL;
+		hlist_del(&ri->uflist);
+	}
+	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	free_rp_inst(rp);
+}
+
 /*
  * Keep all fields in the kprobe consistent
  */
@@ -492,9 +519,22 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
 
 static int __kprobes in_kprobes_functions(unsigned long addr)
 {
+	struct kprobe_blackpoint *kb;
+
 	if (addr >= (unsigned long)__kprobes_text_start &&
 	    addr < (unsigned long)__kprobes_text_end)
 		return -EINVAL;
+	/*
+	 * If there exists a kprobe_blacklist, verify and
+	 * fail any probe registration in the prohibited area
+	 */
+	for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
+		if (kb->start_addr) {
+			if (addr >= kb->start_addr &&
+			    addr < (kb->start_addr + kb->range))
+				return -EINVAL;
+		}
+	}
 	return 0;
 }
 
@@ -555,6 +595,7 @@ static int __kprobes __register_kprobe(struct kprobe *p,
 	}
 
 	p->nmissed = 0;
+	INIT_LIST_HEAD(&p->list);
 	mutex_lock(&kprobe_mutex);
 	old_p = get_kprobe(p->addr);
 	if (old_p) {
@@ -581,35 +622,28 @@ out:
 	return ret;
 }
 
-int __kprobes register_kprobe(struct kprobe *p)
-{
-	return __register_kprobe(p, (unsigned long)__builtin_return_address(0));
-}
-
-void __kprobes unregister_kprobe(struct kprobe *p)
+/*
+ * Unregister a kprobe without a scheduler synchronization.
+ */
+static int __kprobes __unregister_kprobe_top(struct kprobe *p)
 {
-	struct module *mod;
 	struct kprobe *old_p, *list_p;
-	int cleanup_p;
 
-	mutex_lock(&kprobe_mutex);
 	old_p = get_kprobe(p->addr);
-	if (unlikely(!old_p)) {
-		mutex_unlock(&kprobe_mutex);
-		return;
-	}
+	if (unlikely(!old_p))
+		return -EINVAL;
+
 	if (p != old_p) {
 		list_for_each_entry_rcu(list_p, &old_p->list, list)
 			if (list_p == p)
 			/* kprobe p is a valid probe */
 				goto valid_p;
-		mutex_unlock(&kprobe_mutex);
-		return;
+		return -EINVAL;
 	}
 valid_p:
 	if (old_p == p ||
 	    (old_p->pre_handler == aggr_pre_handler &&
-	     p->list.next == &old_p->list && p->list.prev == &old_p->list)) {
+	     list_is_singular(&old_p->list))) {
 		/*
 		 * Only probe on the hash list. Disarm only if kprobes are
 		 * enabled - otherwise, the breakpoint would already have
@@ -618,43 +652,97 @@ valid_p:
 		if (kprobe_enabled)
 			arch_disarm_kprobe(p);
 		hlist_del_rcu(&old_p->hlist);
-		cleanup_p = 1;
 	} else {
+		if (p->break_handler)
+			old_p->break_handler = NULL;
+		if (p->post_handler) {
+			list_for_each_entry_rcu(list_p, &old_p->list, list) {
+				if ((list_p != p) && (list_p->post_handler))
+					goto noclean;
+			}
+			old_p->post_handler = NULL;
+		}
+noclean:
 		list_del_rcu(&p->list);
-		cleanup_p = 0;
 	}
+	return 0;
+}
 
-	mutex_unlock(&kprobe_mutex);
+static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
+{
+	struct module *mod;
+	struct kprobe *old_p;
 
-	synchronize_sched();
 	if (p->mod_refcounted) {
 		mod = module_text_address((unsigned long)p->addr);
 		if (mod)
 			module_put(mod);
 	}
 
-	if (cleanup_p) {
-		if (p != old_p) {
-			list_del_rcu(&p->list);
+	if (list_empty(&p->list) || list_is_singular(&p->list)) {
+		if (!list_empty(&p->list)) {
+			/* "p" is the last child of an aggr_kprobe */
+			old_p = list_entry(p->list.next, struct kprobe, list);
+			list_del(&p->list);
 			kfree(old_p);
 		}
 		arch_remove_kprobe(p);
-	} else {
-		mutex_lock(&kprobe_mutex);
-		if (p->break_handler)
-			old_p->break_handler = NULL;
-		if (p->post_handler){
-			list_for_each_entry_rcu(list_p, &old_p->list, list){
-				if (list_p->post_handler){
-					cleanup_p = 2;
-					break;
-				}
-			}
-			if (cleanup_p == 0)
-				old_p->post_handler = NULL;
-		}
-		mutex_unlock(&kprobe_mutex);
-	}
+	}
+}
+
+static int __register_kprobes(struct kprobe **kps, int num,
+			      unsigned long called_from)
+{
+	int i, ret = 0;
+
+	if (num <= 0)
+		return -EINVAL;
+	for (i = 0; i < num; i++) {
+		ret = __register_kprobe(kps[i], called_from);
+		if (ret < 0 && i > 0) {
+			unregister_kprobes(kps, i);
+			break;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Registration and unregistration functions for kprobe.
+ */
+int __kprobes register_kprobe(struct kprobe *p)
+{
+	return __register_kprobes(&p, 1,
+				  (unsigned long)__builtin_return_address(0));
+}
+
+void __kprobes unregister_kprobe(struct kprobe *p)
+{
+	unregister_kprobes(&p, 1);
+}
+
+int __kprobes register_kprobes(struct kprobe **kps, int num)
+{
+	return __register_kprobes(kps, num,
+				  (unsigned long)__builtin_return_address(0));
+}
+
+void __kprobes unregister_kprobes(struct kprobe **kps, int num)
+{
+	int i;
+
+	if (num <= 0)
+		return;
+	mutex_lock(&kprobe_mutex);
+	for (i = 0; i < num; i++)
+		if (__unregister_kprobe_top(kps[i]) < 0)
+			kps[i]->addr = NULL;
+	mutex_unlock(&kprobe_mutex);
+
+	synchronize_sched();
+	for (i = 0; i < num; i++)
+		if (kps[i]->addr)
+			__unregister_kprobe_bottom(kps[i]);
 }
 
 static struct notifier_block kprobe_exceptions_nb = {
@@ -667,24 +755,69 @@ unsigned long __weak arch_deref_entry_point(void *entry)
 	return (unsigned long)entry;
 }
 
-int __kprobes register_jprobe(struct jprobe *jp)
+static int __register_jprobes(struct jprobe **jps, int num,
+			      unsigned long called_from)
 {
-	unsigned long addr = arch_deref_entry_point(jp->entry);
+	struct jprobe *jp;
+	int ret = 0, i;
 
-	if (!kernel_text_address(addr))
-		return -EINVAL;
+	if (num <= 0)
+		return -EINVAL;
+	for (i = 0; i < num; i++) {
+		unsigned long addr;
+		jp = jps[i];
+		addr = arch_deref_entry_point(jp->entry);
+
+		if (!kernel_text_address(addr))
+			ret = -EINVAL;
+		else {
+			/* Todo: Verify probepoint is a function entry point */
+			jp->kp.pre_handler = setjmp_pre_handler;
+			jp->kp.break_handler = longjmp_break_handler;
+			ret = __register_kprobe(&jp->kp, called_from);
+		}
+		if (ret < 0 && i > 0) {
+			unregister_jprobes(jps, i);
+			break;
+		}
+	}
+	return ret;
+}
 
-	/* Todo: Verify probepoint is a function entry point */
-	jp->kp.pre_handler = setjmp_pre_handler;
-	jp->kp.break_handler = longjmp_break_handler;
-
-	return __register_kprobe(&jp->kp,
-		(unsigned long)__builtin_return_address(0));
+int __kprobes register_jprobe(struct jprobe *jp)
+{
+	return __register_jprobes(&jp, 1,
+				  (unsigned long)__builtin_return_address(0));
 }
 
 void __kprobes unregister_jprobe(struct jprobe *jp)
 {
-	unregister_kprobe(&jp->kp);
+	unregister_jprobes(&jp, 1);
+}
+
+int __kprobes register_jprobes(struct jprobe **jps, int num)
+{
+	return __register_jprobes(jps, num,
+				  (unsigned long)__builtin_return_address(0));
+}
+
+void __kprobes unregister_jprobes(struct jprobe **jps, int num)
+{
+	int i;
+
+	if (num <= 0)
+		return;
+	mutex_lock(&kprobe_mutex);
+	for (i = 0; i < num; i++)
+		if (__unregister_kprobe_top(&jps[i]->kp) < 0)
+			jps[i]->kp.addr = NULL;
+	mutex_unlock(&kprobe_mutex);
+
+	synchronize_sched();
+	for (i = 0; i < num; i++) {
+		if (jps[i]->kp.addr)
+			__unregister_kprobe_bottom(&jps[i]->kp);
+	}
 }
 
 #ifdef CONFIG_KRETPROBES
@@ -725,7 +858,8 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
 	return 0;
 }
 
-int __kprobes register_kretprobe(struct kretprobe *rp)
+static int __kprobes __register_kretprobe(struct kretprobe *rp,
+					  unsigned long called_from)
 {
 	int ret = 0;
 	struct kretprobe_instance *inst;
@@ -771,46 +905,101 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
 
 	rp->nmissed = 0;
 	/* Establish function entry probe point */
-	if ((ret = __register_kprobe(&rp->kp,
-		(unsigned long)__builtin_return_address(0))) != 0)
+	ret = __register_kprobe(&rp->kp, called_from);
+	if (ret != 0)
 		free_rp_inst(rp);
 	return ret;
 }
 
+static int __register_kretprobes(struct kretprobe **rps, int num,
+				 unsigned long called_from)
+{
+	int ret = 0, i;
+
+	if (num <= 0)
+		return -EINVAL;
+	for (i = 0; i < num; i++) {
+		ret = __register_kretprobe(rps[i], called_from);
+		if (ret < 0 && i > 0) {
+			unregister_kretprobes(rps, i);
+			break;
+		}
+	}
+	return ret;
+}
+
+int __kprobes register_kretprobe(struct kretprobe *rp)
+{
+	return __register_kretprobes(&rp, 1,
+			(unsigned long)__builtin_return_address(0));
+}
+
+void __kprobes unregister_kretprobe(struct kretprobe *rp)
+{
+	unregister_kretprobes(&rp, 1);
+}
+
+int __kprobes register_kretprobes(struct kretprobe **rps, int num)
+{
+	return __register_kretprobes(rps, num,
+			(unsigned long)__builtin_return_address(0));
+}
+
+void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
+{
+	int i;
+
+	if (num <= 0)
+		return;
+	mutex_lock(&kprobe_mutex);
+	for (i = 0; i < num; i++)
+		if (__unregister_kprobe_top(&rps[i]->kp) < 0)
+			rps[i]->kp.addr = NULL;
+	mutex_unlock(&kprobe_mutex);
+
+	synchronize_sched();
+	for (i = 0; i < num; i++) {
+		if (rps[i]->kp.addr) {
+			__unregister_kprobe_bottom(&rps[i]->kp);
+			cleanup_rp_inst(rps[i]);
+		}
+	}
+}
+
 #else /* CONFIG_KRETPROBES */
 int __kprobes register_kretprobe(struct kretprobe *rp)
 {
 	return -ENOSYS;
 }
 
-static int __kprobes pre_handler_kretprobe(struct kprobe *p,
-		struct pt_regs *regs)
+int __kprobes register_kretprobes(struct kretprobe **rps, int num)
 {
-	return 0;
+	return -ENOSYS;
 }
-#endif /* CONFIG_KRETPROBES */
 
 void __kprobes unregister_kretprobe(struct kretprobe *rp)
 {
-	unsigned long flags;
-	struct kretprobe_instance *ri;
-	struct hlist_node *pos, *next;
+}
 
-	unregister_kprobe(&rp->kp);
+void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
+{
+}
 
-	/* No race here */
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	hlist_for_each_entry_safe(ri, pos, next, &rp->used_instances, uflist) {
-		ri->rp = NULL;
-		hlist_del(&ri->uflist);
-	}
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
-	free_rp_inst(rp);
+static int __kprobes pre_handler_kretprobe(struct kprobe *p,
+					   struct pt_regs *regs)
+{
+	return 0;
 }
 
+#endif /* CONFIG_KRETPROBES */
+
 static int __init init_kprobes(void)
 {
 	int i, err = 0;
+	unsigned long offset = 0, size = 0;
+	char *modname, namebuf[128];
+	const char *symbol_name;
+	void *addr;
+	struct kprobe_blackpoint *kb;
 
 	/* FIXME allocate the probe table, currently defined statically */
 	/* initialize all list heads */
@@ -819,6 +1008,28 @@ static int __init init_kprobes(void)
 		INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
 	}
 
+	/*
+	 * Lookup and populate the kprobe_blacklist.
+	 *
+	 * Unlike the kretprobe blacklist, we'll need to determine
+	 * the range of addresses that belong to the said functions,
+	 * since a kprobe need not necessarily be at the beginning
+	 * of a function.
+	 */
+	for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
+		kprobe_lookup_name(kb->name, addr);
+		if (!addr)
+			continue;
+
+		kb->start_addr = (unsigned long)addr;
+		symbol_name = kallsyms_lookup(kb->start_addr,
+				&size, &offset, &modname, namebuf);
+		if (!symbol_name)
+			kb->range = 0;
+		else
+			kb->range = size;
+	}
+
 	if (kretprobe_blacklist_size) {
 		/* lookup the function address from its name */
 		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
@@ -1066,8 +1277,12 @@ module_init(init_kprobes);
 
 EXPORT_SYMBOL_GPL(register_kprobe);
 EXPORT_SYMBOL_GPL(unregister_kprobe);
+EXPORT_SYMBOL_GPL(register_kprobes);
+EXPORT_SYMBOL_GPL(unregister_kprobes);
 EXPORT_SYMBOL_GPL(register_jprobe);
 EXPORT_SYMBOL_GPL(unregister_jprobe);
+EXPORT_SYMBOL_GPL(register_jprobes);
+EXPORT_SYMBOL_GPL(unregister_jprobes);
 #ifdef CONFIG_KPROBES
 EXPORT_SYMBOL_GPL(jprobe_return);
 #endif
@@ -1075,4 +1290,6 @@ EXPORT_SYMBOL_GPL(jprobe_return);
 #ifdef CONFIG_KPROBES
 EXPORT_SYMBOL_GPL(register_kretprobe);
 EXPORT_SYMBOL_GPL(unregister_kretprobe);
+EXPORT_SYMBOL_GPL(register_kretprobes);
+EXPORT_SYMBOL_GPL(unregister_kretprobes);
 #endif
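
Note: the batch interfaces let one synchronize_sched() cover a whole array of probes on unregister, instead of one quiescent wait per probe. A sketch of module usage (the symbol and handler names are placeholders, not from this patch):

	static struct kprobe kp_fork = {
		.symbol_name	= "do_fork",		/* placeholder target */
		.pre_handler	= my_pre_handler,	/* placeholder handler */
	};
	static struct kprobe kp_exit = {
		.symbol_name	= "do_exit",
		.pre_handler	= my_pre_handler,
	};
	static struct kprobe *my_probes[] = { &kp_fork, &kp_exit };

	ret = register_kprobes(my_probes, ARRAY_SIZE(my_probes));
	if (ret < 0)
		return ret;	/* probes registered so far were rolled back */
	/* ... */
	unregister_kprobes(my_probes, ARRAY_SIZE(my_probes));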
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 6d792b66d854..5ca37fa50beb 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -92,7 +92,7 @@ static struct pid_namespace *create_pid_namespace(int level)
 	atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
 
 	for (i = 1; i < PIDMAP_ENTRIES; i++) {
-		ns->pidmap[i].page = 0;
+		ns->pidmap[i].page = NULL;
 		atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
 	}
 
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 6233f3b4ae66..b45da40e8d25 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -19,16 +19,6 @@ config PM
 	  will issue the hlt instruction if nothing is to be done, thereby
 	  sending the processor to sleep and saving power.
 
-config PM_LEGACY
-	bool "Legacy Power Management API (DEPRECATED)"
-	depends on PM
-	default n
-	---help---
-	   Support for pm_register() and friends. This old API is obsoleted
-	   by the driver model.
-
-	   If unsure, say N.
-
 config PM_DEBUG
 	bool "Power Management Debug Support"
 	depends on PM
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index f7dfff28ecdb..597823b5b700 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -4,7 +4,6 @@ EXTRA_CFLAGS += -DDEBUG
 endif
 
 obj-y				:= main.o
-obj-$(CONFIG_PM_LEGACY)		+= pm.o
 obj-$(CONFIG_PM_SLEEP)		+= process.o console.o
 obj-$(CONFIG_HIBERNATION)	+= swsusp.o disk.o snapshot.o swap.o user.o
 
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 89bcf4973ee5..b8628be2a465 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -7,17 +7,39 @@
 #include <linux/vt_kern.h>
 #include <linux/kbd_kern.h>
 #include <linux/console.h>
+#include <linux/module.h>
 #include "power.h"
 
 #if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
 #define SUSPEND_CONSOLE	(MAX_NR_CONSOLES-1)
 
 static int orig_fgconsole, orig_kmsg;
+static int disable_vt_switch;
+
+/*
+ * Normally during a suspend, we allocate a new console and switch to it.
+ * When we resume, we switch back to the original console.  This switch
+ * can be slow, so on systems where the framebuffer can handle restoration
+ * of video registers anyways, there's little point in doing the console
+ * switch.  This function allows you to disable it by passing it '0'.
+ */
+void pm_set_vt_switch(int do_switch)
+{
+	acquire_console_sem();
+	disable_vt_switch = !do_switch;
+	release_console_sem();
+}
+EXPORT_SYMBOL(pm_set_vt_switch);
 
 int pm_prepare_console(void)
 {
 	acquire_console_sem();
 
+	if (disable_vt_switch) {
+		release_console_sem();
+		return 0;
+	}
+
 	orig_fgconsole = fg_console;
 
 	if (vc_allocate(SUSPEND_CONSOLE)) {
@@ -50,9 +72,12 @@ int pm_prepare_console(void)
 void pm_restore_console(void)
 {
 	acquire_console_sem();
+	if (disable_vt_switch) {
+		release_console_sem();
+		return;
+	}
 	set_console(orig_fgconsole);
 	release_console_sem();
 	kmsg_redirect = orig_kmsg;
-	return;
 }
 #endif
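
Note: a driver that can restore the video state itself would presumably call the new hook once at init time, e.g.:

	pm_set_vt_switch(0);	/* suspend/resume without the slow VT switch */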
diff --git a/kernel/power/pm.c b/kernel/power/pm.c
deleted file mode 100644
index 60c73fa670d5..000000000000
--- a/kernel/power/pm.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * pm.c - Power management interface
- *
- * Copyright (C) 2000 Andrew Henroid
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/pm.h>
-#include <linux/pm_legacy.h>
-#include <linux/interrupt.h>
-#include <linux/mutex.h>
-
-/*
- * Locking notes:
- *	pm_devs_lock can be a semaphore providing pm ops are not called
- * from an interrupt handler (already a bad idea so no change here). Each
- * change must be protected so that an unlink of an entry doesn't clash
- * with a pm send - which is permitted to sleep in the current architecture
- *
- * Module unloads clashing with pm events now work out safely, the module
- * unload path will block until the event has been sent. It may well block
- * until a resume but that will be fine.
- */
-
-static DEFINE_MUTEX(pm_devs_lock);
-static LIST_HEAD(pm_devs);
-
-/**
- * pm_register - register a device with power management
- * @type: device type
- * @id: device ID
- * @callback: callback function
- *
- * Add a device to the list of devices that wish to be notified about
- * power management events. A &pm_dev structure is returned on success,
- * on failure the return is %NULL.
- *
- * The callback function will be called in process context and
- * it may sleep.
- */
-
-struct pm_dev *pm_register(pm_dev_t type,
-			   unsigned long id,
-			   pm_callback callback)
-{
-	struct pm_dev *dev = kzalloc(sizeof(struct pm_dev), GFP_KERNEL);
-	if (dev) {
-		dev->type = type;
-		dev->id = id;
-		dev->callback = callback;
-
-		mutex_lock(&pm_devs_lock);
-		list_add(&dev->entry, &pm_devs);
-		mutex_unlock(&pm_devs_lock);
-	}
-	return dev;
-}
-
-/**
- * pm_send - send request to a single device
- * @dev: device to send to
- * @rqst: power management request
- * @data: data for the callback
- *
- * Issue a power management request to a given device. The
- * %PM_SUSPEND and %PM_RESUME events are handled specially. The
- * data field must hold the intended next state. No call is made
- * if the state matches.
- *
- * BUGS: what stops two power management requests occurring in parallel
- * and conflicting.
- *
- * WARNING: Calling pm_send directly is not generally recommended, in
- * particular there is no locking against the pm_dev going away. The
- * caller must maintain all needed locking or have 'inside knowledge'
- * on the safety. Also remember that this function is not locked against
- * pm_unregister. This means that you must handle SMP races on callback
- * execution and unload yourself.
- */
-
-static int pm_send(struct pm_dev *dev, pm_request_t rqst, void *data)
-{
-	int status = 0;
-	unsigned long prev_state, next_state;
-
-	if (in_interrupt())
-		BUG();
-
-	switch (rqst) {
-	case PM_SUSPEND:
-	case PM_RESUME:
-		prev_state = dev->state;
-		next_state = (unsigned long) data;
-		if (prev_state != next_state) {
-			if (dev->callback)
-				status = (*dev->callback)(dev, rqst, data);
-			if (!status) {
-				dev->state = next_state;
-				dev->prev_state = prev_state;
-			}
-		}
-		else {
-			dev->prev_state = prev_state;
-		}
-		break;
-	default:
-		if (dev->callback)
-			status = (*dev->callback)(dev, rqst, data);
-		break;
-	}
-	return status;
-}
-
-/*
- * Undo incomplete request
- */
-static void pm_undo_all(struct pm_dev *last)
-{
-	struct list_head *entry = last->entry.prev;
-	while (entry != &pm_devs) {
-		struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
-		if (dev->state != dev->prev_state) {
-			/* previous state was zero (running) resume or
-			 * previous state was non-zero (suspended) suspend
-			 */
-			pm_request_t undo = (dev->prev_state
-					     ? PM_SUSPEND:PM_RESUME);
-			pm_send(dev, undo, (void*) dev->prev_state);
-		}
-		entry = entry->prev;
-	}
-}
-
-/**
- * pm_send_all - send request to all managed devices
- * @rqst: power management request
- * @data: data for the callback
- *
- * Issue a power management request to a all devices. The
- * %PM_SUSPEND events are handled specially. Any device is
- * permitted to fail a suspend by returning a non zero (error)
- * value from its callback function. If any device vetoes a
- * suspend request then all other devices that have suspended
- * during the processing of this request are restored to their
- * previous state.
- *
- * WARNING: This function takes the pm_devs_lock. The lock is not dropped until
- * the callbacks have completed. This prevents races against pm locking
- * functions, races against module unload pm_unregister code. It does
- * mean however that you must not issue pm_ functions within the callback
- * or you will deadlock and users will hate you.
- *
- * Zero is returned on success. If a suspend fails then the status
- * from the device that vetoes the suspend is returned.
- *
- * BUGS: what stops two power management requests occurring in parallel
- * and conflicting.
- */
-
-int pm_send_all(pm_request_t rqst, void *data)
-{
-	struct list_head *entry;
-
-	mutex_lock(&pm_devs_lock);
-	entry = pm_devs.next;
-	while (entry != &pm_devs) {
-		struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
-		if (dev->callback) {
-			int status = pm_send(dev, rqst, data);
-			if (status) {
-				/* return devices to previous state on
-				 * failed suspend request
-				 */
-				if (rqst == PM_SUSPEND)
-					pm_undo_all(dev);
-				mutex_unlock(&pm_devs_lock);
-				return status;
-			}
-		}
-		entry = entry->next;
-	}
-	mutex_unlock(&pm_devs_lock);
-	return 0;
-}
-
-EXPORT_SYMBOL(pm_register);
-EXPORT_SYMBOL(pm_send_all);
-
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 67e392ed5496..dac4b4e57293 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -612,7 +612,7 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data)
 	return (copied == sizeof(data)) ? 0 : -EIO;
 }
 
-#ifdef CONFIG_COMPAT
+#if defined CONFIG_COMPAT && defined __ARCH_WANT_COMPAT_SYS_PTRACE
 #include <linux/compat.h>
 
 int compat_ptrace_request(struct task_struct *child, compat_long_t request,
@@ -667,7 +667,6 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
 	return ret;
 }
 
-#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE
 asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
 				  compat_long_t addr, compat_long_t data)
 {
@@ -710,6 +709,4 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
 	unlock_kernel();
 	return ret;
 }
-#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */
-
-#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_COMPAT && __ARCH_WANT_COMPAT_SYS_PTRACE */
diff --git a/kernel/sched.c b/kernel/sched.c
index 0014b03adaca..740fb409e5bb 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1657,42 +1657,6 @@ void aggregate_group_weight(struct task_group *tg, struct sched_domain *sd)
 }
 
 /*
- * Redistribute tg->shares amongst all tg->cfs_rq[]s.
- */
-static void __aggregate_redistribute_shares(struct task_group *tg)
-{
-	int i, max_cpu = smp_processor_id();
-	unsigned long rq_weight = 0;
-	unsigned long shares, max_shares = 0, shares_rem = tg->shares;
-
-	for_each_possible_cpu(i)
-		rq_weight += tg->cfs_rq[i]->load.weight;
-
-	for_each_possible_cpu(i) {
-		/*
-		 * divide shares proportional to the rq_weights.
-		 */
-		shares = tg->shares * tg->cfs_rq[i]->load.weight;
-		shares /= rq_weight + 1;
-
-		tg->cfs_rq[i]->shares = shares;
-
-		if (shares > max_shares) {
-			max_shares = shares;
-			max_cpu = i;
-		}
-		shares_rem -= shares;
-	}
-
-	/*
-	 * Ensure it all adds up to tg->shares; we can loose a few
-	 * due to rounding down when computing the per-cpu shares.
-	 */
-	if (shares_rem)
-		tg->cfs_rq[max_cpu]->shares += shares_rem;
-}
-
-/*
  * Compute the weight of this group on the given cpus.
  */
 static
@@ -1701,18 +1665,11 @@ void aggregate_group_shares(struct task_group *tg, struct sched_domain *sd)
 	unsigned long shares = 0;
 	int i;
 
-again:
 	for_each_cpu_mask(i, sd->span)
 		shares += tg->cfs_rq[i]->shares;
 
-	/*
-	 * When the span doesn't have any shares assigned, but does have
-	 * tasks to run do a machine wide rebalance (should be rare).
-	 */
-	if (unlikely(!shares && aggregate(tg, sd)->rq_weight)) {
-		__aggregate_redistribute_shares(tg);
-		goto again;
-	}
+	if ((!shares && aggregate(tg, sd)->rq_weight) || shares > tg->shares)
+		shares = tg->shares;
 
 	aggregate(tg, sd)->shares = shares;
 }
@@ -7991,11 +7948,6 @@ void __init sched_init_smp(void)
 #else
 void __init sched_init_smp(void)
 {
-#if defined(CONFIG_NUMA)
-	sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **),
-								GFP_KERNEL);
-	BUG_ON(sched_group_nodes_bycpu == NULL);
-#endif
 	sched_init_granularity();
 }
 #endif /* CONFIG_SMP */
@@ -8128,7 +8080,7 @@ void __init sched_init(void)
 	 * we use alloc_bootmem().
 	 */
 	if (alloc_size) {
-		ptr = (unsigned long)alloc_bootmem_low(alloc_size);
+		ptr = (unsigned long)alloc_bootmem(alloc_size);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 		init_task_group.se = (struct sched_entity **)ptr;
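
Note: the clamp replaces the removed redistribution loop. A worked example of the new bound:

	/* tg->shares = 1024, two runqueues in sd->span holding 600 each:
	 *	shares = 600 + 600 = 1200 > tg->shares	-> clamped to 1024
	 * span has rq_weight > 0 but no shares assigned yet:
	 *	shares = 0				-> bumped to 1024
	 */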
diff --git a/kernel/sys.c b/kernel/sys.c
index 6a0cc71ee88d..f2a451366953 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1632,10 +1632,9 @@ asmlinkage long sys_umask(int mask)
 asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 			  unsigned long arg4, unsigned long arg5)
 {
-	long error;
+	long uninitialized_var(error);
 
-	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
-	if (error)
+	if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error))
 		return error;
 
 	switch (option) {
@@ -1688,17 +1687,6 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 			error = -EINVAL;
 			break;
 
-		case PR_GET_KEEPCAPS:
-			if (current->keep_capabilities)
-				error = 1;
-			break;
-		case PR_SET_KEEPCAPS:
-			if (arg2 != 0 && arg2 != 1) {
-				error = -EINVAL;
-				break;
-			}
-			current->keep_capabilities = arg2;
-			break;
 		case PR_SET_NAME: {
 			struct task_struct *me = current;
 			unsigned char ncomm[sizeof(me->comm)];
@@ -1732,17 +1720,6 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 		case PR_SET_SECCOMP:
 			error = prctl_set_seccomp(arg2);
 			break;
-
-		case PR_CAPBSET_READ:
-			if (!cap_valid(arg2))
-				return -EINVAL;
-			return !!cap_raised(current->cap_bset, arg2);
-		case PR_CAPBSET_DROP:
-#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
-			return cap_prctl_drop(arg2);
-#else
-			return -EINVAL;
-#endif
 		case PR_GET_TSC:
 			error = GET_TSC_CTL(arg2);
 			break;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d358d4e3a958..b854a895591e 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -393,6 +393,7 @@ void tick_nohz_restart_sched_tick(void)
 		sub_preempt_count(HARDIRQ_OFFSET);
 	}
 
+	touch_softlockup_watchdog();
 	/*
 	 * Cancel the scheduled timer and restore the tick
 	 */