Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile          |   5
-rw-r--r--  kernel/acct.c            |  30
-rw-r--r--  kernel/cpuset.c          |   8
-rw-r--r--  kernel/dma.c             |  10
-rw-r--r--  kernel/exit.c            |  13
-rw-r--r--  kernel/fork.c            |  82
-rw-r--r--  kernel/futex.c           |   2
-rw-r--r--  kernel/kallsyms.c        | 124
-rw-r--r--  kernel/kmod.c            |  62
-rw-r--r--  kernel/kprobes.c         |  53
-rw-r--r--  kernel/latency.c         | 279
-rw-r--r--  kernel/lockdep.c         |   6
-rw-r--r--  kernel/module.c          |  40
-rw-r--r--  kernel/nsproxy.c         | 139
-rw-r--r--  kernel/panic.c           |   1
-rw-r--r--  kernel/pid.c             | 111
-rw-r--r--  kernel/power/snapshot.c  |  10
-rw-r--r--  kernel/resource.c        |  83
-rw-r--r--  kernel/sched.c           | 326
-rw-r--r--  kernel/signal.c          |  65
-rw-r--r--  kernel/spinlock.c        |   4
-rw-r--r--  kernel/sys.c             | 110
-rw-r--r--  kernel/sys_ni.c          |   5
-rw-r--r--  kernel/sysctl.c          | 363
-rw-r--r--  kernel/taskstats.c       |  10
-rw-r--r--  kernel/time.c            | 173
-rw-r--r--  kernel/time/Makefile     |   2
-rw-r--r--  kernel/time/ntp.c        | 350
-rw-r--r--  kernel/timer.c           | 230
-rw-r--r--  kernel/tsacct.c          | 124
-rw-r--r--  kernel/utsname.c         |  95
31 files changed, 2034 insertions(+), 881 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index d62ec66c1a..d948ca12ac 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
8 signal.o sys.o kmod.o workqueue.o pid.o \ 8 signal.o sys.o kmod.o workqueue.o pid.o \
9 rcupdate.o extable.o params.o posix-timers.o \ 9 rcupdate.o extable.o params.o posix-timers.o \
10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ 10 kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
11 hrtimer.o rwsem.o 11 hrtimer.o rwsem.o latency.o nsproxy.o
12 12
13obj-$(CONFIG_STACKTRACE) += stacktrace.o 13obj-$(CONFIG_STACKTRACE) += stacktrace.o
14obj-y += time/ 14obj-y += time/
@@ -48,8 +48,9 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
48obj-$(CONFIG_SECCOMP) += seccomp.o 48obj-$(CONFIG_SECCOMP) += seccomp.o
49obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o 49obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
50obj-$(CONFIG_RELAY) += relay.o 50obj-$(CONFIG_RELAY) += relay.o
51obj-$(CONFIG_UTS_NS) += utsname.o
51obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o 52obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
52obj-$(CONFIG_TASKSTATS) += taskstats.o 53obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
53 54
54ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) 55ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
55# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is 56# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/acct.c b/kernel/acct.c
index f4330acead..0aad5ca36a 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -602,33 +602,3 @@ void acct_process(void)
602 do_acct_process(file); 602 do_acct_process(file);
603 fput(file); 603 fput(file);
604} 604}
605
606
607/**
608 * acct_update_integrals - update mm integral fields in task_struct
609 * @tsk: task_struct for accounting
610 */
611void acct_update_integrals(struct task_struct *tsk)
612{
613 if (likely(tsk->mm)) {
614 long delta =
615 cputime_to_jiffies(tsk->stime) - tsk->acct_stimexpd;
616
617 if (delta == 0)
618 return;
619 tsk->acct_stimexpd = tsk->stime;
620 tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
621 tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
622 }
623}
624
625/**
626 * acct_clear_integrals - clear the mm integral fields in task_struct
627 * @tsk: task_struct whose accounting fields are cleared
628 */
629void acct_clear_integrals(struct task_struct *tsk)
630{
631 tsk->acct_stimexpd = 0;
632 tsk->acct_rss_mem1 = 0;
633 tsk->acct_vm_mem1 = 0;
634}
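The two helpers removed here are not dropped from the kernel; per the diffstat they move to the new kernel/tsacct.c. The integral they maintain is a running sum over system time; a worked instance with made-up numbers:

	/*
	 * Suppose a task holds get_mm_rss(mm) == 1000 resident pages and
	 * mm->total_vm == 4000 pages, and tsk->stime advances by 50 jiffies
	 * between two calls to acct_update_integrals().  Then:
	 *
	 *	delta          = 50
	 *	acct_rss_mem1 += 50 * 1000	(resident set, in page-jiffies)
	 *	acct_vm_mem1  += 50 * 4000	(virtual size, in page-jiffies)
	 */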
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8c3c400cce..9d850ae13b 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -377,7 +377,7 @@ static int cpuset_fill_super(struct super_block *sb, void *unused_data,
377 inode->i_op = &simple_dir_inode_operations; 377 inode->i_op = &simple_dir_inode_operations;
378 inode->i_fop = &simple_dir_operations; 378 inode->i_fop = &simple_dir_operations;
379 /* directories start off with i_nlink == 2 (for "." entry) */ 379 /* directories start off with i_nlink == 2 (for "." entry) */
380 inode->i_nlink++; 380 inc_nlink(inode);
381 } else { 381 } else {
382 return -ENOMEM; 382 return -ENOMEM;
383 } 383 }
@@ -1565,7 +1565,7 @@ static int cpuset_create_file(struct dentry *dentry, int mode)
1565 inode->i_fop = &simple_dir_operations; 1565 inode->i_fop = &simple_dir_operations;
1566 1566
1567 /* start off with i_nlink == 2 (for "." entry) */ 1567 /* start off with i_nlink == 2 (for "." entry) */
1568 inode->i_nlink++; 1568 inc_nlink(inode);
1569 } else if (S_ISREG(mode)) { 1569 } else if (S_ISREG(mode)) {
1570 inode->i_size = 0; 1570 inode->i_size = 0;
1571 inode->i_fop = &cpuset_file_operations; 1571 inode->i_fop = &cpuset_file_operations;
@@ -1598,7 +1598,7 @@ static int cpuset_create_dir(struct cpuset *cs, const char *name, int mode)
1598 error = cpuset_create_file(dentry, S_IFDIR | mode); 1598 error = cpuset_create_file(dentry, S_IFDIR | mode);
1599 if (!error) { 1599 if (!error) {
1600 dentry->d_fsdata = cs; 1600 dentry->d_fsdata = cs;
1601 parent->d_inode->i_nlink++; 1601 inc_nlink(parent->d_inode);
1602 cs->dentry = dentry; 1602 cs->dentry = dentry;
1603 } 1603 }
1604 dput(dentry); 1604 dput(dentry);
@@ -2033,7 +2033,7 @@ int __init cpuset_init(void)
2033 } 2033 }
2034 root = cpuset_mount->mnt_sb->s_root; 2034 root = cpuset_mount->mnt_sb->s_root;
2035 root->d_fsdata = &top_cpuset; 2035 root->d_fsdata = &top_cpuset;
2036 root->d_inode->i_nlink++; 2036 inc_nlink(root->d_inode);
2037 top_cpuset.dentry = root; 2037 top_cpuset.dentry = root;
2038 root->d_inode->i_op = &cpuset_dir_inode_operations; 2038 root->d_inode->i_op = &cpuset_dir_inode_operations;
2039 number_of_cpusets = 1; 2039 number_of_cpusets = 1;
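The four hunks above are a mechanical conversion from open-coded link-count bumps to the new inc_nlink() helper. At this point in the tree the helper is, as far as these call sites care, just a wrapper (sketch from memory of include/linux/fs.h, not part of this diff):

static inline void inc_nlink(struct inode *inode)
{
	inode->i_nlink++;
}

Funnelling every i_nlink modification through one accessor makes it possible to add sanity checks later without touching each call site again.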
diff --git a/kernel/dma.c b/kernel/dma.c
index aef0a45b78..2020644c93 100644
--- a/kernel/dma.c
+++ b/kernel/dma.c
@@ -62,6 +62,11 @@ static struct dma_chan dma_chan_busy[MAX_DMA_CHANNELS] = {
62}; 62};
63 63
64 64
65/**
66 * request_dma - request and reserve a system DMA channel
67 * @dmanr: DMA channel number
68 * @device_id: reserving device ID string, used in /proc/dma
69 */
65int request_dma(unsigned int dmanr, const char * device_id) 70int request_dma(unsigned int dmanr, const char * device_id)
66{ 71{
67 if (dmanr >= MAX_DMA_CHANNELS) 72 if (dmanr >= MAX_DMA_CHANNELS)
@@ -76,7 +81,10 @@ int request_dma(unsigned int dmanr, const char * device_id)
76 return 0; 81 return 0;
77} /* request_dma */ 82} /* request_dma */
78 83
79 84/**
85 * free_dma - free a reserved system DMA channel
86 * @dmanr: DMA channel number
87 */
80void free_dma(unsigned int dmanr) 88void free_dma(unsigned int dmanr)
81{ 89{
82 if (dmanr >= MAX_DMA_CHANNELS) { 90 if (dmanr >= MAX_DMA_CHANNELS) {
diff --git a/kernel/exit.c b/kernel/exit.c
index 2e4c13cba9..f250a5e3e2 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -18,8 +18,10 @@
18#include <linux/security.h> 18#include <linux/security.h>
19#include <linux/cpu.h> 19#include <linux/cpu.h>
20#include <linux/acct.h> 20#include <linux/acct.h>
21#include <linux/tsacct_kern.h>
21#include <linux/file.h> 22#include <linux/file.h>
22#include <linux/binfmts.h> 23#include <linux/binfmts.h>
24#include <linux/nsproxy.h>
23#include <linux/ptrace.h> 25#include <linux/ptrace.h>
24#include <linux/profile.h> 26#include <linux/profile.h>
25#include <linux/mount.h> 27#include <linux/mount.h>
@@ -38,6 +40,7 @@
38#include <linux/pipe_fs_i.h> 40#include <linux/pipe_fs_i.h>
39#include <linux/audit.h> /* for audit_free() */ 41#include <linux/audit.h> /* for audit_free() */
40#include <linux/resource.h> 42#include <linux/resource.h>
43#include <linux/blkdev.h>
41 44
42#include <asm/uaccess.h> 45#include <asm/uaccess.h>
43#include <asm/unistd.h> 46#include <asm/unistd.h>
@@ -395,9 +398,11 @@ void daemonize(const char *name, ...)
395 fs = init_task.fs; 398 fs = init_task.fs;
396 current->fs = fs; 399 current->fs = fs;
397 atomic_inc(&fs->count); 400 atomic_inc(&fs->count);
398 exit_namespace(current); 401
399 current->namespace = init_task.namespace; 402 exit_task_namespaces(current);
400 get_namespace(current->namespace); 403 current->nsproxy = init_task.nsproxy;
404 get_task_namespaces(current);
405
401 exit_files(current); 406 exit_files(current);
402 current->files = init_task.files; 407 current->files = init_task.files;
403 atomic_inc(&current->files->count); 408 atomic_inc(&current->files->count);
@@ -915,7 +920,6 @@ fastcall NORET_TYPE void do_exit(long code)
915 exit_sem(tsk); 920 exit_sem(tsk);
916 __exit_files(tsk); 921 __exit_files(tsk);
917 __exit_fs(tsk); 922 __exit_fs(tsk);
918 exit_namespace(tsk);
919 exit_thread(); 923 exit_thread();
920 cpuset_exit(tsk); 924 cpuset_exit(tsk);
921 exit_keys(tsk); 925 exit_keys(tsk);
@@ -930,6 +934,7 @@ fastcall NORET_TYPE void do_exit(long code)
930 tsk->exit_code = code; 934 tsk->exit_code = code;
931 proc_exit_connector(tsk); 935 proc_exit_connector(tsk);
932 exit_notify(tsk); 936 exit_notify(tsk);
937 exit_task_namespaces(tsk);
933#ifdef CONFIG_NUMA 938#ifdef CONFIG_NUMA
934 mpol_free(tsk->mempolicy); 939 mpol_free(tsk->mempolicy);
935 tsk->mempolicy = NULL; 940 tsk->mempolicy = NULL;
diff --git a/kernel/fork.c b/kernel/fork.c
index 1c999f3e0b..7dc6140baa 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -27,6 +27,7 @@
27#include <linux/binfmts.h> 27#include <linux/binfmts.h>
28#include <linux/mman.h> 28#include <linux/mman.h>
29#include <linux/fs.h> 29#include <linux/fs.h>
30#include <linux/nsproxy.h>
30#include <linux/capability.h> 31#include <linux/capability.h>
31#include <linux/cpu.h> 32#include <linux/cpu.h>
32#include <linux/cpuset.h> 33#include <linux/cpuset.h>
@@ -42,6 +43,7 @@
42#include <linux/profile.h> 43#include <linux/profile.h>
43#include <linux/rmap.h> 44#include <linux/rmap.h>
44#include <linux/acct.h> 45#include <linux/acct.h>
46#include <linux/tsacct_kern.h>
45#include <linux/cn_proc.h> 47#include <linux/cn_proc.h>
46#include <linux/delayacct.h> 48#include <linux/delayacct.h>
47#include <linux/taskstats_kern.h> 49#include <linux/taskstats_kern.h>
@@ -1115,11 +1117,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1115 goto bad_fork_cleanup_signal; 1117 goto bad_fork_cleanup_signal;
1116 if ((retval = copy_keys(clone_flags, p))) 1118 if ((retval = copy_keys(clone_flags, p)))
1117 goto bad_fork_cleanup_mm; 1119 goto bad_fork_cleanup_mm;
1118 if ((retval = copy_namespace(clone_flags, p))) 1120 if ((retval = copy_namespaces(clone_flags, p)))
1119 goto bad_fork_cleanup_keys; 1121 goto bad_fork_cleanup_keys;
1120 retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); 1122 retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
1121 if (retval) 1123 if (retval)
1122 goto bad_fork_cleanup_namespace; 1124 goto bad_fork_cleanup_namespaces;
1123 1125
1124 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; 1126 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
1125 /* 1127 /*
@@ -1211,7 +1213,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1211 spin_unlock(&current->sighand->siglock); 1213 spin_unlock(&current->sighand->siglock);
1212 write_unlock_irq(&tasklist_lock); 1214 write_unlock_irq(&tasklist_lock);
1213 retval = -ERESTARTNOINTR; 1215 retval = -ERESTARTNOINTR;
1214 goto bad_fork_cleanup_namespace; 1216 goto bad_fork_cleanup_namespaces;
1215 } 1217 }
1216 1218
1217 if (clone_flags & CLONE_THREAD) { 1219 if (clone_flags & CLONE_THREAD) {
@@ -1259,8 +1261,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1259 proc_fork_connector(p); 1261 proc_fork_connector(p);
1260 return p; 1262 return p;
1261 1263
1262bad_fork_cleanup_namespace: 1264bad_fork_cleanup_namespaces:
1263 exit_namespace(p); 1265 exit_task_namespaces(p);
1264bad_fork_cleanup_keys: 1266bad_fork_cleanup_keys:
1265 exit_keys(p); 1267 exit_keys(p);
1266bad_fork_cleanup_mm: 1268bad_fork_cleanup_mm:
@@ -1513,10 +1515,9 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
1513 */ 1515 */
1514static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs) 1516static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
1515{ 1517{
1516 struct namespace *ns = current->namespace; 1518 struct namespace *ns = current->nsproxy->namespace;
1517 1519
1518 if ((unshare_flags & CLONE_NEWNS) && 1520 if ((unshare_flags & CLONE_NEWNS) && ns) {
1519 (ns && atomic_read(&ns->count) > 1)) {
1520 if (!capable(CAP_SYS_ADMIN)) 1521 if (!capable(CAP_SYS_ADMIN))
1521 return -EPERM; 1522 return -EPERM;
1522 1523
@@ -1588,6 +1589,16 @@ static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **n
1588 return 0; 1589 return 0;
1589} 1590}
1590 1591
1592#ifndef CONFIG_IPC_NS
1593static inline int unshare_ipcs(unsigned long flags, struct ipc_namespace **ns)
1594{
1595 if (flags & CLONE_NEWIPC)
1596 return -EINVAL;
1597
1598 return 0;
1599}
1600#endif
1601
1591/* 1602/*
1592 * unshare allows a process to 'unshare' part of the process 1603 * unshare allows a process to 'unshare' part of the process
1593 * context which was originally shared using clone. copy_* 1604 * context which was originally shared using clone. copy_*
@@ -1605,13 +1616,17 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
1605 struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; 1616 struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
1606 struct files_struct *fd, *new_fd = NULL; 1617 struct files_struct *fd, *new_fd = NULL;
1607 struct sem_undo_list *new_ulist = NULL; 1618 struct sem_undo_list *new_ulist = NULL;
1619 struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL;
1620 struct uts_namespace *uts, *new_uts = NULL;
1621 struct ipc_namespace *ipc, *new_ipc = NULL;
1608 1622
1609 check_unshare_flags(&unshare_flags); 1623 check_unshare_flags(&unshare_flags);
1610 1624
1611 /* Return -EINVAL for all unsupported flags */ 1625 /* Return -EINVAL for all unsupported flags */
1612 err = -EINVAL; 1626 err = -EINVAL;
1613 if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| 1627 if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
1614 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM)) 1628 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
1629 CLONE_NEWUTS|CLONE_NEWIPC))
1615 goto bad_unshare_out; 1630 goto bad_unshare_out;
1616 1631
1617 if ((err = unshare_thread(unshare_flags))) 1632 if ((err = unshare_thread(unshare_flags)))
@@ -1628,11 +1643,30 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
1628 goto bad_unshare_cleanup_vm; 1643 goto bad_unshare_cleanup_vm;
1629 if ((err = unshare_semundo(unshare_flags, &new_ulist))) 1644 if ((err = unshare_semundo(unshare_flags, &new_ulist)))
1630 goto bad_unshare_cleanup_fd; 1645 goto bad_unshare_cleanup_fd;
1646 if ((err = unshare_utsname(unshare_flags, &new_uts)))
1647 goto bad_unshare_cleanup_semundo;
1648 if ((err = unshare_ipcs(unshare_flags, &new_ipc)))
1649 goto bad_unshare_cleanup_uts;
1650
1651 if (new_ns || new_uts || new_ipc) {
1652 old_nsproxy = current->nsproxy;
1653 new_nsproxy = dup_namespaces(old_nsproxy);
1654 if (!new_nsproxy) {
1655 err = -ENOMEM;
1656 goto bad_unshare_cleanup_ipc;
1657 }
1658 }
1631 1659
1632 if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) { 1660 if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist ||
1661 new_uts || new_ipc) {
1633 1662
1634 task_lock(current); 1663 task_lock(current);
1635 1664
1665 if (new_nsproxy) {
1666 current->nsproxy = new_nsproxy;
1667 new_nsproxy = old_nsproxy;
1668 }
1669
1636 if (new_fs) { 1670 if (new_fs) {
1637 fs = current->fs; 1671 fs = current->fs;
1638 current->fs = new_fs; 1672 current->fs = new_fs;
@@ -1640,8 +1674,8 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
1640 } 1674 }
1641 1675
1642 if (new_ns) { 1676 if (new_ns) {
1643 ns = current->namespace; 1677 ns = current->nsproxy->namespace;
1644 current->namespace = new_ns; 1678 current->nsproxy->namespace = new_ns;
1645 new_ns = ns; 1679 new_ns = ns;
1646 } 1680 }
1647 1681
@@ -1666,9 +1700,33 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
1666 new_fd = fd; 1700 new_fd = fd;
1667 } 1701 }
1668 1702
1703 if (new_uts) {
1704 uts = current->nsproxy->uts_ns;
1705 current->nsproxy->uts_ns = new_uts;
1706 new_uts = uts;
1707 }
1708
1709 if (new_ipc) {
1710 ipc = current->nsproxy->ipc_ns;
1711 current->nsproxy->ipc_ns = new_ipc;
1712 new_ipc = ipc;
1713 }
1714
1669 task_unlock(current); 1715 task_unlock(current);
1670 } 1716 }
1671 1717
1718 if (new_nsproxy)
1719 put_nsproxy(new_nsproxy);
1720
1721bad_unshare_cleanup_ipc:
1722 if (new_ipc)
1723 put_ipc_ns(new_ipc);
1724
1725bad_unshare_cleanup_uts:
1726 if (new_uts)
1727 put_uts_ns(new_uts);
1728
1729bad_unshare_cleanup_semundo:
1672bad_unshare_cleanup_fd: 1730bad_unshare_cleanup_fd:
1673 if (new_fd) 1731 if (new_fd)
1674 put_files_struct(new_fd); 1732 put_files_struct(new_fd);
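sys_unshare() now accepts CLONE_NEWUTS and CLONE_NEWIPC: when any namespace is being unshared it duplicates the nsproxy first, swaps it into current under task_lock(), and only then exchanges the individual namespace pointers. From userspace the new flags are used the same way as the existing ones; a minimal illustration (not part of this patch, and the flag value is assumed from the series' header changes):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#ifndef CLONE_NEWUTS
#define CLONE_NEWUTS	0x04000000	/* assumed value; check your headers */
#endif

int main(void)
{
	char buf[65];

	/* Requires CAP_SYS_ADMIN and a kernel built with CONFIG_UTS_NS. */
	if (unshare(CLONE_NEWUTS) == -1) {
		perror("unshare(CLONE_NEWUTS)");
		return 1;
	}

	/* This change is private to the new UTS namespace. */
	sethostname("sandbox", strlen("sandbox"));
	gethostname(buf, sizeof(buf));
	printf("hostname here: %s\n", buf);
	return 0;
}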
diff --git a/kernel/futex.c b/kernel/futex.c
index 4b6770e980..4aaf91951a 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1527,7 +1527,7 @@ static int futex_fd(u32 __user *uaddr, int signal)
1527 filp->f_mapping = filp->f_dentry->d_inode->i_mapping; 1527 filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
1528 1528
1529 if (signal) { 1529 if (signal) {
1530 err = f_setown(filp, current->pid, 1); 1530 err = __f_setown(filp, task_pid(current), PIDTYPE_PID, 1);
1531 if (err < 0) { 1531 if (err < 0) {
1532 goto error; 1532 goto error;
1533 } 1533 }
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index ab16a5a4cf..eeac3e313b 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -69,6 +69,15 @@ static inline int is_kernel(unsigned long addr)
69 return in_gate_area_no_task(addr); 69 return in_gate_area_no_task(addr);
70} 70}
71 71
72static int is_ksym_addr(unsigned long addr)
73{
74 if (all_var)
75 return is_kernel(addr);
76
77 return is_kernel_text(addr) || is_kernel_inittext(addr) ||
78 is_kernel_extratext(addr);
79}
80
72/* expand a compressed symbol data into the resulting uncompressed string, 81/* expand a compressed symbol data into the resulting uncompressed string,
73 given the offset to where the symbol is in the compressed stream */ 82 given the offset to where the symbol is in the compressed stream */
74static unsigned int kallsyms_expand_symbol(unsigned int off, char *result) 83static unsigned int kallsyms_expand_symbol(unsigned int off, char *result)
@@ -154,7 +163,73 @@ unsigned long kallsyms_lookup_name(const char *name)
154 } 163 }
155 return module_kallsyms_lookup_name(name); 164 return module_kallsyms_lookup_name(name);
156} 165}
157EXPORT_SYMBOL_GPL(kallsyms_lookup_name); 166
167static unsigned long get_symbol_pos(unsigned long addr,
168 unsigned long *symbolsize,
169 unsigned long *offset)
170{
171 unsigned long symbol_start = 0, symbol_end = 0;
172 unsigned long i, low, high, mid;
173
174	/* This kernel should never have been booted. */
175 BUG_ON(!kallsyms_addresses);
176
177 /* do a binary search on the sorted kallsyms_addresses array */
178 low = 0;
179 high = kallsyms_num_syms;
180
181 while (high - low > 1) {
182 mid = (low + high) / 2;
183 if (kallsyms_addresses[mid] <= addr)
184 low = mid;
185 else
186 high = mid;
187 }
188
189 /*
190 * search for the first aliased symbol. Aliased
191 * symbols are symbols with the same address
192 */
193 while (low && kallsyms_addresses[low-1] == kallsyms_addresses[low])
194 --low;
195
196 symbol_start = kallsyms_addresses[low];
197
198 /* Search for next non-aliased symbol */
199 for (i = low + 1; i < kallsyms_num_syms; i++) {
200 if (kallsyms_addresses[i] > symbol_start) {
201 symbol_end = kallsyms_addresses[i];
202 break;
203 }
204 }
205
206 /* if we found no next symbol, we use the end of the section */
207 if (!symbol_end) {
208 if (is_kernel_inittext(addr))
209 symbol_end = (unsigned long)_einittext;
210 else if (all_var)
211 symbol_end = (unsigned long)_end;
212 else
213 symbol_end = (unsigned long)_etext;
214 }
215
216 *symbolsize = symbol_end - symbol_start;
217 *offset = addr - symbol_start;
218
219 return low;
220}
221
222/*
223 * Lookup an address but don't bother to find any names.
224 */
225int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
226 unsigned long *offset)
227{
228 if (is_ksym_addr(addr))
229 return !!get_symbol_pos(addr, symbolsize, offset);
230
231 return !!module_address_lookup(addr, symbolsize, offset, NULL);
232}
158 233
159/* 234/*
160 * Lookup an address 235 * Lookup an address
@@ -168,57 +243,18 @@ const char *kallsyms_lookup(unsigned long addr,
168 unsigned long *offset, 243 unsigned long *offset,
169 char **modname, char *namebuf) 244 char **modname, char *namebuf)
170{ 245{
171 unsigned long i, low, high, mid;
172 const char *msym; 246 const char *msym;
173 247
174 /* This kernel should never had been booted. */
175 BUG_ON(!kallsyms_addresses);
176
177 namebuf[KSYM_NAME_LEN] = 0; 248 namebuf[KSYM_NAME_LEN] = 0;
178 namebuf[0] = 0; 249 namebuf[0] = 0;
179 250
180 if ((all_var && is_kernel(addr)) || 251 if (is_ksym_addr(addr)) {
181 (!all_var && (is_kernel_text(addr) || is_kernel_inittext(addr) || 252 unsigned long pos;
182 is_kernel_extratext(addr)))) {
183 unsigned long symbol_end = 0;
184
185 /* do a binary search on the sorted kallsyms_addresses array */
186 low = 0;
187 high = kallsyms_num_syms;
188
189 while (high-low > 1) {
190 mid = (low + high) / 2;
191 if (kallsyms_addresses[mid] <= addr) low = mid;
192 else high = mid;
193 }
194
195 /* search for the first aliased symbol. Aliased symbols are
196 symbols with the same address */
197 while (low && kallsyms_addresses[low - 1] == kallsyms_addresses[low])
198 --low;
199 253
254 pos = get_symbol_pos(addr, symbolsize, offset);
200 /* Grab name */ 255 /* Grab name */
201 kallsyms_expand_symbol(get_symbol_offset(low), namebuf); 256 kallsyms_expand_symbol(get_symbol_offset(pos), namebuf);
202
203 /* Search for next non-aliased symbol */
204 for (i = low + 1; i < kallsyms_num_syms; i++) {
205 if (kallsyms_addresses[i] > kallsyms_addresses[low]) {
206 symbol_end = kallsyms_addresses[i];
207 break;
208 }
209 }
210
211 /* if we found no next symbol, we use the end of the section */
212 if (!symbol_end) {
213 if (is_kernel_inittext(addr))
214 symbol_end = (unsigned long)_einittext;
215 else
216 symbol_end = all_var ? (unsigned long)_end : (unsigned long)_etext;
217 }
218
219 *symbolsize = symbol_end - kallsyms_addresses[low];
220 *modname = NULL; 257 *modname = NULL;
221 *offset = addr - kallsyms_addresses[low];
222 return namebuf; 258 return namebuf;
223 } 259 }
224 260
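The lookup logic is now split: get_symbol_pos() does the binary search over kallsyms_addresses[] plus the alias and size fixups, kallsyms_lookup() keeps the name expansion, and the new kallsyms_lookup_size_offset() answers only how big the containing symbol is and how far into it an address lies, without touching names. A hedged sketch of a caller (the printk is illustrative, not from this patch):

#include <linux/kallsyms.h>
#include <linux/kernel.h>

static void report_extent(unsigned long addr)
{
	unsigned long size, offset;

	/* Nonzero when addr lies inside a known kernel or module symbol. */
	if (kallsyms_lookup_size_offset(addr, &size, &offset))
		printk(KERN_DEBUG "0x%lx is %lu bytes into a %lu-byte symbol\n",
		       addr, offset, size);
	else
		printk(KERN_DEBUG "0x%lx: no kallsyms entry\n", addr);
}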
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 842f8015d7..bb4e29d924 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -18,8 +18,6 @@
18 call_usermodehelper wait flag, and remove exec_usermodehelper. 18 call_usermodehelper wait flag, and remove exec_usermodehelper.
19 Rusty Russell <rusty@rustcorp.com.au> Jan 2003 19 Rusty Russell <rusty@rustcorp.com.au> Jan 2003
20*/ 20*/
21#define __KERNEL_SYSCALLS__
22
23#include <linux/module.h> 21#include <linux/module.h>
24#include <linux/sched.h> 22#include <linux/sched.h>
25#include <linux/syscalls.h> 23#include <linux/syscalls.h>
@@ -35,6 +33,7 @@
35#include <linux/mount.h> 33#include <linux/mount.h>
36#include <linux/kernel.h> 34#include <linux/kernel.h>
37#include <linux/init.h> 35#include <linux/init.h>
36#include <linux/resource.h>
38#include <asm/uaccess.h> 37#include <asm/uaccess.h>
39 38
40extern int max_threads; 39extern int max_threads;
@@ -122,6 +121,7 @@ struct subprocess_info {
122 struct key *ring; 121 struct key *ring;
123 int wait; 122 int wait;
124 int retval; 123 int retval;
124 struct file *stdin;
125}; 125};
126 126
127/* 127/*
@@ -145,12 +145,30 @@ static int ____call_usermodehelper(void *data)
145 145
146 key_put(old_session); 146 key_put(old_session);
147 147
148 /* Install input pipe when needed */
149 if (sub_info->stdin) {
150 struct files_struct *f = current->files;
151 struct fdtable *fdt;
152 /* no races because files should be private here */
153 sys_close(0);
154 fd_install(0, sub_info->stdin);
155 spin_lock(&f->file_lock);
156 fdt = files_fdtable(f);
157 FD_SET(0, fdt->open_fds);
158 FD_CLR(0, fdt->close_on_exec);
159 spin_unlock(&f->file_lock);
160
161 /* and disallow core files too */
162 current->signal->rlim[RLIMIT_CORE] = (struct rlimit){0, 0};
163 }
164
148 /* We can run anywhere, unlike our parent keventd(). */ 165 /* We can run anywhere, unlike our parent keventd(). */
149 set_cpus_allowed(current, CPU_MASK_ALL); 166 set_cpus_allowed(current, CPU_MASK_ALL);
150 167
151 retval = -EPERM; 168 retval = -EPERM;
152 if (current->fs->root) 169 if (current->fs->root)
153 retval = execve(sub_info->path, sub_info->argv,sub_info->envp); 170 retval = kernel_execve(sub_info->path,
171 sub_info->argv, sub_info->envp);
154 172
155 /* Exec failed? */ 173 /* Exec failed? */
156 sub_info->retval = retval; 174 sub_info->retval = retval;
@@ -268,6 +286,44 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp,
268} 286}
269EXPORT_SYMBOL(call_usermodehelper_keys); 287EXPORT_SYMBOL(call_usermodehelper_keys);
270 288
289int call_usermodehelper_pipe(char *path, char **argv, char **envp,
290 struct file **filp)
291{
292 DECLARE_COMPLETION(done);
293 struct subprocess_info sub_info = {
294 .complete = &done,
295 .path = path,
296 .argv = argv,
297 .envp = envp,
298 .retval = 0,
299 };
300 struct file *f;
301 DECLARE_WORK(work, __call_usermodehelper, &sub_info);
302
303 if (!khelper_wq)
304 return -EBUSY;
305
306 if (path[0] == '\0')
307 return 0;
308
309 f = create_write_pipe();
310 if (!f)
311 return -ENOMEM;
312 *filp = f;
313
314 f = create_read_pipe(f);
315 if (!f) {
316 free_write_pipe(*filp);
317 return -ENOMEM;
318 }
319 sub_info.stdin = f;
320
321 queue_work(khelper_wq, &work);
322 wait_for_completion(&done);
323 return sub_info.retval;
324}
325EXPORT_SYMBOL(call_usermodehelper_pipe);
326
271void __init usermodehelper_init(void) 327void __init usermodehelper_init(void)
272{ 328{
273 khelper_wq = create_singlethread_workqueue("khelper"); 329 khelper_wq = create_singlethread_workqueue("khelper");
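call_usermodehelper_pipe() builds on the existing helper machinery: it creates a pipe, hands the read side to the helper as fd 0 (installed in ____call_usermodehelper() above), and returns the write side to the caller through *filp. A rough sketch of the calling pattern with a hypothetical helper path (the intended in-tree user of this interface is the pipe-to-program core dump support):

#include <linux/kmod.h>
#include <linux/fs.h>

static int run_helper_with_stdin(void)
{
	struct file *out;
	char *argv[] = { "/sbin/example-helper", NULL };	/* hypothetical */
	char *envp[] = { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };
	int err;

	err = call_usermodehelper_pipe(argv[0], argv, envp, &out);
	if (err)
		return err;

	/*
	 * The helper is now running with the pipe as its stdin; the caller
	 * streams data into 'out' and closes it when finished, which is how
	 * a piped core dump would feed the core image to the helper.
	 */
	return filp_close(out, NULL);
}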
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3f57dfdc8f..610c837ad9 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -37,6 +37,7 @@
37#include <linux/slab.h> 37#include <linux/slab.h>
38#include <linux/module.h> 38#include <linux/module.h>
39#include <linux/moduleloader.h> 39#include <linux/moduleloader.h>
40#include <linux/kallsyms.h>
40#include <asm-generic/sections.h> 41#include <asm-generic/sections.h>
41#include <asm/cacheflush.h> 42#include <asm/cacheflush.h>
42#include <asm/errno.h> 43#include <asm/errno.h>
@@ -45,6 +46,16 @@
45#define KPROBE_HASH_BITS 6 46#define KPROBE_HASH_BITS 6
46#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS) 47#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
47 48
49
50/*
51 * Some oddball architectures like 64bit powerpc have function descriptors
52 * so this must be overridable.
53 */
54#ifndef kprobe_lookup_name
55#define kprobe_lookup_name(name, addr) \
56 addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name)))
57#endif
58
48static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; 59static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
49static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; 60static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
50static atomic_t kprobe_count; 61static atomic_t kprobe_count;
@@ -308,7 +319,8 @@ void __kprobes add_rp_inst(struct kretprobe_instance *ri)
308} 319}
309 320
310/* Called with kretprobe_lock held */ 321/* Called with kretprobe_lock held */
311void __kprobes recycle_rp_inst(struct kretprobe_instance *ri) 322void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
323 struct hlist_head *head)
312{ 324{
313 /* remove rp inst off the rprobe_inst_table */ 325 /* remove rp inst off the rprobe_inst_table */
314 hlist_del(&ri->hlist); 326 hlist_del(&ri->hlist);
@@ -320,7 +332,7 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri)
320 hlist_add_head(&ri->uflist, &ri->rp->free_instances); 332 hlist_add_head(&ri->uflist, &ri->rp->free_instances);
321 } else 333 } else
322 /* Unregistering */ 334 /* Unregistering */
323 kfree(ri); 335 hlist_add_head(&ri->hlist, head);
324} 336}
325 337
326struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk) 338struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk)
@@ -336,18 +348,24 @@ struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk)
336 */ 348 */
337void __kprobes kprobe_flush_task(struct task_struct *tk) 349void __kprobes kprobe_flush_task(struct task_struct *tk)
338{ 350{
339 struct kretprobe_instance *ri; 351 struct kretprobe_instance *ri;
340 struct hlist_head *head; 352 struct hlist_head *head, empty_rp;
341 struct hlist_node *node, *tmp; 353 struct hlist_node *node, *tmp;
342 unsigned long flags = 0; 354 unsigned long flags = 0;
343 355
356 INIT_HLIST_HEAD(&empty_rp);
344 spin_lock_irqsave(&kretprobe_lock, flags); 357 spin_lock_irqsave(&kretprobe_lock, flags);
345 head = kretprobe_inst_table_head(tk); 358 head = kretprobe_inst_table_head(tk);
346 hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { 359 hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
347 if (ri->task == tk) 360 if (ri->task == tk)
348 recycle_rp_inst(ri); 361 recycle_rp_inst(ri, &empty_rp);
349 } 362 }
350 spin_unlock_irqrestore(&kretprobe_lock, flags); 363 spin_unlock_irqrestore(&kretprobe_lock, flags);
364
365 hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
366 hlist_del(&ri->hlist);
367 kfree(ri);
368 }
351} 369}
352 370
353static inline void free_rp_inst(struct kretprobe *rp) 371static inline void free_rp_inst(struct kretprobe *rp)
@@ -447,6 +465,21 @@ static int __kprobes __register_kprobe(struct kprobe *p,
447 struct kprobe *old_p; 465 struct kprobe *old_p;
448 struct module *probed_mod; 466 struct module *probed_mod;
449 467
468 /*
469 * If we have a symbol_name argument look it up,
470 * and add it to the address. That way the addr
471 * field can either be global or relative to a symbol.
472 */
473 if (p->symbol_name) {
474 if (p->addr)
475 return -EINVAL;
476 kprobe_lookup_name(p->symbol_name, p->addr);
477 }
478
479 if (!p->addr)
480 return -EINVAL;
481 p->addr = (kprobe_opcode_t *)(((char *)p->addr)+ p->offset);
482
450 if ((!kernel_text_address((unsigned long) p->addr)) || 483 if ((!kernel_text_address((unsigned long) p->addr)) ||
451 in_kprobes_functions((unsigned long) p->addr)) 484 in_kprobes_functions((unsigned long) p->addr))
452 return -EINVAL; 485 return -EINVAL;
@@ -488,7 +521,7 @@ static int __kprobes __register_kprobe(struct kprobe *p,
488 (ARCH_INACTIVE_KPROBE_COUNT + 1)) 521 (ARCH_INACTIVE_KPROBE_COUNT + 1))
489 register_page_fault_notifier(&kprobe_page_fault_nb); 522 register_page_fault_notifier(&kprobe_page_fault_nb);
490 523
491 arch_arm_kprobe(p); 524 arch_arm_kprobe(p);
492 525
493out: 526out:
494 mutex_unlock(&kprobe_mutex); 527 mutex_unlock(&kprobe_mutex);
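With the symbol_name/offset handling added to __register_kprobe(), a probe can be specified by name instead of a precomputed address; kprobe_lookup_name() defaults to kallsyms_lookup_name() but can be overridden on architectures with function descriptors (64-bit powerpc). A minimal module-style sketch of the new usage (probe target and message are illustrative):

#include <linux/module.h>
#include <linux/kprobes.h>

static int pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	printk(KERN_DEBUG "hit %s\n", p->symbol_name);
	return 0;
}

static struct kprobe kp = {
	.symbol_name	= "do_fork",	/* resolved at register time */
	.offset		= 0,		/* probe the function entry */
	.pre_handler	= pre_handler,
};

static int __init kp_init(void)
{
	/* Setting both .addr and .symbol_name is rejected with -EINVAL. */
	return register_kprobe(&kp);
}

static void __exit kp_exit(void)
{
	unregister_kprobe(&kp);
}

module_init(kp_init);
module_exit(kp_exit);
MODULE_LICENSE("GPL");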
diff --git a/kernel/latency.c b/kernel/latency.c
new file mode 100644
index 0000000000..258f2555ab
--- /dev/null
+++ b/kernel/latency.c
@@ -0,0 +1,279 @@
1/*
2 * latency.c: Explicit system-wide latency-expectation infrastructure
3 *
4 * The purpose of this infrastructure is to allow device drivers to set
5 * the latency constraints they have and to collect and summarize these
6 * expectations globally. The accumulated result can then be used by
7 * power management and similar users to make decisions that have
8 * tradeoffs with a latency component.
9 *
10 * An example user of this are the x86 C-states; each higher C state saves
11 * more power, but has a higher exit latency. For the idle loop power
12 * code to make a good decision which C-state to use, information about
13 * acceptable latencies is required.
14 *
15 * An example announcer of latency is an audio driver that knows it
16 * will get an interrupt when the hardware has 200 usec of samples
17 * left in the DMA buffer; in that case the driver can set a latency
18 * constraint of, say, 150 usec.
19 *
20 * Multiple drivers can each announce their maximum accepted latency;
21 * to keep these apart, a string-based identifier is used.
22 *
23 *
24 * (C) Copyright 2006 Intel Corporation
25 * Author: Arjan van de Ven <arjan@linux.intel.com>
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation; version 2
30 * of the License.
31 */
32
33#include <linux/latency.h>
34#include <linux/list.h>
35#include <linux/spinlock.h>
36#include <linux/slab.h>
37#include <linux/module.h>
38#include <linux/notifier.h>
39#include <asm/atomic.h>
40
41struct latency_info {
42 struct list_head list;
43 int usecs;
44 char *identifier;
45};
46
47/*
48 * locking rule: all modifications to current_max_latency and
49 * latency_list need to be done while holding the latency_lock.
50 * latency_lock needs to be taken _irqsave.
51 */
52static atomic_t current_max_latency;
53static DEFINE_SPINLOCK(latency_lock);
54
55static LIST_HEAD(latency_list);
56static BLOCKING_NOTIFIER_HEAD(latency_notifier);
57
58/*
59 * This function returns the maximum latency allowed, which
60 * happens to be the minimum of all maximum latencies on the
61 * list.
62 */
63static int __find_max_latency(void)
64{
65 int min = INFINITE_LATENCY;
66 struct latency_info *info;
67
68 list_for_each_entry(info, &latency_list, list) {
69 if (info->usecs < min)
70 min = info->usecs;
71 }
72 return min;
73}
74
75/**
76 * set_acceptable_latency - sets the maximum latency acceptable
77 * @identifier: string that identifies this driver
78 * @usecs: maximum acceptable latency for this driver
79 *
80 * This function informs the kernel that this device (driver)
81 * can accept at most @usecs of latency. This setting is used for
82 * power management and similar tradeoffs.
83 *
84 * This function sleeps and can only be called from process
85 * context.
86 * Calling this function with an existing identifier is valid
87 * and will cause the existing latency setting to be changed.
88 */
89void set_acceptable_latency(char *identifier, int usecs)
90{
91 struct latency_info *info, *iter;
92 unsigned long flags;
93 int found_old = 0;
94
95 info = kzalloc(sizeof(struct latency_info), GFP_KERNEL);
96 if (!info)
97 return;
98 info->usecs = usecs;
99 info->identifier = kstrdup(identifier, GFP_KERNEL);
100 if (!info->identifier)
101 goto free_info;
102
103 spin_lock_irqsave(&latency_lock, flags);
104 list_for_each_entry(iter, &latency_list, list) {
105 if (strcmp(iter->identifier, identifier)==0) {
106 found_old = 1;
107 iter->usecs = usecs;
108 break;
109 }
110 }
111 if (!found_old)
112 list_add(&info->list, &latency_list);
113
114 if (usecs < atomic_read(&current_max_latency))
115 atomic_set(&current_max_latency, usecs);
116
117 spin_unlock_irqrestore(&latency_lock, flags);
118
119 blocking_notifier_call_chain(&latency_notifier,
120 atomic_read(&current_max_latency), NULL);
121
122 /*
123 * if we inserted the new one, we're done; otherwise there was
124 * an existing one so we need to free the redundant data
125 */
126 if (!found_old)
127 return;
128
129 kfree(info->identifier);
130free_info:
131 kfree(info);
132}
133EXPORT_SYMBOL_GPL(set_acceptable_latency);
134
135/**
136 * modify_acceptable_latency - changes the maximum latency acceptable
137 * @identifier: string that identifies this driver
138 * @usecs: maximum acceptable latency for this driver
139 *
140 * This function informs the kernel that this device (driver)
141 * can accept at most @usecs of latency. This setting is used for
142 * power management and similar tradeoffs.
143 *
144 * This function does not sleep and can be called in any context.
145 * Attempts to use a non-existing identifier are silently ignored.
146 *
147 * Due to the atomic nature of this function, the modified latency
148 * value will only be used for future decisions; past decisions
149 * can still lead to longer latencies in the near future.
150 */
151void modify_acceptable_latency(char *identifier, int usecs)
152{
153 struct latency_info *iter;
154 unsigned long flags;
155
156 spin_lock_irqsave(&latency_lock, flags);
157 list_for_each_entry(iter, &latency_list, list) {
158 if (strcmp(iter->identifier, identifier) == 0) {
159 iter->usecs = usecs;
160 break;
161 }
162 }
163 if (usecs < atomic_read(&current_max_latency))
164 atomic_set(&current_max_latency, usecs);
165 spin_unlock_irqrestore(&latency_lock, flags);
166}
167EXPORT_SYMBOL_GPL(modify_acceptable_latency);
168
169/**
170 * remove_acceptable_latency - removes the maximum latency acceptable
171 * @identifier: string that identifies this driver
172 *
173 * This function removes a previously set maximum latency setting
174 * for the driver and frees up any resources associated with the
175 * bookkeeping needed for this.
176 *
177 * This function does not sleep and can be called in any context.
178 * Attempts to use a non-existing identifier are silently ignored.
179 */
180void remove_acceptable_latency(char *identifier)
181{
182 unsigned long flags;
183 int newmax = 0;
184 struct latency_info *iter, *temp;
185
186 spin_lock_irqsave(&latency_lock, flags);
187
188 list_for_each_entry_safe(iter, temp, &latency_list, list) {
189 if (strcmp(iter->identifier, identifier) == 0) {
190 list_del(&iter->list);
191 newmax = iter->usecs;
192 kfree(iter->identifier);
193 kfree(iter);
194 break;
195 }
196 }
197
198 /* If we just deleted the system wide value, we need to
199 * recalculate with a full search
200 */
201 if (newmax == atomic_read(&current_max_latency)) {
202 newmax = __find_max_latency();
203 atomic_set(&current_max_latency, newmax);
204 }
205 spin_unlock_irqrestore(&latency_lock, flags);
206}
207EXPORT_SYMBOL_GPL(remove_acceptable_latency);
208
209/**
210 * system_latency_constraint - queries the system wide latency maximum
211 *
212 * This function returns the system wide maximum latency in
213 * microseconds.
214 *
215 * This function does not sleep and can be called in any context.
216 */
217int system_latency_constraint(void)
218{
219 return atomic_read(&current_max_latency);
220}
221EXPORT_SYMBOL_GPL(system_latency_constraint);
222
223/**
224 * synchronize_acceptable_latency - recalculates all latency decisions
225 *
226 * This function will cause a callback to various kernel pieces that
227 * will make those pieces rethink their latency decisions. This implies
228 * that if there are overlong latencies in hardware state already, those
229 * latencies get taken right now. When this call completes no overlong
230 * latency decisions should be active anymore.
231 *
232 * Typical usecase of this is after a modify_acceptable_latency() call,
233 * which in itself is non-blocking and non-synchronizing.
234 *
235 * This function blocks and should not be called with locks held.
236 */
237
238void synchronize_acceptable_latency(void)
239{
240 blocking_notifier_call_chain(&latency_notifier,
241 atomic_read(&current_max_latency), NULL);
242}
243EXPORT_SYMBOL_GPL(synchronize_acceptable_latency);
244
245/*
246 * Latency notifier: this notifier gets called when a non-atomic new
247 * latency value gets set. The expectation of the caller of the
248 * non-atomic set is that when the call returns, future latencies
249 * are within bounds, so the functions on the notifier list are
250 * expected to take the overlong latencies immediately, inside the
251 * callback, and not make an overlong latency decision anymore.
252 *
253 * The callback gets called when the new latency value is made
254 * active so system_latency_constraint() returns the new latency.
255 */
256int register_latency_notifier(struct notifier_block * nb)
257{
258 return blocking_notifier_chain_register(&latency_notifier, nb);
259}
260EXPORT_SYMBOL_GPL(register_latency_notifier);
261
262int unregister_latency_notifier(struct notifier_block * nb)
263{
264 return blocking_notifier_chain_unregister(&latency_notifier, nb);
265}
266EXPORT_SYMBOL_GPL(unregister_latency_notifier);
267
268static __init int latency_init(void)
269{
270 atomic_set(&current_max_latency, INFINITE_LATENCY);
271 /*
272 * we don't want by default to have longer latencies than 2 ticks,
273 * since that would cause lost ticks
274 */
275 set_acceptable_latency("kernel", 2*1000000/HZ);
276 return 0;
277}
278
279module_init(latency_init);
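Taken together, the API is meant to be used like this from a driver: establish a constraint from process context, retune it from atomic context as conditions change, and remove it when the device falls idle. A hedged sketch matching the audio example from the header comment (identifier and numbers are made up):

#include <linux/latency.h>

static char example_id[] = "example-audio";

static int example_start_stream(void)
{
	/* Process context only: this path allocates and may call the notifier chain. */
	set_acceptable_latency(example_id, 150);	/* usec */
	return 0;
}

static void example_buffer_refilled(void)
{
	/*
	 * Safe in atomic context: modify_acceptable_latency() neither sleeps
	 * nor allocates, so it can be called from the interrupt handler that
	 * just refilled the DMA buffer.
	 */
	modify_acceptable_latency(example_id, 500);
}

static void example_stop_stream(void)
{
	remove_acceptable_latency(example_id);
}

Consumers such as the C-state selection code read the combined result with system_latency_constraint(), or subscribe with register_latency_notifier() to be told when the bound changes.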
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index e596525669..4c05534610 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -518,9 +518,9 @@ print_circular_bug_entry(struct lock_list *target, unsigned int depth)
518 518
519static void print_kernel_version(void) 519static void print_kernel_version(void)
520{ 520{
521 printk("%s %.*s\n", system_utsname.release, 521 printk("%s %.*s\n", init_utsname()->release,
522 (int)strcspn(system_utsname.version, " "), 522 (int)strcspn(init_utsname()->version, " "),
523 system_utsname.version); 523 init_utsname()->version);
524} 524}
525 525
526/* 526/*
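This is one of many call sites converted from the old global system_utsname to the UTS-namespace accessors introduced by this series (kernel/power/snapshot.c below gets the same treatment). The accessors are roughly as follows (reconstructed, not shown in this diff):

/* include/linux/utsname.h, approximate shape */
static inline struct new_utsname *utsname(void)
{
	return &current->nsproxy->uts_ns->name;	/* the current task's view */
}

static inline struct new_utsname *init_utsname(void)
{
	return &init_uts_ns.name;		/* the initial namespace's view */
}

Code that reports about the kernel itself (lockdep, the swsusp image header) uses init_utsname(); code acting on behalf of a task uses utsname().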
diff --git a/kernel/module.c b/kernel/module.c
index 05625d5dc7..7f60e782de 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -851,6 +851,7 @@ static int check_version(Elf_Shdr *sechdrs,
851 printk("%s: no version for \"%s\" found: kernel tainted.\n", 851 printk("%s: no version for \"%s\" found: kernel tainted.\n",
852 mod->name, symname); 852 mod->name, symname);
853 add_taint(TAINT_FORCED_MODULE); 853 add_taint(TAINT_FORCED_MODULE);
854 mod->taints |= TAINT_FORCED_MODULE;
854 } 855 }
855 return 1; 856 return 1;
856} 857}
@@ -1339,6 +1340,7 @@ static void set_license(struct module *mod, const char *license)
1339 printk(KERN_WARNING "%s: module license '%s' taints kernel.\n", 1340 printk(KERN_WARNING "%s: module license '%s' taints kernel.\n",
1340 mod->name, license); 1341 mod->name, license);
1341 add_taint(TAINT_PROPRIETARY_MODULE); 1342 add_taint(TAINT_PROPRIETARY_MODULE);
1343 mod->taints |= TAINT_PROPRIETARY_MODULE;
1342 } 1344 }
1343} 1345}
1344 1346
@@ -1618,6 +1620,7 @@ static struct module *load_module(void __user *umod,
1618 /* This is allowed: modprobe --force will invalidate it. */ 1620 /* This is allowed: modprobe --force will invalidate it. */
1619 if (!modmagic) { 1621 if (!modmagic) {
1620 add_taint(TAINT_FORCED_MODULE); 1622 add_taint(TAINT_FORCED_MODULE);
1623 mod->taints |= TAINT_FORCED_MODULE;
1621 printk(KERN_WARNING "%s: no version magic, tainting kernel.\n", 1624 printk(KERN_WARNING "%s: no version magic, tainting kernel.\n",
1622 mod->name); 1625 mod->name);
1623 } else if (!same_magic(modmagic, vermagic)) { 1626 } else if (!same_magic(modmagic, vermagic)) {
@@ -1711,10 +1714,14 @@ static struct module *load_module(void __user *umod,
1711 /* Set up license info based on the info section */ 1714 /* Set up license info based on the info section */
1712 set_license(mod, get_modinfo(sechdrs, infoindex, "license")); 1715 set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
1713 1716
1714 if (strcmp(mod->name, "ndiswrapper") == 0) 1717 if (strcmp(mod->name, "ndiswrapper") == 0) {
1715 add_taint(TAINT_PROPRIETARY_MODULE); 1718 add_taint(TAINT_PROPRIETARY_MODULE);
1716 if (strcmp(mod->name, "driverloader") == 0) 1719 mod->taints |= TAINT_PROPRIETARY_MODULE;
1720 }
1721 if (strcmp(mod->name, "driverloader") == 0) {
1717 add_taint(TAINT_PROPRIETARY_MODULE); 1722 add_taint(TAINT_PROPRIETARY_MODULE);
1723 mod->taints |= TAINT_PROPRIETARY_MODULE;
1724 }
1718 1725
1719 /* Set up MODINFO_ATTR fields */ 1726 /* Set up MODINFO_ATTR fields */
1720 setup_modinfo(mod, sechdrs, infoindex); 1727 setup_modinfo(mod, sechdrs, infoindex);
@@ -1760,6 +1767,7 @@ static struct module *load_module(void __user *umod,
1760 printk(KERN_WARNING "%s: No versions for exported symbols." 1767 printk(KERN_WARNING "%s: No versions for exported symbols."
1761 " Tainting kernel.\n", mod->name); 1768 " Tainting kernel.\n", mod->name);
1762 add_taint(TAINT_FORCED_MODULE); 1769 add_taint(TAINT_FORCED_MODULE);
1770 mod->taints |= TAINT_FORCED_MODULE;
1763 } 1771 }
1764#endif 1772#endif
1765 1773
@@ -2032,7 +2040,8 @@ const char *module_address_lookup(unsigned long addr,
2032 list_for_each_entry(mod, &modules, list) { 2040 list_for_each_entry(mod, &modules, list) {
2033 if (within(addr, mod->module_init, mod->init_size) 2041 if (within(addr, mod->module_init, mod->init_size)
2034 || within(addr, mod->module_core, mod->core_size)) { 2042 || within(addr, mod->module_core, mod->core_size)) {
2035 *modname = mod->name; 2043 if (modname)
2044 *modname = mod->name;
2036 return get_ksymbol(mod, addr, size, offset); 2045 return get_ksymbol(mod, addr, size, offset);
2037 } 2046 }
2038 } 2047 }
@@ -2226,14 +2235,37 @@ struct module *module_text_address(unsigned long addr)
2226 return mod; 2235 return mod;
2227} 2236}
2228 2237
2238static char *taint_flags(unsigned int taints, char *buf)
2239{
2240 *buf = '\0';
2241 if (taints) {
2242 int bx;
2243
2244 buf[0] = '(';
2245 bx = 1;
2246 if (taints & TAINT_PROPRIETARY_MODULE)
2247 buf[bx++] = 'P';
2248 if (taints & TAINT_FORCED_MODULE)
2249 buf[bx++] = 'F';
2250 /*
2251 * TAINT_FORCED_RMMOD: could be added.
2252 * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't
2253 * apply to modules.
2254 */
2255 buf[bx] = ')';
2256 }
2257 return buf;
2258}
2259
2229/* Don't grab lock, we're oopsing. */ 2260/* Don't grab lock, we're oopsing. */
2230void print_modules(void) 2261void print_modules(void)
2231{ 2262{
2232 struct module *mod; 2263 struct module *mod;
2264 char buf[8];
2233 2265
2234 printk("Modules linked in:"); 2266 printk("Modules linked in:");
2235 list_for_each_entry(mod, &modules, list) 2267 list_for_each_entry(mod, &modules, list)
2236 printk(" %s", mod->name); 2268 printk(" %s%s", mod->name, taint_flags(mod->taints, buf));
2237 printk("\n"); 2269 printk("\n");
2238} 2270}
2239 2271
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
new file mode 100644
index 0000000000..6ebdb82a0c
--- /dev/null
+++ b/kernel/nsproxy.c
@@ -0,0 +1,139 @@
1/*
2 * Copyright (C) 2006 IBM Corporation
3 *
4 * Author: Serge Hallyn <serue@us.ibm.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation, version 2 of the
9 * License.
10 *
11 * Jun 2006 - namespaces support
12 * OpenVZ, SWsoft Inc.
13 * Pavel Emelianov <xemul@openvz.org>
14 */
15
16#include <linux/module.h>
17#include <linux/version.h>
18#include <linux/nsproxy.h>
19#include <linux/init_task.h>
20#include <linux/namespace.h>
21#include <linux/utsname.h>
22
23struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
24
25static inline void get_nsproxy(struct nsproxy *ns)
26{
27 atomic_inc(&ns->count);
28}
29
30void get_task_namespaces(struct task_struct *tsk)
31{
32 struct nsproxy *ns = tsk->nsproxy;
33 if (ns) {
34 get_nsproxy(ns);
35 }
36}
37
38/*
39 * creates a copy of "orig" with refcount 1.
40 * This does not grab references to the contained namespaces,
41 * so that needs to be done by dup_namespaces.
42 */
43static inline struct nsproxy *clone_namespaces(struct nsproxy *orig)
44{
45 struct nsproxy *ns;
46
47 ns = kmalloc(sizeof(struct nsproxy), GFP_KERNEL);
48 if (ns) {
49 memcpy(ns, orig, sizeof(struct nsproxy));
50 atomic_set(&ns->count, 1);
51 }
52 return ns;
53}
54
55/*
56 * copies the nsproxy, setting refcount to 1, and grabbing a
57 * reference to all contained namespaces. Called from
58 * sys_unshare()
59 */
60struct nsproxy *dup_namespaces(struct nsproxy *orig)
61{
62 struct nsproxy *ns = clone_namespaces(orig);
63
64 if (ns) {
65 if (ns->namespace)
66 get_namespace(ns->namespace);
67 if (ns->uts_ns)
68 get_uts_ns(ns->uts_ns);
69 if (ns->ipc_ns)
70 get_ipc_ns(ns->ipc_ns);
71 }
72
73 return ns;
74}
75
76/*
77 * called from clone. This now handles copy for nsproxy and all
78 * namespaces therein.
79 */
80int copy_namespaces(int flags, struct task_struct *tsk)
81{
82 struct nsproxy *old_ns = tsk->nsproxy;
83 struct nsproxy *new_ns;
84 int err = 0;
85
86 if (!old_ns)
87 return 0;
88
89 get_nsproxy(old_ns);
90
91 if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
92 return 0;
93
94 new_ns = clone_namespaces(old_ns);
95 if (!new_ns) {
96 err = -ENOMEM;
97 goto out;
98 }
99
100 tsk->nsproxy = new_ns;
101
102 err = copy_namespace(flags, tsk);
103 if (err)
104 goto out_ns;
105
106 err = copy_utsname(flags, tsk);
107 if (err)
108 goto out_uts;
109
110 err = copy_ipcs(flags, tsk);
111 if (err)
112 goto out_ipc;
113
114out:
115 put_nsproxy(old_ns);
116 return err;
117
118out_ipc:
119 if (new_ns->uts_ns)
120 put_uts_ns(new_ns->uts_ns);
121out_uts:
122 if (new_ns->namespace)
123 put_namespace(new_ns->namespace);
124out_ns:
125 tsk->nsproxy = old_ns;
126 kfree(new_ns);
127 goto out;
128}
129
130void free_nsproxy(struct nsproxy *ns)
131{
132 if (ns->namespace)
133 put_namespace(ns->namespace);
134 if (ns->uts_ns)
135 put_uts_ns(ns->uts_ns);
136 if (ns->ipc_ns)
137 put_ipc_ns(ns->ipc_ns);
138 kfree(ns);
139}
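copy_namespaces() takes a reference on the parent's nsproxy and only clones it when one of CLONE_NEWNS, CLONE_NEWUTS or CLONE_NEWIPC is set, so the common fork path stays a single atomic_inc(). The structure being reference-counted, as far as this file exercises it, looks like this (any further fields in linux/nsproxy.h are omitted):

struct nsproxy {
	atomic_t		count;		/* tasks pointing at this proxy */
	struct namespace	*namespace;	/* mount namespace */
	struct uts_namespace	*uts_ns;
	struct ipc_namespace	*ipc_ns;
};

Keeping the namespace pointers behind one refcounted proxy means a plain clone() copies a single pointer, while unshare() or clone() with a CLONE_NEW* flag pays for a fresh nsproxy plus only the namespaces actually being replaced.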
diff --git a/kernel/panic.c b/kernel/panic.c
index 6ceb664fb5..525e365f72 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -21,7 +21,6 @@
21#include <linux/debug_locks.h> 21#include <linux/debug_locks.h>
22 22
23int panic_on_oops; 23int panic_on_oops;
24int panic_on_unrecovered_nmi;
25int tainted; 24int tainted;
26static int pause_on_oops; 25static int pause_on_oops;
27static int pause_on_oops_flag; 26static int pause_on_oops_flag;
diff --git a/kernel/pid.c b/kernel/pid.c
index 8387e8c681..b914392085 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -26,6 +26,7 @@
26#include <linux/init.h> 26#include <linux/init.h>
27#include <linux/bootmem.h> 27#include <linux/bootmem.h>
28#include <linux/hash.h> 28#include <linux/hash.h>
29#include <linux/pspace.h>
29 30
30#define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) 31#define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
31static struct hlist_head *pid_hash; 32static struct hlist_head *pid_hash;
@@ -33,17 +34,20 @@ static int pidhash_shift;
33static kmem_cache_t *pid_cachep; 34static kmem_cache_t *pid_cachep;
34 35
35int pid_max = PID_MAX_DEFAULT; 36int pid_max = PID_MAX_DEFAULT;
36int last_pid;
37 37
38#define RESERVED_PIDS 300 38#define RESERVED_PIDS 300
39 39
40int pid_max_min = RESERVED_PIDS + 1; 40int pid_max_min = RESERVED_PIDS + 1;
41int pid_max_max = PID_MAX_LIMIT; 41int pid_max_max = PID_MAX_LIMIT;
42 42
43#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
44#define BITS_PER_PAGE (PAGE_SIZE*8) 43#define BITS_PER_PAGE (PAGE_SIZE*8)
45#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) 44#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
46#define mk_pid(map, off) (((map) - pidmap_array)*BITS_PER_PAGE + (off)) 45
46static inline int mk_pid(struct pspace *pspace, struct pidmap *map, int off)
47{
48 return (map - pspace->pidmap)*BITS_PER_PAGE + off;
49}
50
47#define find_next_offset(map, off) \ 51#define find_next_offset(map, off) \
48 find_next_zero_bit((map)->page, BITS_PER_PAGE, off) 52 find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
49 53
@@ -53,13 +57,12 @@ int pid_max_max = PID_MAX_LIMIT;
53 * value does not cause lots of bitmaps to be allocated, but 57 * value does not cause lots of bitmaps to be allocated, but
54 * the scheme scales to up to 4 million PIDs, runtime. 58 * the scheme scales to up to 4 million PIDs, runtime.
55 */ 59 */
56typedef struct pidmap { 60struct pspace init_pspace = {
57 atomic_t nr_free; 61 .pidmap = {
58 void *page; 62 [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
59} pidmap_t; 63 },
60 64 .last_pid = 0
61static pidmap_t pidmap_array[PIDMAP_ENTRIES] = 65};
62 { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
63 66
64/* 67/*
65 * Note: disable interrupts while the pidmap_lock is held as an 68 * Note: disable interrupts while the pidmap_lock is held as an
@@ -74,40 +77,41 @@ static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
74 * irq handlers that take it we can leave the interrupts enabled. 77 * irq handlers that take it we can leave the interrupts enabled.
75 * For now it is easier to be safe than to prove it can't happen. 78 * For now it is easier to be safe than to prove it can't happen.
76 */ 79 */
80
77static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); 81static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
78 82
79static fastcall void free_pidmap(int pid) 83static fastcall void free_pidmap(struct pspace *pspace, int pid)
80{ 84{
81 pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE; 85 struct pidmap *map = pspace->pidmap + pid / BITS_PER_PAGE;
82 int offset = pid & BITS_PER_PAGE_MASK; 86 int offset = pid & BITS_PER_PAGE_MASK;
83 87
84 clear_bit(offset, map->page); 88 clear_bit(offset, map->page);
85 atomic_inc(&map->nr_free); 89 atomic_inc(&map->nr_free);
86} 90}
87 91
88static int alloc_pidmap(void) 92static int alloc_pidmap(struct pspace *pspace)
89{ 93{
90 int i, offset, max_scan, pid, last = last_pid; 94 int i, offset, max_scan, pid, last = pspace->last_pid;
91 pidmap_t *map; 95 struct pidmap *map;
92 96
93 pid = last + 1; 97 pid = last + 1;
94 if (pid >= pid_max) 98 if (pid >= pid_max)
95 pid = RESERVED_PIDS; 99 pid = RESERVED_PIDS;
96 offset = pid & BITS_PER_PAGE_MASK; 100 offset = pid & BITS_PER_PAGE_MASK;
97 map = &pidmap_array[pid/BITS_PER_PAGE]; 101 map = &pspace->pidmap[pid/BITS_PER_PAGE];
98 max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset; 102 max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
99 for (i = 0; i <= max_scan; ++i) { 103 for (i = 0; i <= max_scan; ++i) {
100 if (unlikely(!map->page)) { 104 if (unlikely(!map->page)) {
101 unsigned long page = get_zeroed_page(GFP_KERNEL); 105 void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
102 /* 106 /*
103 * Free the page if someone raced with us 107 * Free the page if someone raced with us
104 * installing it: 108 * installing it:
105 */ 109 */
106 spin_lock_irq(&pidmap_lock); 110 spin_lock_irq(&pidmap_lock);
107 if (map->page) 111 if (map->page)
108 free_page(page); 112 kfree(page);
109 else 113 else
110 map->page = (void *)page; 114 map->page = page;
111 spin_unlock_irq(&pidmap_lock); 115 spin_unlock_irq(&pidmap_lock);
112 if (unlikely(!map->page)) 116 if (unlikely(!map->page))
113 break; 117 break;
@@ -116,11 +120,11 @@ static int alloc_pidmap(void)
116 do { 120 do {
117 if (!test_and_set_bit(offset, map->page)) { 121 if (!test_and_set_bit(offset, map->page)) {
118 atomic_dec(&map->nr_free); 122 atomic_dec(&map->nr_free);
119 last_pid = pid; 123 pspace->last_pid = pid;
120 return pid; 124 return pid;
121 } 125 }
122 offset = find_next_offset(map, offset); 126 offset = find_next_offset(map, offset);
123 pid = mk_pid(map, offset); 127 pid = mk_pid(pspace, map, offset);
124 /* 128 /*
125 * find_next_offset() found a bit, the pid from it 129 * find_next_offset() found a bit, the pid from it
126 * is in-bounds, and if we fell back to the last 130 * is in-bounds, and if we fell back to the last
@@ -131,16 +135,34 @@ static int alloc_pidmap(void)
131 (i != max_scan || pid < last || 135 (i != max_scan || pid < last ||
132 !((last+1) & BITS_PER_PAGE_MASK))); 136 !((last+1) & BITS_PER_PAGE_MASK)));
133 } 137 }
134 if (map < &pidmap_array[(pid_max-1)/BITS_PER_PAGE]) { 138 if (map < &pspace->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
135 ++map; 139 ++map;
136 offset = 0; 140 offset = 0;
137 } else { 141 } else {
138 map = &pidmap_array[0]; 142 map = &pspace->pidmap[0];
139 offset = RESERVED_PIDS; 143 offset = RESERVED_PIDS;
140 if (unlikely(last == offset)) 144 if (unlikely(last == offset))
141 break; 145 break;
142 } 146 }
143 pid = mk_pid(map, offset); 147 pid = mk_pid(pspace, map, offset);
148 }
149 return -1;
150}
151
152static int next_pidmap(struct pspace *pspace, int last)
153{
154 int offset;
155 struct pidmap *map, *end;
156
157 offset = (last + 1) & BITS_PER_PAGE_MASK;
158 map = &pspace->pidmap[(last + 1)/BITS_PER_PAGE];
159 end = &pspace->pidmap[PIDMAP_ENTRIES];
160 for (; map < end; map++, offset = 0) {
161 if (unlikely(!map->page))
162 continue;
163 offset = find_next_bit((map)->page, BITS_PER_PAGE, offset);
164 if (offset < BITS_PER_PAGE)
165 return mk_pid(pspace, map, offset);
144 } 166 }
145 return -1; 167 return -1;
146} 168}
@@ -153,6 +175,7 @@ fastcall void put_pid(struct pid *pid)
153 atomic_dec_and_test(&pid->count)) 175 atomic_dec_and_test(&pid->count))
154 kmem_cache_free(pid_cachep, pid); 176 kmem_cache_free(pid_cachep, pid);
155} 177}
178EXPORT_SYMBOL_GPL(put_pid);
156 179
157static void delayed_put_pid(struct rcu_head *rhp) 180static void delayed_put_pid(struct rcu_head *rhp)
158{ 181{
@@ -169,7 +192,7 @@ fastcall void free_pid(struct pid *pid)
169 hlist_del_rcu(&pid->pid_chain); 192 hlist_del_rcu(&pid->pid_chain);
170 spin_unlock_irqrestore(&pidmap_lock, flags); 193 spin_unlock_irqrestore(&pidmap_lock, flags);
171 194
172 free_pidmap(pid->nr); 195 free_pidmap(&init_pspace, pid->nr);
173 call_rcu(&pid->rcu, delayed_put_pid); 196 call_rcu(&pid->rcu, delayed_put_pid);
174} 197}
175 198
@@ -183,7 +206,7 @@ struct pid *alloc_pid(void)
183 if (!pid) 206 if (!pid)
184 goto out; 207 goto out;
185 208
186 nr = alloc_pidmap(); 209 nr = alloc_pidmap(&init_pspace);
187 if (nr < 0) 210 if (nr < 0)
188 goto out_free; 211 goto out_free;
189 212
@@ -217,6 +240,7 @@ struct pid * fastcall find_pid(int nr)
217 } 240 }
218 return NULL; 241 return NULL;
219} 242}
243EXPORT_SYMBOL_GPL(find_pid);
220 244
221int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr) 245int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr)
222{ 246{
@@ -280,6 +304,15 @@ struct task_struct *find_task_by_pid_type(int type, int nr)
280 304
281EXPORT_SYMBOL(find_task_by_pid_type); 305EXPORT_SYMBOL(find_task_by_pid_type);
282 306
307struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
308{
309 struct pid *pid;
310 rcu_read_lock();
311 pid = get_pid(task->pids[type].pid);
312 rcu_read_unlock();
313 return pid;
314}
315
283struct task_struct *fastcall get_pid_task(struct pid *pid, enum pid_type type) 316struct task_struct *fastcall get_pid_task(struct pid *pid, enum pid_type type)
284{ 317{
285 struct task_struct *result; 318 struct task_struct *result;
@@ -303,6 +336,26 @@ struct pid *find_get_pid(pid_t nr)
303} 336}
304 337
305/* 338/*
 339 * Used by proc to find the first pid that is greater than or equal to nr.
 340 *
 341 * If there is a pid at nr, this function is exactly the same as find_pid.
342 */
343struct pid *find_ge_pid(int nr)
344{
345 struct pid *pid;
346
347 do {
348 pid = find_pid(nr);
349 if (pid)
350 break;
351 nr = next_pidmap(&init_pspace, nr);
352 } while (nr > 0);
353
354 return pid;
355}
356EXPORT_SYMBOL_GPL(find_get_pid);
357
358/*
306 * The pid hash table is scaled according to the amount of memory in the 359 * The pid hash table is scaled according to the amount of memory in the
307 * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or 360 * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
308 * more. 361 * more.
@@ -329,10 +382,10 @@ void __init pidhash_init(void)
329 382
330void __init pidmap_init(void) 383void __init pidmap_init(void)
331{ 384{
332 pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL); 385 init_pspace.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
333 /* Reserve PID 0. We never call free_pidmap(0) */ 386 /* Reserve PID 0. We never call free_pidmap(0) */
334 set_bit(0, pidmap_array->page); 387 set_bit(0, init_pspace.pidmap[0].page);
335 atomic_dec(&pidmap_array->nr_free); 388 atomic_dec(&init_pspace.pidmap[0].nr_free);
336 389
337 pid_cachep = kmem_cache_create("pid", sizeof(struct pid), 390 pid_cachep = kmem_cache_create("pid", sizeof(struct pid),
338 __alignof__(struct pid), 391 __alignof__(struct pid),
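
The next_pidmap()/find_ge_pid() pair added above gives /proc a way to resume a pid scan from an arbitrary number instead of re-walking the task list. A minimal sketch of such a scan, assuming struct pid still exposes ->nr as the rest of this patch does (illustrative only, locking omitted):

	struct pid *pid;
	int nr = 1;

	/* visit every allocated pid in ascending order */
	while ((pid = find_ge_pid(nr)) != NULL) {
		/* ... inspect or report this pid ... */
		nr = pid->nr + 1;	/* resume the scan just past it */
	}
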
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 1b84313cba..99f9b7d177 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -906,7 +906,7 @@ static void init_header(struct swsusp_info *info)
906 memset(info, 0, sizeof(struct swsusp_info)); 906 memset(info, 0, sizeof(struct swsusp_info));
907 info->version_code = LINUX_VERSION_CODE; 907 info->version_code = LINUX_VERSION_CODE;
908 info->num_physpages = num_physpages; 908 info->num_physpages = num_physpages;
909 memcpy(&info->uts, &system_utsname, sizeof(system_utsname)); 909 memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
910 info->cpus = num_online_cpus(); 910 info->cpus = num_online_cpus();
911 info->image_pages = nr_copy_pages; 911 info->image_pages = nr_copy_pages;
912 info->pages = nr_copy_pages + nr_meta_pages + 1; 912 info->pages = nr_copy_pages + nr_meta_pages + 1;
@@ -1050,13 +1050,13 @@ static inline int check_header(struct swsusp_info *info)
1050 reason = "kernel version"; 1050 reason = "kernel version";
1051 if (info->num_physpages != num_physpages) 1051 if (info->num_physpages != num_physpages)
1052 reason = "memory size"; 1052 reason = "memory size";
1053 if (strcmp(info->uts.sysname,system_utsname.sysname)) 1053 if (strcmp(info->uts.sysname,init_utsname()->sysname))
1054 reason = "system type"; 1054 reason = "system type";
1055 if (strcmp(info->uts.release,system_utsname.release)) 1055 if (strcmp(info->uts.release,init_utsname()->release))
1056 reason = "kernel release"; 1056 reason = "kernel release";
1057 if (strcmp(info->uts.version,system_utsname.version)) 1057 if (strcmp(info->uts.version,init_utsname()->version))
1058 reason = "version"; 1058 reason = "version";
1059 if (strcmp(info->uts.machine,system_utsname.machine)) 1059 if (strcmp(info->uts.machine,init_utsname()->machine))
1060 reason = "machine"; 1060 reason = "machine";
1061 if (reason) { 1061 if (reason) {
1062 printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason); 1062 printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason);
diff --git a/kernel/resource.c b/kernel/resource.c
index 9db38a1a75..6de60c1214 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -193,6 +193,13 @@ static int __release_resource(struct resource *old)
193 return -EINVAL; 193 return -EINVAL;
194} 194}
195 195
196/**
197 * request_resource - request and reserve an I/O or memory resource
198 * @root: root resource descriptor
199 * @new: resource descriptor desired by caller
200 *
201 * Returns 0 for success, negative error code on error.
202 */
196int request_resource(struct resource *root, struct resource *new) 203int request_resource(struct resource *root, struct resource *new)
197{ 204{
198 struct resource *conflict; 205 struct resource *conflict;
@@ -205,6 +212,15 @@ int request_resource(struct resource *root, struct resource *new)
205 212
206EXPORT_SYMBOL(request_resource); 213EXPORT_SYMBOL(request_resource);
207 214
215/**
216 * ____request_resource - reserve a resource, with resource conflict returned
217 * @root: root resource descriptor
218 * @new: resource descriptor desired by caller
219 *
220 * Returns:
221 * On success, NULL is returned.
222 * On error, a pointer to the conflicting resource is returned.
223 */
208struct resource *____request_resource(struct resource *root, struct resource *new) 224struct resource *____request_resource(struct resource *root, struct resource *new)
209{ 225{
210 struct resource *conflict; 226 struct resource *conflict;
@@ -217,6 +233,10 @@ struct resource *____request_resource(struct resource *root, struct resource *ne
217 233
218EXPORT_SYMBOL(____request_resource); 234EXPORT_SYMBOL(____request_resource);
219 235
236/**
237 * release_resource - release a previously reserved resource
238 * @old: resource pointer
239 */
220int release_resource(struct resource *old) 240int release_resource(struct resource *old)
221{ 241{
222 int retval; 242 int retval;
@@ -315,8 +335,16 @@ static int find_resource(struct resource *root, struct resource *new,
315 return -EBUSY; 335 return -EBUSY;
316} 336}
317 337
318/* 338/**
319 * Allocate empty slot in the resource tree given range and alignment. 339 * allocate_resource - allocate empty slot in the resource tree given range & alignment
340 * @root: root resource descriptor
341 * @new: resource descriptor desired by caller
342 * @size: requested resource region size
 343 * @min: minimum boundary to allocate within (lowest acceptable start address)
 344 * @max: maximum boundary to allocate within (highest acceptable end address)
345 * @align: alignment requested, in bytes
346 * @alignf: alignment function, optional, called if not NULL
347 * @alignf_data: arbitrary data to pass to the @alignf function
320 */ 348 */
321int allocate_resource(struct resource *root, struct resource *new, 349int allocate_resource(struct resource *root, struct resource *new,
322 resource_size_t size, resource_size_t min, 350 resource_size_t size, resource_size_t min,
@@ -407,10 +435,15 @@ int insert_resource(struct resource *parent, struct resource *new)
407 return result; 435 return result;
408} 436}
409 437
410/* 438/**
439 * adjust_resource - modify a resource's start and size
440 * @res: resource to modify
441 * @start: new start value
442 * @size: new size
443 *
411 * Given an existing resource, change its start and size to match the 444 * Given an existing resource, change its start and size to match the
412 * arguments. Returns -EBUSY if it can't fit. Existing children of 445 * arguments. Returns 0 on success, -EBUSY if it can't fit.
413 * the resource are assumed to be immutable. 446 * Existing children of the resource are assumed to be immutable.
414 */ 447 */
415int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size) 448int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size)
416{ 449{
@@ -456,11 +489,19 @@ EXPORT_SYMBOL(adjust_resource);
456 * Note how this, unlike the above, knows about 489 * Note how this, unlike the above, knows about
457 * the IO flag meanings (busy etc). 490 * the IO flag meanings (busy etc).
458 * 491 *
459 * Request-region creates a new busy region. 492 * request_region creates a new busy region.
460 * 493 *
461 * Check-region returns non-zero if the area is already busy 494 * check_region returns non-zero if the area is already busy.
462 * 495 *
463 * Release-region releases a matching busy region. 496 * release_region releases a matching busy region.
497 */
498
499/**
500 * __request_region - create a new busy resource region
501 * @parent: parent resource descriptor
502 * @start: resource start address
503 * @n: resource region size
504 * @name: reserving caller's ID string
464 */ 505 */
465struct resource * __request_region(struct resource *parent, 506struct resource * __request_region(struct resource *parent,
466 resource_size_t start, resource_size_t n, 507 resource_size_t start, resource_size_t n,
@@ -497,9 +538,23 @@ struct resource * __request_region(struct resource *parent,
497 } 538 }
498 return res; 539 return res;
499} 540}
500
501EXPORT_SYMBOL(__request_region); 541EXPORT_SYMBOL(__request_region);
502 542
543/**
544 * __check_region - check if a resource region is busy or free
545 * @parent: parent resource descriptor
546 * @start: resource start address
547 * @n: resource region size
548 *
549 * Returns 0 if the region is free at the moment it is checked,
550 * returns %-EBUSY if the region is busy.
551 *
552 * NOTE:
553 * This function is deprecated because its use is racy.
554 * Even if it returns 0, a subsequent call to request_region()
555 * may fail because another driver etc. just allocated the region.
556 * Do NOT use it. It will be removed from the kernel.
557 */
503int __check_region(struct resource *parent, resource_size_t start, 558int __check_region(struct resource *parent, resource_size_t start,
504 resource_size_t n) 559 resource_size_t n)
505{ 560{
@@ -513,9 +568,16 @@ int __check_region(struct resource *parent, resource_size_t start,
513 kfree(res); 568 kfree(res);
514 return 0; 569 return 0;
515} 570}
516
517EXPORT_SYMBOL(__check_region); 571EXPORT_SYMBOL(__check_region);
518 572
573/**
574 * __release_region - release a previously reserved resource region
575 * @parent: parent resource descriptor
576 * @start: resource start address
577 * @n: resource region size
578 *
579 * The described resource region must match a currently busy region.
580 */
519void __release_region(struct resource *parent, resource_size_t start, 581void __release_region(struct resource *parent, resource_size_t start,
520 resource_size_t n) 582 resource_size_t n)
521{ 583{
@@ -553,7 +615,6 @@ void __release_region(struct resource *parent, resource_size_t start,
553 "<%016llx-%016llx>\n", (unsigned long long)start, 615 "<%016llx-%016llx>\n", (unsigned long long)start,
554 (unsigned long long)end); 616 (unsigned long long)end);
555} 617}
556
557EXPORT_SYMBOL(__release_region); 618EXPORT_SYMBOL(__release_region);
558 619
559/* 620/*
diff --git a/kernel/sched.c b/kernel/sched.c
index 74f169ac07..53608a59d6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -49,7 +49,7 @@
49#include <linux/seq_file.h> 49#include <linux/seq_file.h>
50#include <linux/syscalls.h> 50#include <linux/syscalls.h>
51#include <linux/times.h> 51#include <linux/times.h>
52#include <linux/acct.h> 52#include <linux/tsacct_kern.h>
53#include <linux/kprobes.h> 53#include <linux/kprobes.h>
54#include <linux/delayacct.h> 54#include <linux/delayacct.h>
55#include <asm/tlb.h> 55#include <asm/tlb.h>
@@ -1232,7 +1232,7 @@ nextgroup:
1232} 1232}
1233 1233
1234/* 1234/*
1235 * find_idlest_queue - find the idlest runqueue among the cpus in group. 1235 * find_idlest_cpu - find the idlest cpu among the cpus in group.
1236 */ 1236 */
1237static int 1237static int
1238find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) 1238find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
@@ -1286,21 +1286,29 @@ static int sched_balance_self(int cpu, int flag)
1286 while (sd) { 1286 while (sd) {
1287 cpumask_t span; 1287 cpumask_t span;
1288 struct sched_group *group; 1288 struct sched_group *group;
1289 int new_cpu; 1289 int new_cpu, weight;
1290 int weight; 1290
1291 if (!(sd->flags & flag)) {
1292 sd = sd->child;
1293 continue;
1294 }
1291 1295
1292 span = sd->span; 1296 span = sd->span;
1293 group = find_idlest_group(sd, t, cpu); 1297 group = find_idlest_group(sd, t, cpu);
1294 if (!group) 1298 if (!group) {
1295 goto nextlevel; 1299 sd = sd->child;
1300 continue;
1301 }
1296 1302
1297 new_cpu = find_idlest_cpu(group, t, cpu); 1303 new_cpu = find_idlest_cpu(group, t, cpu);
1298 if (new_cpu == -1 || new_cpu == cpu) 1304 if (new_cpu == -1 || new_cpu == cpu) {
1299 goto nextlevel; 1305 /* Now try balancing at a lower domain level of cpu */
1306 sd = sd->child;
1307 continue;
1308 }
1300 1309
1301 /* Now try balancing at a lower domain level */ 1310 /* Now try balancing at a lower domain level of new_cpu */
1302 cpu = new_cpu; 1311 cpu = new_cpu;
1303nextlevel:
1304 sd = NULL; 1312 sd = NULL;
1305 weight = cpus_weight(span); 1313 weight = cpus_weight(span);
1306 for_each_domain(cpu, tmp) { 1314 for_each_domain(cpu, tmp) {
@@ -2533,8 +2541,14 @@ static int load_balance(int this_cpu, struct rq *this_rq,
2533 struct rq *busiest; 2541 struct rq *busiest;
2534 cpumask_t cpus = CPU_MASK_ALL; 2542 cpumask_t cpus = CPU_MASK_ALL;
2535 2543
2544 /*
2545 * When power savings policy is enabled for the parent domain, idle
2546 * sibling can pick up load irrespective of busy siblings. In this case,
2547 * let the state of idle sibling percolate up as IDLE, instead of
2548 * portraying it as NOT_IDLE.
2549 */
2536 if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && 2550 if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
2537 !sched_smt_power_savings) 2551 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
2538 sd_idle = 1; 2552 sd_idle = 1;
2539 2553
2540 schedstat_inc(sd, lb_cnt[idle]); 2554 schedstat_inc(sd, lb_cnt[idle]);
@@ -2630,7 +2644,7 @@ redo:
2630 } 2644 }
2631 2645
2632 if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && 2646 if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2633 !sched_smt_power_savings) 2647 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
2634 return -1; 2648 return -1;
2635 return nr_moved; 2649 return nr_moved;
2636 2650
@@ -2646,7 +2660,7 @@ out_one_pinned:
2646 sd->balance_interval *= 2; 2660 sd->balance_interval *= 2;
2647 2661
2648 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && 2662 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2649 !sched_smt_power_savings) 2663 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
2650 return -1; 2664 return -1;
2651 return 0; 2665 return 0;
2652} 2666}
@@ -2668,7 +2682,14 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
2668 int sd_idle = 0; 2682 int sd_idle = 0;
2669 cpumask_t cpus = CPU_MASK_ALL; 2683 cpumask_t cpus = CPU_MASK_ALL;
2670 2684
2671 if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) 2685 /*
2686 * When power savings policy is enabled for the parent domain, idle
2687 * sibling can pick up load irrespective of busy siblings. In this case,
2688 * let the state of idle sibling percolate up as IDLE, instead of
2689 * portraying it as NOT_IDLE.
2690 */
2691 if (sd->flags & SD_SHARE_CPUPOWER &&
2692 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
2672 sd_idle = 1; 2693 sd_idle = 1;
2673 2694
2674 schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); 2695 schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
@@ -2709,7 +2730,8 @@ redo:
2709 2730
2710 if (!nr_moved) { 2731 if (!nr_moved) {
2711 schedstat_inc(sd, lb_failed[NEWLY_IDLE]); 2732 schedstat_inc(sd, lb_failed[NEWLY_IDLE]);
2712 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER) 2733 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2734 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
2713 return -1; 2735 return -1;
2714 } else 2736 } else
2715 sd->nr_balance_failed = 0; 2737 sd->nr_balance_failed = 0;
@@ -2719,7 +2741,7 @@ redo:
2719out_balanced: 2741out_balanced:
2720 schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); 2742 schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
2721 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && 2743 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2722 !sched_smt_power_savings) 2744 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
2723 return -1; 2745 return -1;
2724 sd->nr_balance_failed = 0; 2746 sd->nr_balance_failed = 0;
2725 2747
@@ -4384,7 +4406,10 @@ EXPORT_SYMBOL(cpu_present_map);
4384 4406
4385#ifndef CONFIG_SMP 4407#ifndef CONFIG_SMP
4386cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL; 4408cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL;
4409EXPORT_SYMBOL(cpu_online_map);
4410
4387cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; 4411cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
4412EXPORT_SYMBOL(cpu_possible_map);
4388#endif 4413#endif
4389 4414
4390long sched_getaffinity(pid_t pid, cpumask_t *mask) 4415long sched_getaffinity(pid_t pid, cpumask_t *mask)
@@ -4814,7 +4839,7 @@ void show_state(void)
4814 * NOTE: this function does not set the idle thread's NEED_RESCHED 4839 * NOTE: this function does not set the idle thread's NEED_RESCHED
4815 * flag, to make booting more robust. 4840 * flag, to make booting more robust.
4816 */ 4841 */
4817void __devinit init_idle(struct task_struct *idle, int cpu) 4842void __cpuinit init_idle(struct task_struct *idle, int cpu)
4818{ 4843{
4819 struct rq *rq = cpu_rq(cpu); 4844 struct rq *rq = cpu_rq(cpu);
4820 unsigned long flags; 4845 unsigned long flags;
@@ -5389,7 +5414,9 @@ static int sd_degenerate(struct sched_domain *sd)
5389 if (sd->flags & (SD_LOAD_BALANCE | 5414 if (sd->flags & (SD_LOAD_BALANCE |
5390 SD_BALANCE_NEWIDLE | 5415 SD_BALANCE_NEWIDLE |
5391 SD_BALANCE_FORK | 5416 SD_BALANCE_FORK |
5392 SD_BALANCE_EXEC)) { 5417 SD_BALANCE_EXEC |
5418 SD_SHARE_CPUPOWER |
5419 SD_SHARE_PKG_RESOURCES)) {
5393 if (sd->groups != sd->groups->next) 5420 if (sd->groups != sd->groups->next)
5394 return 0; 5421 return 0;
5395 } 5422 }
@@ -5423,7 +5450,9 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
5423 pflags &= ~(SD_LOAD_BALANCE | 5450 pflags &= ~(SD_LOAD_BALANCE |
5424 SD_BALANCE_NEWIDLE | 5451 SD_BALANCE_NEWIDLE |
5425 SD_BALANCE_FORK | 5452 SD_BALANCE_FORK |
5426 SD_BALANCE_EXEC); 5453 SD_BALANCE_EXEC |
5454 SD_SHARE_CPUPOWER |
5455 SD_SHARE_PKG_RESOURCES);
5427 } 5456 }
5428 if (~cflags & pflags) 5457 if (~cflags & pflags)
5429 return 0; 5458 return 0;
@@ -5445,12 +5474,18 @@ static void cpu_attach_domain(struct sched_domain *sd, int cpu)
5445 struct sched_domain *parent = tmp->parent; 5474 struct sched_domain *parent = tmp->parent;
5446 if (!parent) 5475 if (!parent)
5447 break; 5476 break;
5448 if (sd_parent_degenerate(tmp, parent)) 5477 if (sd_parent_degenerate(tmp, parent)) {
5449 tmp->parent = parent->parent; 5478 tmp->parent = parent->parent;
5479 if (parent->parent)
5480 parent->parent->child = tmp;
5481 }
5450 } 5482 }
5451 5483
5452 if (sd && sd_degenerate(sd)) 5484 if (sd && sd_degenerate(sd)) {
5453 sd = sd->parent; 5485 sd = sd->parent;
5486 if (sd)
5487 sd->child = NULL;
5488 }
5454 5489
5455 sched_domain_debug(sd, cpu); 5490 sched_domain_debug(sd, cpu);
5456 5491
@@ -5458,7 +5493,7 @@ static void cpu_attach_domain(struct sched_domain *sd, int cpu)
5458} 5493}
5459 5494
5460/* cpus with isolated domains */ 5495/* cpus with isolated domains */
5461static cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE; 5496static cpumask_t __cpuinitdata cpu_isolated_map = CPU_MASK_NONE;
5462 5497
5463/* Setup the mask of cpus configured for isolated domains */ 5498/* Setup the mask of cpus configured for isolated domains */
5464static int __init isolated_cpu_setup(char *str) 5499static int __init isolated_cpu_setup(char *str)
@@ -5486,15 +5521,17 @@ __setup ("isolcpus=", isolated_cpu_setup);
5486 * covered by the given span, and will set each group's ->cpumask correctly, 5521 * covered by the given span, and will set each group's ->cpumask correctly,
5487 * and ->cpu_power to 0. 5522 * and ->cpu_power to 0.
5488 */ 5523 */
5489static void init_sched_build_groups(struct sched_group groups[], cpumask_t span, 5524static void
5490 int (*group_fn)(int cpu)) 5525init_sched_build_groups(struct sched_group groups[], cpumask_t span,
5526 const cpumask_t *cpu_map,
5527 int (*group_fn)(int cpu, const cpumask_t *cpu_map))
5491{ 5528{
5492 struct sched_group *first = NULL, *last = NULL; 5529 struct sched_group *first = NULL, *last = NULL;
5493 cpumask_t covered = CPU_MASK_NONE; 5530 cpumask_t covered = CPU_MASK_NONE;
5494 int i; 5531 int i;
5495 5532
5496 for_each_cpu_mask(i, span) { 5533 for_each_cpu_mask(i, span) {
5497 int group = group_fn(i); 5534 int group = group_fn(i, cpu_map);
5498 struct sched_group *sg = &groups[group]; 5535 struct sched_group *sg = &groups[group];
5499 int j; 5536 int j;
5500 5537
@@ -5505,7 +5542,7 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
5505 sg->cpu_power = 0; 5542 sg->cpu_power = 0;
5506 5543
5507 for_each_cpu_mask(j, span) { 5544 for_each_cpu_mask(j, span) {
5508 if (group_fn(j) != group) 5545 if (group_fn(j, cpu_map) != group)
5509 continue; 5546 continue;
5510 5547
5511 cpu_set(j, covered); 5548 cpu_set(j, covered);
@@ -5972,13 +6009,15 @@ static void calibrate_migration_costs(const cpumask_t *cpu_map)
5972#endif 6009#endif
5973 ); 6010 );
5974 if (system_state == SYSTEM_BOOTING) { 6011 if (system_state == SYSTEM_BOOTING) {
5975 printk("migration_cost="); 6012 if (num_online_cpus() > 1) {
5976 for (distance = 0; distance <= max_distance; distance++) { 6013 printk("migration_cost=");
5977 if (distance) 6014 for (distance = 0; distance <= max_distance; distance++) {
5978 printk(","); 6015 if (distance)
5979 printk("%ld", (long)migration_cost[distance] / 1000); 6016 printk(",");
6017 printk("%ld", (long)migration_cost[distance] / 1000);
6018 }
6019 printk("\n");
5980 } 6020 }
5981 printk("\n");
5982 } 6021 }
5983 j1 = jiffies; 6022 j1 = jiffies;
5984 if (migration_debug) 6023 if (migration_debug)
@@ -6081,7 +6120,7 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
6081static DEFINE_PER_CPU(struct sched_domain, cpu_domains); 6120static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
6082static struct sched_group sched_group_cpus[NR_CPUS]; 6121static struct sched_group sched_group_cpus[NR_CPUS];
6083 6122
6084static int cpu_to_cpu_group(int cpu) 6123static int cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map)
6085{ 6124{
6086 return cpu; 6125 return cpu;
6087} 6126}
@@ -6092,31 +6131,36 @@ static int cpu_to_cpu_group(int cpu)
6092 */ 6131 */
6093#ifdef CONFIG_SCHED_MC 6132#ifdef CONFIG_SCHED_MC
6094static DEFINE_PER_CPU(struct sched_domain, core_domains); 6133static DEFINE_PER_CPU(struct sched_domain, core_domains);
6095static struct sched_group *sched_group_core_bycpu[NR_CPUS]; 6134static struct sched_group sched_group_core[NR_CPUS];
6096#endif 6135#endif
6097 6136
6098#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) 6137#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
6099static int cpu_to_core_group(int cpu) 6138static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map)
6100{ 6139{
6101 return first_cpu(cpu_sibling_map[cpu]); 6140 cpumask_t mask = cpu_sibling_map[cpu];
6141 cpus_and(mask, mask, *cpu_map);
6142 return first_cpu(mask);
6102} 6143}
6103#elif defined(CONFIG_SCHED_MC) 6144#elif defined(CONFIG_SCHED_MC)
6104static int cpu_to_core_group(int cpu) 6145static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map)
6105{ 6146{
6106 return cpu; 6147 return cpu;
6107} 6148}
6108#endif 6149#endif
6109 6150
6110static DEFINE_PER_CPU(struct sched_domain, phys_domains); 6151static DEFINE_PER_CPU(struct sched_domain, phys_domains);
6111static struct sched_group *sched_group_phys_bycpu[NR_CPUS]; 6152static struct sched_group sched_group_phys[NR_CPUS];
6112 6153
6113static int cpu_to_phys_group(int cpu) 6154static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map)
6114{ 6155{
6115#ifdef CONFIG_SCHED_MC 6156#ifdef CONFIG_SCHED_MC
6116 cpumask_t mask = cpu_coregroup_map(cpu); 6157 cpumask_t mask = cpu_coregroup_map(cpu);
6158 cpus_and(mask, mask, *cpu_map);
6117 return first_cpu(mask); 6159 return first_cpu(mask);
6118#elif defined(CONFIG_SCHED_SMT) 6160#elif defined(CONFIG_SCHED_SMT)
6119 return first_cpu(cpu_sibling_map[cpu]); 6161 cpumask_t mask = cpu_sibling_map[cpu];
6162 cpus_and(mask, mask, *cpu_map);
6163 return first_cpu(mask);
6120#else 6164#else
6121 return cpu; 6165 return cpu;
6122#endif 6166#endif
@@ -6134,7 +6178,7 @@ static struct sched_group **sched_group_nodes_bycpu[NR_CPUS];
6134static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); 6178static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
6135static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS]; 6179static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS];
6136 6180
6137static int cpu_to_allnodes_group(int cpu) 6181static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map)
6138{ 6182{
6139 return cpu_to_node(cpu); 6183 return cpu_to_node(cpu);
6140} 6184}
@@ -6166,12 +6210,11 @@ next_sg:
6166} 6210}
6167#endif 6211#endif
6168 6212
6213#ifdef CONFIG_NUMA
6169/* Free memory allocated for various sched_group structures */ 6214/* Free memory allocated for various sched_group structures */
6170static void free_sched_groups(const cpumask_t *cpu_map) 6215static void free_sched_groups(const cpumask_t *cpu_map)
6171{ 6216{
6172 int cpu; 6217 int cpu, i;
6173#ifdef CONFIG_NUMA
6174 int i;
6175 6218
6176 for_each_cpu_mask(cpu, *cpu_map) { 6219 for_each_cpu_mask(cpu, *cpu_map) {
6177 struct sched_group *sched_group_allnodes 6220 struct sched_group *sched_group_allnodes
@@ -6208,19 +6251,63 @@ next_sg:
6208 kfree(sched_group_nodes); 6251 kfree(sched_group_nodes);
6209 sched_group_nodes_bycpu[cpu] = NULL; 6252 sched_group_nodes_bycpu[cpu] = NULL;
6210 } 6253 }
6254}
6255#else
6256static void free_sched_groups(const cpumask_t *cpu_map)
6257{
6258}
6211#endif 6259#endif
6212 for_each_cpu_mask(cpu, *cpu_map) { 6260
6213 if (sched_group_phys_bycpu[cpu]) { 6261/*
6214 kfree(sched_group_phys_bycpu[cpu]); 6262 * Initialize sched groups cpu_power.
6215 sched_group_phys_bycpu[cpu] = NULL; 6263 *
6216 } 6264 * cpu_power indicates the capacity of sched group, which is used while
6217#ifdef CONFIG_SCHED_MC 6265 * distributing the load between different sched groups in a sched domain.
6218 if (sched_group_core_bycpu[cpu]) { 6266 * Typically cpu_power for all the groups in a sched domain will be same unless
6219 kfree(sched_group_core_bycpu[cpu]); 6267 * there are asymmetries in the topology. If there are asymmetries, group
6220 sched_group_core_bycpu[cpu] = NULL; 6268 * having more cpu_power will pickup more load compared to the group having
6221 } 6269 * less cpu_power.
6222#endif 6270 *
6271 * cpu_power will be a multiple of SCHED_LOAD_SCALE. This multiple represents
6272 * the maximum number of tasks a group can handle in the presence of other idle
6273 * or lightly loaded groups in the same sched domain.
6274 */
6275static void init_sched_groups_power(int cpu, struct sched_domain *sd)
6276{
6277 struct sched_domain *child;
6278 struct sched_group *group;
6279
6280 WARN_ON(!sd || !sd->groups);
6281
6282 if (cpu != first_cpu(sd->groups->cpumask))
6283 return;
6284
6285 child = sd->child;
6286
6287 /*
6288 * For perf policy, if the groups in child domain share resources
6289 * (for example cores sharing some portions of the cache hierarchy
6290 * or SMT), then set this domain groups cpu_power such that each group
6291 * can handle only one task, when there are other idle groups in the
6292 * same sched domain.
6293 */
6294 if (!child || (!(sd->flags & SD_POWERSAVINGS_BALANCE) &&
6295 (child->flags &
6296 (SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES)))) {
6297 sd->groups->cpu_power = SCHED_LOAD_SCALE;
6298 return;
6223 } 6299 }
6300
6301 sd->groups->cpu_power = 0;
6302
6303 /*
 6304 * add cpu_power of each child group to this group's cpu_power
6305 */
6306 group = child->groups;
6307 do {
6308 sd->groups->cpu_power += group->cpu_power;
6309 group = group->next;
6310 } while (group != child->groups);
6224} 6311}
6225 6312
6226/* 6313/*
@@ -6230,10 +6317,7 @@ next_sg:
6230static int build_sched_domains(const cpumask_t *cpu_map) 6317static int build_sched_domains(const cpumask_t *cpu_map)
6231{ 6318{
6232 int i; 6319 int i;
6233 struct sched_group *sched_group_phys = NULL; 6320 struct sched_domain *sd;
6234#ifdef CONFIG_SCHED_MC
6235 struct sched_group *sched_group_core = NULL;
6236#endif
6237#ifdef CONFIG_NUMA 6321#ifdef CONFIG_NUMA
6238 struct sched_group **sched_group_nodes = NULL; 6322 struct sched_group **sched_group_nodes = NULL;
6239 struct sched_group *sched_group_allnodes = NULL; 6323 struct sched_group *sched_group_allnodes = NULL;
@@ -6265,9 +6349,10 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6265 > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { 6349 > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
6266 if (!sched_group_allnodes) { 6350 if (!sched_group_allnodes) {
6267 sched_group_allnodes 6351 sched_group_allnodes
6268 = kmalloc(sizeof(struct sched_group) 6352 = kmalloc_node(sizeof(struct sched_group)
6269 * MAX_NUMNODES, 6353 * MAX_NUMNODES,
6270 GFP_KERNEL); 6354 GFP_KERNEL,
6355 cpu_to_node(i));
6271 if (!sched_group_allnodes) { 6356 if (!sched_group_allnodes) {
6272 printk(KERN_WARNING 6357 printk(KERN_WARNING
6273 "Can not alloc allnodes sched group\n"); 6358 "Can not alloc allnodes sched group\n");
@@ -6279,7 +6364,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6279 sd = &per_cpu(allnodes_domains, i); 6364 sd = &per_cpu(allnodes_domains, i);
6280 *sd = SD_ALLNODES_INIT; 6365 *sd = SD_ALLNODES_INIT;
6281 sd->span = *cpu_map; 6366 sd->span = *cpu_map;
6282 group = cpu_to_allnodes_group(i); 6367 group = cpu_to_allnodes_group(i, cpu_map);
6283 sd->groups = &sched_group_allnodes[group]; 6368 sd->groups = &sched_group_allnodes[group];
6284 p = sd; 6369 p = sd;
6285 } else 6370 } else
@@ -6289,60 +6374,42 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6289 *sd = SD_NODE_INIT; 6374 *sd = SD_NODE_INIT;
6290 sd->span = sched_domain_node_span(cpu_to_node(i)); 6375 sd->span = sched_domain_node_span(cpu_to_node(i));
6291 sd->parent = p; 6376 sd->parent = p;
6377 if (p)
6378 p->child = sd;
6292 cpus_and(sd->span, sd->span, *cpu_map); 6379 cpus_and(sd->span, sd->span, *cpu_map);
6293#endif 6380#endif
6294 6381
6295 if (!sched_group_phys) {
6296 sched_group_phys
6297 = kmalloc(sizeof(struct sched_group) * NR_CPUS,
6298 GFP_KERNEL);
6299 if (!sched_group_phys) {
6300 printk (KERN_WARNING "Can not alloc phys sched"
6301 "group\n");
6302 goto error;
6303 }
6304 sched_group_phys_bycpu[i] = sched_group_phys;
6305 }
6306
6307 p = sd; 6382 p = sd;
6308 sd = &per_cpu(phys_domains, i); 6383 sd = &per_cpu(phys_domains, i);
6309 group = cpu_to_phys_group(i); 6384 group = cpu_to_phys_group(i, cpu_map);
6310 *sd = SD_CPU_INIT; 6385 *sd = SD_CPU_INIT;
6311 sd->span = nodemask; 6386 sd->span = nodemask;
6312 sd->parent = p; 6387 sd->parent = p;
6388 if (p)
6389 p->child = sd;
6313 sd->groups = &sched_group_phys[group]; 6390 sd->groups = &sched_group_phys[group];
6314 6391
6315#ifdef CONFIG_SCHED_MC 6392#ifdef CONFIG_SCHED_MC
6316 if (!sched_group_core) {
6317 sched_group_core
6318 = kmalloc(sizeof(struct sched_group) * NR_CPUS,
6319 GFP_KERNEL);
6320 if (!sched_group_core) {
6321 printk (KERN_WARNING "Can not alloc core sched"
6322 "group\n");
6323 goto error;
6324 }
6325 sched_group_core_bycpu[i] = sched_group_core;
6326 }
6327
6328 p = sd; 6393 p = sd;
6329 sd = &per_cpu(core_domains, i); 6394 sd = &per_cpu(core_domains, i);
6330 group = cpu_to_core_group(i); 6395 group = cpu_to_core_group(i, cpu_map);
6331 *sd = SD_MC_INIT; 6396 *sd = SD_MC_INIT;
6332 sd->span = cpu_coregroup_map(i); 6397 sd->span = cpu_coregroup_map(i);
6333 cpus_and(sd->span, sd->span, *cpu_map); 6398 cpus_and(sd->span, sd->span, *cpu_map);
6334 sd->parent = p; 6399 sd->parent = p;
6400 p->child = sd;
6335 sd->groups = &sched_group_core[group]; 6401 sd->groups = &sched_group_core[group];
6336#endif 6402#endif
6337 6403
6338#ifdef CONFIG_SCHED_SMT 6404#ifdef CONFIG_SCHED_SMT
6339 p = sd; 6405 p = sd;
6340 sd = &per_cpu(cpu_domains, i); 6406 sd = &per_cpu(cpu_domains, i);
6341 group = cpu_to_cpu_group(i); 6407 group = cpu_to_cpu_group(i, cpu_map);
6342 *sd = SD_SIBLING_INIT; 6408 *sd = SD_SIBLING_INIT;
6343 sd->span = cpu_sibling_map[i]; 6409 sd->span = cpu_sibling_map[i];
6344 cpus_and(sd->span, sd->span, *cpu_map); 6410 cpus_and(sd->span, sd->span, *cpu_map);
6345 sd->parent = p; 6411 sd->parent = p;
6412 p->child = sd;
6346 sd->groups = &sched_group_cpus[group]; 6413 sd->groups = &sched_group_cpus[group];
6347#endif 6414#endif
6348 } 6415 }
@@ -6356,7 +6423,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6356 continue; 6423 continue;
6357 6424
6358 init_sched_build_groups(sched_group_cpus, this_sibling_map, 6425 init_sched_build_groups(sched_group_cpus, this_sibling_map,
6359 &cpu_to_cpu_group); 6426 cpu_map, &cpu_to_cpu_group);
6360 } 6427 }
6361#endif 6428#endif
6362 6429
@@ -6368,7 +6435,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6368 if (i != first_cpu(this_core_map)) 6435 if (i != first_cpu(this_core_map))
6369 continue; 6436 continue;
6370 init_sched_build_groups(sched_group_core, this_core_map, 6437 init_sched_build_groups(sched_group_core, this_core_map,
6371 &cpu_to_core_group); 6438 cpu_map, &cpu_to_core_group);
6372 } 6439 }
6373#endif 6440#endif
6374 6441
@@ -6382,14 +6449,14 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6382 continue; 6449 continue;
6383 6450
6384 init_sched_build_groups(sched_group_phys, nodemask, 6451 init_sched_build_groups(sched_group_phys, nodemask,
6385 &cpu_to_phys_group); 6452 cpu_map, &cpu_to_phys_group);
6386 } 6453 }
6387 6454
6388#ifdef CONFIG_NUMA 6455#ifdef CONFIG_NUMA
6389 /* Set up node groups */ 6456 /* Set up node groups */
6390 if (sched_group_allnodes) 6457 if (sched_group_allnodes)
6391 init_sched_build_groups(sched_group_allnodes, *cpu_map, 6458 init_sched_build_groups(sched_group_allnodes, *cpu_map,
6392 &cpu_to_allnodes_group); 6459 cpu_map, &cpu_to_allnodes_group);
6393 6460
6394 for (i = 0; i < MAX_NUMNODES; i++) { 6461 for (i = 0; i < MAX_NUMNODES; i++) {
6395 /* Set up node groups */ 6462 /* Set up node groups */
@@ -6461,72 +6528,20 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6461 /* Calculate CPU power for physical packages and nodes */ 6528 /* Calculate CPU power for physical packages and nodes */
6462#ifdef CONFIG_SCHED_SMT 6529#ifdef CONFIG_SCHED_SMT
6463 for_each_cpu_mask(i, *cpu_map) { 6530 for_each_cpu_mask(i, *cpu_map) {
6464 struct sched_domain *sd;
6465 sd = &per_cpu(cpu_domains, i); 6531 sd = &per_cpu(cpu_domains, i);
6466 sd->groups->cpu_power = SCHED_LOAD_SCALE; 6532 init_sched_groups_power(i, sd);
6467 } 6533 }
6468#endif 6534#endif
6469#ifdef CONFIG_SCHED_MC 6535#ifdef CONFIG_SCHED_MC
6470 for_each_cpu_mask(i, *cpu_map) { 6536 for_each_cpu_mask(i, *cpu_map) {
6471 int power;
6472 struct sched_domain *sd;
6473 sd = &per_cpu(core_domains, i); 6537 sd = &per_cpu(core_domains, i);
6474 if (sched_smt_power_savings) 6538 init_sched_groups_power(i, sd);
6475 power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask);
6476 else
6477 power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1)
6478 * SCHED_LOAD_SCALE / 10;
6479 sd->groups->cpu_power = power;
6480 } 6539 }
6481#endif 6540#endif
6482 6541
6483 for_each_cpu_mask(i, *cpu_map) { 6542 for_each_cpu_mask(i, *cpu_map) {
6484 struct sched_domain *sd;
6485#ifdef CONFIG_SCHED_MC
6486 sd = &per_cpu(phys_domains, i); 6543 sd = &per_cpu(phys_domains, i);
6487 if (i != first_cpu(sd->groups->cpumask)) 6544 init_sched_groups_power(i, sd);
6488 continue;
6489
6490 sd->groups->cpu_power = 0;
6491 if (sched_mc_power_savings || sched_smt_power_savings) {
6492 int j;
6493
6494 for_each_cpu_mask(j, sd->groups->cpumask) {
6495 struct sched_domain *sd1;
6496 sd1 = &per_cpu(core_domains, j);
6497 /*
6498 * for each core we will add once
6499 * to the group in physical domain
6500 */
6501 if (j != first_cpu(sd1->groups->cpumask))
6502 continue;
6503
6504 if (sched_smt_power_savings)
6505 sd->groups->cpu_power += sd1->groups->cpu_power;
6506 else
6507 sd->groups->cpu_power += SCHED_LOAD_SCALE;
6508 }
6509 } else
6510 /*
6511 * This has to be < 2 * SCHED_LOAD_SCALE
6512 * Lets keep it SCHED_LOAD_SCALE, so that
6513 * while calculating NUMA group's cpu_power
6514 * we can simply do
6515 * numa_group->cpu_power += phys_group->cpu_power;
6516 *
6517 * See "only add power once for each physical pkg"
6518 * comment below
6519 */
6520 sd->groups->cpu_power = SCHED_LOAD_SCALE;
6521#else
6522 int power;
6523 sd = &per_cpu(phys_domains, i);
6524 if (sched_smt_power_savings)
6525 power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask);
6526 else
6527 power = SCHED_LOAD_SCALE;
6528 sd->groups->cpu_power = power;
6529#endif
6530 } 6545 }
6531 6546
6532#ifdef CONFIG_NUMA 6547#ifdef CONFIG_NUMA
@@ -6534,7 +6549,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6534 init_numa_sched_groups_power(sched_group_nodes[i]); 6549 init_numa_sched_groups_power(sched_group_nodes[i]);
6535 6550
6536 if (sched_group_allnodes) { 6551 if (sched_group_allnodes) {
6537 int group = cpu_to_allnodes_group(first_cpu(*cpu_map)); 6552 int group = cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map);
6538 struct sched_group *sg = &sched_group_allnodes[group]; 6553 struct sched_group *sg = &sched_group_allnodes[group];
6539 6554
6540 init_numa_sched_groups_power(sg); 6555 init_numa_sched_groups_power(sg);
@@ -6560,9 +6575,11 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6560 6575
6561 return 0; 6576 return 0;
6562 6577
6578#ifdef CONFIG_NUMA
6563error: 6579error:
6564 free_sched_groups(cpu_map); 6580 free_sched_groups(cpu_map);
6565 return -ENOMEM; 6581 return -ENOMEM;
6582#endif
6566} 6583}
6567/* 6584/*
6568 * Set up scheduler domains and groups. Callers must hold the hotplug lock. 6585 * Set up scheduler domains and groups. Callers must hold the hotplug lock.
@@ -6744,11 +6761,20 @@ static int update_sched_domains(struct notifier_block *nfb,
6744 6761
6745void __init sched_init_smp(void) 6762void __init sched_init_smp(void)
6746{ 6763{
6764 cpumask_t non_isolated_cpus;
6765
6747 lock_cpu_hotplug(); 6766 lock_cpu_hotplug();
6748 arch_init_sched_domains(&cpu_online_map); 6767 arch_init_sched_domains(&cpu_online_map);
6768 cpus_andnot(non_isolated_cpus, cpu_online_map, cpu_isolated_map);
6769 if (cpus_empty(non_isolated_cpus))
6770 cpu_set(smp_processor_id(), non_isolated_cpus);
6749 unlock_cpu_hotplug(); 6771 unlock_cpu_hotplug();
6750 /* XXX: Theoretical race here - CPU may be hotplugged now */ 6772 /* XXX: Theoretical race here - CPU may be hotplugged now */
6751 hotcpu_notifier(update_sched_domains, 0); 6773 hotcpu_notifier(update_sched_domains, 0);
6774
6775 /* Move init over to a non-isolated CPU */
6776 if (set_cpus_allowed(current, non_isolated_cpus) < 0)
6777 BUG();
6752} 6778}
6753#else 6779#else
6754void __init sched_init_smp(void) 6780void __init sched_init_smp(void)
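
init_sched_groups_power() above collapses the old per-config cpu_power arithmetic into one bottom-up rule: under the default performance policy, a group whose child domain shares CPU power or package resources is pinned to SCHED_LOAD_SCALE, and only under the power-savings policy does a group sum the cpu_power of its child groups. A worked example with invented numbers, assuming SCHED_LOAD_SCALE is 128:

	/*
	 * One physical package, two cores, each core an SMT pair.
	 *
	 * Performance policy: the MC child domain has SD_SHARE_PKG_RESOURCES,
	 * so the package-level group is clamped:
	 *	cpu_power = SCHED_LOAD_SCALE = 128
	 *
	 * Power-savings policy (SD_POWERSAVINGS_BALANCE set on the parent):
	 * the package-level group sums its two core-level child groups:
	 *	cpu_power = 128 + 128 = 256
	 */
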
diff --git a/kernel/signal.c b/kernel/signal.c
index fb5da6d19f..7ed8d5304b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1055,28 +1055,44 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
1055} 1055}
1056 1056
1057/* 1057/*
1058 * kill_pg_info() sends a signal to a process group: this is what the tty 1058 * kill_pgrp_info() sends a signal to a process group: this is what the tty
1059 * control characters do (^C, ^Z etc) 1059 * control characters do (^C, ^Z etc)
1060 */ 1060 */
1061 1061
1062int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp) 1062int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
1063{ 1063{
1064 struct task_struct *p = NULL; 1064 struct task_struct *p = NULL;
1065 int retval, success; 1065 int retval, success;
1066 1066
1067 if (pgrp <= 0)
1068 return -EINVAL;
1069
1070 success = 0; 1067 success = 0;
1071 retval = -ESRCH; 1068 retval = -ESRCH;
1072 do_each_task_pid(pgrp, PIDTYPE_PGID, p) { 1069 do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
1073 int err = group_send_sig_info(sig, info, p); 1070 int err = group_send_sig_info(sig, info, p);
1074 success |= !err; 1071 success |= !err;
1075 retval = err; 1072 retval = err;
1076 } while_each_task_pid(pgrp, PIDTYPE_PGID, p); 1073 } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
1077 return success ? 0 : retval; 1074 return success ? 0 : retval;
1078} 1075}
1079 1076
1077int kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
1078{
1079 int retval;
1080
1081 read_lock(&tasklist_lock);
1082 retval = __kill_pgrp_info(sig, info, pgrp);
1083 read_unlock(&tasklist_lock);
1084
1085 return retval;
1086}
1087
1088int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
1089{
1090 if (pgrp <= 0)
1091 return -EINVAL;
1092
1093 return __kill_pgrp_info(sig, info, find_pid(pgrp));
1094}
1095
1080int 1096int
1081kill_pg_info(int sig, struct siginfo *info, pid_t pgrp) 1097kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
1082{ 1098{
@@ -1089,8 +1105,7 @@ kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
1089 return retval; 1105 return retval;
1090} 1106}
1091 1107
1092int 1108int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
1093kill_proc_info(int sig, struct siginfo *info, pid_t pid)
1094{ 1109{
1095 int error; 1110 int error;
1096 int acquired_tasklist_lock = 0; 1111 int acquired_tasklist_lock = 0;
@@ -1101,7 +1116,7 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid)
1101 read_lock(&tasklist_lock); 1116 read_lock(&tasklist_lock);
1102 acquired_tasklist_lock = 1; 1117 acquired_tasklist_lock = 1;
1103 } 1118 }
1104 p = find_task_by_pid(pid); 1119 p = pid_task(pid, PIDTYPE_PID);
1105 error = -ESRCH; 1120 error = -ESRCH;
1106 if (p) 1121 if (p)
1107 error = group_send_sig_info(sig, info, p); 1122 error = group_send_sig_info(sig, info, p);
@@ -1111,8 +1126,18 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid)
1111 return error; 1126 return error;
1112} 1127}
1113 1128
1114/* like kill_proc_info(), but doesn't use uid/euid of "current" */ 1129int
1115int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid, 1130kill_proc_info(int sig, struct siginfo *info, pid_t pid)
1131{
1132 int error;
1133 rcu_read_lock();
1134 error = kill_pid_info(sig, info, find_pid(pid));
1135 rcu_read_unlock();
1136 return error;
1137}
1138
1139/* like kill_pid_info(), but doesn't use uid/euid of "current" */
1140int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
1116 uid_t uid, uid_t euid, u32 secid) 1141 uid_t uid, uid_t euid, u32 secid)
1117{ 1142{
1118 int ret = -EINVAL; 1143 int ret = -EINVAL;
@@ -1122,7 +1147,7 @@ int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid,
1122 return ret; 1147 return ret;
1123 1148
1124 read_lock(&tasklist_lock); 1149 read_lock(&tasklist_lock);
1125 p = find_task_by_pid(pid); 1150 p = pid_task(pid, PIDTYPE_PID);
1126 if (!p) { 1151 if (!p) {
1127 ret = -ESRCH; 1152 ret = -ESRCH;
1128 goto out_unlock; 1153 goto out_unlock;
@@ -1146,7 +1171,7 @@ out_unlock:
1146 read_unlock(&tasklist_lock); 1171 read_unlock(&tasklist_lock);
1147 return ret; 1172 return ret;
1148} 1173}
1149EXPORT_SYMBOL_GPL(kill_proc_info_as_uid); 1174EXPORT_SYMBOL_GPL(kill_pid_info_as_uid);
1150 1175
1151/* 1176/*
1152 * kill_something_info() interprets pid in interesting ways just like kill(2). 1177 * kill_something_info() interprets pid in interesting ways just like kill(2).
@@ -1264,6 +1289,18 @@ force_sigsegv(int sig, struct task_struct *p)
1264 return 0; 1289 return 0;
1265} 1290}
1266 1291
1292int kill_pgrp(struct pid *pid, int sig, int priv)
1293{
1294 return kill_pgrp_info(sig, __si_special(priv), pid);
1295}
1296EXPORT_SYMBOL(kill_pgrp);
1297
1298int kill_pid(struct pid *pid, int sig, int priv)
1299{
1300 return kill_pid_info(sig, __si_special(priv), pid);
1301}
1302EXPORT_SYMBOL(kill_pid);
1303
1267int 1304int
1268kill_pg(pid_t pgrp, int sig, int priv) 1305kill_pg(pid_t pgrp, int sig, int priv)
1269{ 1306{
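
kill_pgrp() and kill_pid() give callers a struct pid based entry point next to the older pid_t helpers, which pairs naturally with the reference-counted lookups added in kernel/pid.c above. A sketch of the intended usage (pgrp_nr is a made-up variable; error handling is minimal):

	struct pid *pgrp = find_get_pid(pgrp_nr);	/* takes a reference */

	if (pgrp) {
		kill_pgrp(pgrp, SIGHUP, 1);	/* priv=1: send as a kernel-internal signal */
		put_pid(pgrp);			/* drop the reference */
	}
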
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index d48143eafb..476c374151 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -215,7 +215,7 @@ void __lockfunc _##op##_lock(locktype##_t *lock) \
215 if (!(lock)->break_lock) \ 215 if (!(lock)->break_lock) \
216 (lock)->break_lock = 1; \ 216 (lock)->break_lock = 1; \
217 while (!op##_can_lock(lock) && (lock)->break_lock) \ 217 while (!op##_can_lock(lock) && (lock)->break_lock) \
218 cpu_relax(); \ 218 _raw_##op##_relax(&lock->raw_lock); \
219 } \ 219 } \
220 (lock)->break_lock = 0; \ 220 (lock)->break_lock = 0; \
221} \ 221} \
@@ -237,7 +237,7 @@ unsigned long __lockfunc _##op##_lock_irqsave(locktype##_t *lock) \
237 if (!(lock)->break_lock) \ 237 if (!(lock)->break_lock) \
238 (lock)->break_lock = 1; \ 238 (lock)->break_lock = 1; \
239 while (!op##_can_lock(lock) && (lock)->break_lock) \ 239 while (!op##_can_lock(lock) && (lock)->break_lock) \
240 cpu_relax(); \ 240 _raw_##op##_relax(&lock->raw_lock); \
241 } \ 241 } \
242 (lock)->break_lock = 0; \ 242 (lock)->break_lock = 0; \
243 return flags; \ 243 return flags; \
diff --git a/kernel/sys.c b/kernel/sys.c
index 8647061c08..2314867ae3 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -92,7 +92,8 @@ EXPORT_SYMBOL(fs_overflowgid);
92 */ 92 */
93 93
94int C_A_D = 1; 94int C_A_D = 1;
95int cad_pid = 1; 95struct pid *cad_pid;
96EXPORT_SYMBOL(cad_pid);
96 97
97/* 98/*
98 * Notifier list for kernel code which wants to be called 99 * Notifier list for kernel code which wants to be called
@@ -221,7 +222,7 @@ EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
221 * of the last notifier function called. 222 * of the last notifier function called.
222 */ 223 */
223 224
224int atomic_notifier_call_chain(struct atomic_notifier_head *nh, 225int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh,
225 unsigned long val, void *v) 226 unsigned long val, void *v)
226{ 227{
227 int ret; 228 int ret;
@@ -607,11 +608,10 @@ static void kernel_restart_prepare(char *cmd)
607void kernel_restart(char *cmd) 608void kernel_restart(char *cmd)
608{ 609{
609 kernel_restart_prepare(cmd); 610 kernel_restart_prepare(cmd);
610 if (!cmd) { 611 if (!cmd)
611 printk(KERN_EMERG "Restarting system.\n"); 612 printk(KERN_EMERG "Restarting system.\n");
612 } else { 613 else
613 printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); 614 printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
614 }
615 machine_restart(cmd); 615 machine_restart(cmd);
616} 616}
617EXPORT_SYMBOL_GPL(kernel_restart); 617EXPORT_SYMBOL_GPL(kernel_restart);
@@ -627,9 +627,8 @@ static void kernel_kexec(void)
627#ifdef CONFIG_KEXEC 627#ifdef CONFIG_KEXEC
628 struct kimage *image; 628 struct kimage *image;
629 image = xchg(&kexec_image, NULL); 629 image = xchg(&kexec_image, NULL);
630 if (!image) { 630 if (!image)
631 return; 631 return;
632 }
633 kernel_restart_prepare(NULL); 632 kernel_restart_prepare(NULL);
634 printk(KERN_EMERG "Starting new kernel\n"); 633 printk(KERN_EMERG "Starting new kernel\n");
635 machine_shutdown(); 634 machine_shutdown();
@@ -775,10 +774,9 @@ void ctrl_alt_del(void)
775 if (C_A_D) 774 if (C_A_D)
776 schedule_work(&cad_work); 775 schedule_work(&cad_work);
777 else 776 else
778 kill_proc(cad_pid, SIGINT, 1); 777 kill_cad_pid(SIGINT, 1);
779} 778}
780 779
781
782/* 780/*
783 * Unprivileged users may change the real gid to the effective gid 781 * Unprivileged users may change the real gid to the effective gid
784 * or vice versa. (BSD-style) 782 * or vice versa. (BSD-style)
@@ -823,12 +821,10 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
823 (current->sgid == egid) || 821 (current->sgid == egid) ||
824 capable(CAP_SETGID)) 822 capable(CAP_SETGID))
825 new_egid = egid; 823 new_egid = egid;
826 else { 824 else
827 return -EPERM; 825 return -EPERM;
828 }
829 } 826 }
830 if (new_egid != old_egid) 827 if (new_egid != old_egid) {
831 {
832 current->mm->dumpable = suid_dumpable; 828 current->mm->dumpable = suid_dumpable;
833 smp_wmb(); 829 smp_wmb();
834 } 830 }
@@ -857,19 +853,14 @@ asmlinkage long sys_setgid(gid_t gid)
857 if (retval) 853 if (retval)
858 return retval; 854 return retval;
859 855
860 if (capable(CAP_SETGID)) 856 if (capable(CAP_SETGID)) {
861 { 857 if (old_egid != gid) {
862 if(old_egid != gid)
863 {
864 current->mm->dumpable = suid_dumpable; 858 current->mm->dumpable = suid_dumpable;
865 smp_wmb(); 859 smp_wmb();
866 } 860 }
867 current->gid = current->egid = current->sgid = current->fsgid = gid; 861 current->gid = current->egid = current->sgid = current->fsgid = gid;
868 } 862 } else if ((gid == current->gid) || (gid == current->sgid)) {
869 else if ((gid == current->gid) || (gid == current->sgid)) 863 if (old_egid != gid) {
870 {
871 if(old_egid != gid)
872 {
873 current->mm->dumpable = suid_dumpable; 864 current->mm->dumpable = suid_dumpable;
874 smp_wmb(); 865 smp_wmb();
875 } 866 }
@@ -900,8 +891,7 @@ static int set_user(uid_t new_ruid, int dumpclear)
900 891
901 switch_uid(new_user); 892 switch_uid(new_user);
902 893
903 if(dumpclear) 894 if (dumpclear) {
904 {
905 current->mm->dumpable = suid_dumpable; 895 current->mm->dumpable = suid_dumpable;
906 smp_wmb(); 896 smp_wmb();
907 } 897 }
@@ -957,8 +947,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
957 if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0) 947 if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0)
958 return -EAGAIN; 948 return -EAGAIN;
959 949
960 if (new_euid != old_euid) 950 if (new_euid != old_euid) {
961 {
962 current->mm->dumpable = suid_dumpable; 951 current->mm->dumpable = suid_dumpable;
963 smp_wmb(); 952 smp_wmb();
964 } 953 }
@@ -1008,8 +997,7 @@ asmlinkage long sys_setuid(uid_t uid)
1008 } else if ((uid != current->uid) && (uid != new_suid)) 997 } else if ((uid != current->uid) && (uid != new_suid))
1009 return -EPERM; 998 return -EPERM;
1010 999
1011 if (old_euid != uid) 1000 if (old_euid != uid) {
1012 {
1013 current->mm->dumpable = suid_dumpable; 1001 current->mm->dumpable = suid_dumpable;
1014 smp_wmb(); 1002 smp_wmb();
1015 } 1003 }
@@ -1054,8 +1042,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
1054 return -EAGAIN; 1042 return -EAGAIN;
1055 } 1043 }
1056 if (euid != (uid_t) -1) { 1044 if (euid != (uid_t) -1) {
1057 if (euid != current->euid) 1045 if (euid != current->euid) {
1058 {
1059 current->mm->dumpable = suid_dumpable; 1046 current->mm->dumpable = suid_dumpable;
1060 smp_wmb(); 1047 smp_wmb();
1061 } 1048 }
@@ -1105,8 +1092,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
1105 return -EPERM; 1092 return -EPERM;
1106 } 1093 }
1107 if (egid != (gid_t) -1) { 1094 if (egid != (gid_t) -1) {
1108 if (egid != current->egid) 1095 if (egid != current->egid) {
1109 {
1110 current->mm->dumpable = suid_dumpable; 1096 current->mm->dumpable = suid_dumpable;
1111 smp_wmb(); 1097 smp_wmb();
1112 } 1098 }
@@ -1151,10 +1137,8 @@ asmlinkage long sys_setfsuid(uid_t uid)
1151 1137
1152 if (uid == current->uid || uid == current->euid || 1138 if (uid == current->uid || uid == current->euid ||
1153 uid == current->suid || uid == current->fsuid || 1139 uid == current->suid || uid == current->fsuid ||
1154 capable(CAP_SETUID)) 1140 capable(CAP_SETUID)) {
1155 { 1141 if (uid != old_fsuid) {
1156 if (uid != old_fsuid)
1157 {
1158 current->mm->dumpable = suid_dumpable; 1142 current->mm->dumpable = suid_dumpable;
1159 smp_wmb(); 1143 smp_wmb();
1160 } 1144 }
@@ -1182,10 +1166,8 @@ asmlinkage long sys_setfsgid(gid_t gid)
1182 1166
1183 if (gid == current->gid || gid == current->egid || 1167 if (gid == current->gid || gid == current->egid ||
1184 gid == current->sgid || gid == current->fsgid || 1168 gid == current->sgid || gid == current->fsgid ||
1185 capable(CAP_SETGID)) 1169 capable(CAP_SETGID)) {
1186 { 1170 if (gid != old_fsgid) {
1187 if (gid != old_fsgid)
1188 {
1189 current->mm->dumpable = suid_dumpable; 1171 current->mm->dumpable = suid_dumpable;
1190 smp_wmb(); 1172 smp_wmb();
1191 } 1173 }
@@ -1321,9 +1303,9 @@ out:
1321 1303
1322asmlinkage long sys_getpgid(pid_t pid) 1304asmlinkage long sys_getpgid(pid_t pid)
1323{ 1305{
1324 if (!pid) { 1306 if (!pid)
1325 return process_group(current); 1307 return process_group(current);
1326 } else { 1308 else {
1327 int retval; 1309 int retval;
1328 struct task_struct *p; 1310 struct task_struct *p;
1329 1311
@@ -1353,9 +1335,9 @@ asmlinkage long sys_getpgrp(void)
1353 1335
1354asmlinkage long sys_getsid(pid_t pid) 1336asmlinkage long sys_getsid(pid_t pid)
1355{ 1337{
1356 if (!pid) { 1338 if (!pid)
1357 return current->signal->session; 1339 return current->signal->session;
1358 } else { 1340 else {
1359 int retval; 1341 int retval;
1360 struct task_struct *p; 1342 struct task_struct *p;
1361 1343
@@ -1363,7 +1345,7 @@ asmlinkage long sys_getsid(pid_t pid)
1363 p = find_task_by_pid(pid); 1345 p = find_task_by_pid(pid);
1364 1346
1365 retval = -ESRCH; 1347 retval = -ESRCH;
1366 if(p) { 1348 if (p) {
1367 retval = security_task_getsid(p); 1349 retval = security_task_getsid(p);
1368 if (!retval) 1350 if (!retval)
1369 retval = p->signal->session; 1351 retval = p->signal->session;
@@ -1431,9 +1413,9 @@ struct group_info *groups_alloc(int gidsetsize)
1431 group_info->nblocks = nblocks; 1413 group_info->nblocks = nblocks;
1432 atomic_set(&group_info->usage, 1); 1414 atomic_set(&group_info->usage, 1);
1433 1415
1434 if (gidsetsize <= NGROUPS_SMALL) { 1416 if (gidsetsize <= NGROUPS_SMALL)
1435 group_info->blocks[0] = group_info->small_block; 1417 group_info->blocks[0] = group_info->small_block;
1436 } else { 1418 else {
1437 for (i = 0; i < nblocks; i++) { 1419 for (i = 0; i < nblocks; i++) {
1438 gid_t *b; 1420 gid_t *b;
1439 b = (void *)__get_free_page(GFP_USER); 1421 b = (void *)__get_free_page(GFP_USER);
@@ -1489,7 +1471,7 @@ static int groups_to_user(gid_t __user *grouplist,
1489/* fill a group_info from a user-space array - it must be allocated already */ 1471/* fill a group_info from a user-space array - it must be allocated already */
1490static int groups_from_user(struct group_info *group_info, 1472static int groups_from_user(struct group_info *group_info,
1491 gid_t __user *grouplist) 1473 gid_t __user *grouplist)
1492 { 1474{
1493 int i; 1475 int i;
1494 int count = group_info->ngroups; 1476 int count = group_info->ngroups;
1495 1477
@@ -1647,9 +1629,8 @@ asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist)
1647int in_group_p(gid_t grp) 1629int in_group_p(gid_t grp)
1648{ 1630{
1649 int retval = 1; 1631 int retval = 1;
1650 if (grp != current->fsgid) { 1632 if (grp != current->fsgid)
1651 retval = groups_search(current->group_info, grp); 1633 retval = groups_search(current->group_info, grp);
1652 }
1653 return retval; 1634 return retval;
1654} 1635}
1655 1636
@@ -1658,9 +1639,8 @@ EXPORT_SYMBOL(in_group_p);
1658int in_egroup_p(gid_t grp) 1639int in_egroup_p(gid_t grp)
1659{ 1640{
1660 int retval = 1; 1641 int retval = 1;
1661 if (grp != current->egid) { 1642 if (grp != current->egid)
1662 retval = groups_search(current->group_info, grp); 1643 retval = groups_search(current->group_info, grp);
1663 }
1664 return retval; 1644 return retval;
1665} 1645}
1666 1646
@@ -1675,7 +1655,7 @@ asmlinkage long sys_newuname(struct new_utsname __user * name)
1675 int errno = 0; 1655 int errno = 0;
1676 1656
1677 down_read(&uts_sem); 1657 down_read(&uts_sem);
1678 if (copy_to_user(name,&system_utsname,sizeof *name)) 1658 if (copy_to_user(name, utsname(), sizeof *name))
1679 errno = -EFAULT; 1659 errno = -EFAULT;
1680 up_read(&uts_sem); 1660 up_read(&uts_sem);
1681 return errno; 1661 return errno;
@@ -1693,8 +1673,8 @@ asmlinkage long sys_sethostname(char __user *name, int len)
1693 down_write(&uts_sem); 1673 down_write(&uts_sem);
1694 errno = -EFAULT; 1674 errno = -EFAULT;
1695 if (!copy_from_user(tmp, name, len)) { 1675 if (!copy_from_user(tmp, name, len)) {
1696 memcpy(system_utsname.nodename, tmp, len); 1676 memcpy(utsname()->nodename, tmp, len);
1697 system_utsname.nodename[len] = 0; 1677 utsname()->nodename[len] = 0;
1698 errno = 0; 1678 errno = 0;
1699 } 1679 }
1700 up_write(&uts_sem); 1680 up_write(&uts_sem);
@@ -1710,11 +1690,11 @@ asmlinkage long sys_gethostname(char __user *name, int len)
1710 if (len < 0) 1690 if (len < 0)
1711 return -EINVAL; 1691 return -EINVAL;
1712 down_read(&uts_sem); 1692 down_read(&uts_sem);
1713 i = 1 + strlen(system_utsname.nodename); 1693 i = 1 + strlen(utsname()->nodename);
1714 if (i > len) 1694 if (i > len)
1715 i = len; 1695 i = len;
1716 errno = 0; 1696 errno = 0;
1717 if (copy_to_user(name, system_utsname.nodename, i)) 1697 if (copy_to_user(name, utsname()->nodename, i))
1718 errno = -EFAULT; 1698 errno = -EFAULT;
1719 up_read(&uts_sem); 1699 up_read(&uts_sem);
1720 return errno; 1700 return errno;
@@ -1739,8 +1719,8 @@ asmlinkage long sys_setdomainname(char __user *name, int len)
1739 down_write(&uts_sem); 1719 down_write(&uts_sem);
1740 errno = -EFAULT; 1720 errno = -EFAULT;
1741 if (!copy_from_user(tmp, name, len)) { 1721 if (!copy_from_user(tmp, name, len)) {
1742 memcpy(system_utsname.domainname, tmp, len); 1722 memcpy(utsname()->domainname, tmp, len);
1743 system_utsname.domainname[len] = 0; 1723 utsname()->domainname[len] = 0;
1744 errno = 0; 1724 errno = 0;
1745 } 1725 }
1746 up_write(&uts_sem); 1726 up_write(&uts_sem);
@@ -1775,9 +1755,9 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r
1775 task_lock(current->group_leader); 1755 task_lock(current->group_leader);
1776 x = current->signal->rlim[resource]; 1756 x = current->signal->rlim[resource];
1777 task_unlock(current->group_leader); 1757 task_unlock(current->group_leader);
1778 if(x.rlim_cur > 0x7FFFFFFF) 1758 if (x.rlim_cur > 0x7FFFFFFF)
1779 x.rlim_cur = 0x7FFFFFFF; 1759 x.rlim_cur = 0x7FFFFFFF;
1780 if(x.rlim_max > 0x7FFFFFFF) 1760 if (x.rlim_max > 0x7FFFFFFF)
1781 x.rlim_max = 0x7FFFFFFF; 1761 x.rlim_max = 0x7FFFFFFF;
1782 return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0; 1762 return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
1783} 1763}
@@ -2083,12 +2063,12 @@ asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep,
2083 * padding 2063 * padding
2084 */ 2064 */
2085 unsigned long t0, t1; 2065 unsigned long t0, t1;
2086 get_user(t0, &cache->t0); 2066 get_user(t0, &cache->blob[0]);
2087 get_user(t1, &cache->t1); 2067 get_user(t1, &cache->blob[1]);
2088 t0++; 2068 t0++;
2089 t1++; 2069 t1++;
2090 put_user(t0, &cache->t0); 2070 put_user(t0, &cache->blob[0]);
2091 put_user(t1, &cache->t1); 2071 put_user(t1, &cache->blob[1]);
2092 } 2072 }
2093 return err ? -EFAULT : 0; 2073 return err ? -EFAULT : 0;
2094} 2074}
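The kernel/sys.c hunks above swap every direct use of the system_utsname global for the utsname() accessor, so uname data is resolved through the calling task's UTS namespace while readers and writers still serialize on uts_sem. A minimal sketch of such an accessor, assuming the current->nsproxy->uts_ns layout this series introduces (not quoted from the patch itself):

    /* Sketch only: per-task view of the uname strings, assuming the
     * nsproxy/uts_ns layout added by this series. */
    static inline struct new_utsname *utsname(void)
    {
        return &current->nsproxy->uts_ns->name;
    }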
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 6991bece67..7a3b2e75f0 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -134,3 +134,8 @@ cond_syscall(sys_madvise);
134cond_syscall(sys_mremap); 134cond_syscall(sys_mremap);
135cond_syscall(sys_remap_file_pages); 135cond_syscall(sys_remap_file_pages);
136cond_syscall(compat_sys_move_pages); 136cond_syscall(compat_sys_move_pages);
137
138/* block-layer dependent */
139cond_syscall(sys_bdflush);
140cond_syscall(sys_ioprio_set);
141cond_syscall(sys_ioprio_get);
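The new sys_ni.c entries mark sys_bdflush and the ioprio syscalls as conditional: when the block layer is configured out, the unresolved symbols fall back to sys_ni_syscall and the calls simply return -ENOSYS. Roughly how cond_syscall() achieves that, sketched here as an assumption since the exact macro text is architecture- and version-dependent:

    /* Sketch: cond_syscall() is approximately a weak alias onto the
     * shared "not implemented" stub. */
    #define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")

    asmlinkage long sys_ni_syscall(void)
    {
        return -ENOSYS;
    }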
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9535a38399..8020fb273c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -52,6 +52,10 @@
52extern int proc_nr_files(ctl_table *table, int write, struct file *filp, 52extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
53 void __user *buffer, size_t *lenp, loff_t *ppos); 53 void __user *buffer, size_t *lenp, loff_t *ppos);
54 54
55#ifdef CONFIG_X86
56#include <asm/nmi.h>
57#endif
58
55#if defined(CONFIG_SYSCTL) 59#if defined(CONFIG_SYSCTL)
56 60
57/* External variables not in a header file. */ 61/* External variables not in a header file. */
@@ -64,7 +68,6 @@ extern int sysrq_enabled;
64extern int core_uses_pid; 68extern int core_uses_pid;
65extern int suid_dumpable; 69extern int suid_dumpable;
66extern char core_pattern[]; 70extern char core_pattern[];
67extern int cad_pid;
68extern int pid_max; 71extern int pid_max;
69extern int min_free_kbytes; 72extern int min_free_kbytes;
70extern int printk_ratelimit_jiffies; 73extern int printk_ratelimit_jiffies;
@@ -74,13 +77,6 @@ extern int sysctl_drop_caches;
74extern int percpu_pagelist_fraction; 77extern int percpu_pagelist_fraction;
75extern int compat_log; 78extern int compat_log;
76 79
77#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
78int unknown_nmi_panic;
79int nmi_watchdog_enabled;
80extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
81 void __user *, size_t *, loff_t *);
82#endif
83
84/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ 80/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
85static int maxolduid = 65535; 81static int maxolduid = 65535;
86static int minolduid; 82static int minolduid;
@@ -95,13 +91,8 @@ extern char modprobe_path[];
95extern int sg_big_buff; 91extern int sg_big_buff;
96#endif 92#endif
97#ifdef CONFIG_SYSVIPC 93#ifdef CONFIG_SYSVIPC
98extern size_t shm_ctlmax; 94static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp,
99extern size_t shm_ctlall; 95 void __user *buffer, size_t *lenp, loff_t *ppos);
100extern int shm_ctlmni;
101extern int msg_ctlmax;
102extern int msg_ctlmnb;
103extern int msg_ctlmni;
104extern int sem_ctls[];
105#endif 96#endif
106 97
107#ifdef __sparc__ 98#ifdef __sparc__
@@ -142,7 +133,10 @@ static int parse_table(int __user *, int, void __user *, size_t __user *,
142 void __user *, size_t, ctl_table *, void **); 133 void __user *, size_t, ctl_table *, void **);
143#endif 134#endif
144 135
145static int proc_doutsstring(ctl_table *table, int write, struct file *filp, 136static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
137 void __user *buffer, size_t *lenp, loff_t *ppos);
138
139static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
146 void __user *buffer, size_t *lenp, loff_t *ppos); 140 void __user *buffer, size_t *lenp, loff_t *ppos);
147 141
148static ctl_table root_table[]; 142static ctl_table root_table[];
@@ -232,51 +226,100 @@ static ctl_table root_table[] = {
232}; 226};
233 227
234static ctl_table kern_table[] = { 228static ctl_table kern_table[] = {
229#ifndef CONFIG_UTS_NS
230 {
231 .ctl_name = KERN_OSTYPE,
232 .procname = "ostype",
233 .data = init_uts_ns.name.sysname,
234 .maxlen = sizeof(init_uts_ns.name.sysname),
235 .mode = 0444,
236 .proc_handler = &proc_do_uts_string,
237 .strategy = &sysctl_string,
238 },
239 {
240 .ctl_name = KERN_OSRELEASE,
241 .procname = "osrelease",
242 .data = init_uts_ns.name.release,
243 .maxlen = sizeof(init_uts_ns.name.release),
244 .mode = 0444,
245 .proc_handler = &proc_do_uts_string,
246 .strategy = &sysctl_string,
247 },
248 {
249 .ctl_name = KERN_VERSION,
250 .procname = "version",
251 .data = init_uts_ns.name.version,
252 .maxlen = sizeof(init_uts_ns.name.version),
253 .mode = 0444,
254 .proc_handler = &proc_do_uts_string,
255 .strategy = &sysctl_string,
256 },
257 {
258 .ctl_name = KERN_NODENAME,
259 .procname = "hostname",
260 .data = init_uts_ns.name.nodename,
261 .maxlen = sizeof(init_uts_ns.name.nodename),
262 .mode = 0644,
263 .proc_handler = &proc_do_uts_string,
264 .strategy = &sysctl_string,
265 },
266 {
267 .ctl_name = KERN_DOMAINNAME,
268 .procname = "domainname",
269 .data = init_uts_ns.name.domainname,
270 .maxlen = sizeof(init_uts_ns.name.domainname),
271 .mode = 0644,
272 .proc_handler = &proc_do_uts_string,
273 .strategy = &sysctl_string,
274 },
275#else /* !CONFIG_UTS_NS */
235 { 276 {
236 .ctl_name = KERN_OSTYPE, 277 .ctl_name = KERN_OSTYPE,
237 .procname = "ostype", 278 .procname = "ostype",
238 .data = system_utsname.sysname, 279 .data = NULL,
239 .maxlen = sizeof(system_utsname.sysname), 280 /* could maybe use __NEW_UTS_LEN here? */
281 .maxlen = FIELD_SIZEOF(struct new_utsname, sysname),
240 .mode = 0444, 282 .mode = 0444,
241 .proc_handler = &proc_doutsstring, 283 .proc_handler = &proc_do_uts_string,
242 .strategy = &sysctl_string, 284 .strategy = &sysctl_string,
243 }, 285 },
244 { 286 {
245 .ctl_name = KERN_OSRELEASE, 287 .ctl_name = KERN_OSRELEASE,
246 .procname = "osrelease", 288 .procname = "osrelease",
247 .data = system_utsname.release, 289 .data = NULL,
248 .maxlen = sizeof(system_utsname.release), 290 .maxlen = FIELD_SIZEOF(struct new_utsname, release),
249 .mode = 0444, 291 .mode = 0444,
250 .proc_handler = &proc_doutsstring, 292 .proc_handler = &proc_do_uts_string,
251 .strategy = &sysctl_string, 293 .strategy = &sysctl_string,
252 }, 294 },
253 { 295 {
254 .ctl_name = KERN_VERSION, 296 .ctl_name = KERN_VERSION,
255 .procname = "version", 297 .procname = "version",
256 .data = system_utsname.version, 298 .data = NULL,
257 .maxlen = sizeof(system_utsname.version), 299 .maxlen = FIELD_SIZEOF(struct new_utsname, version),
258 .mode = 0444, 300 .mode = 0444,
259 .proc_handler = &proc_doutsstring, 301 .proc_handler = &proc_do_uts_string,
260 .strategy = &sysctl_string, 302 .strategy = &sysctl_string,
261 }, 303 },
262 { 304 {
263 .ctl_name = KERN_NODENAME, 305 .ctl_name = KERN_NODENAME,
264 .procname = "hostname", 306 .procname = "hostname",
265 .data = system_utsname.nodename, 307 .data = NULL,
266 .maxlen = sizeof(system_utsname.nodename), 308 .maxlen = FIELD_SIZEOF(struct new_utsname, nodename),
267 .mode = 0644, 309 .mode = 0644,
268 .proc_handler = &proc_doutsstring, 310 .proc_handler = &proc_do_uts_string,
269 .strategy = &sysctl_string, 311 .strategy = &sysctl_string,
270 }, 312 },
271 { 313 {
272 .ctl_name = KERN_DOMAINNAME, 314 .ctl_name = KERN_DOMAINNAME,
273 .procname = "domainname", 315 .procname = "domainname",
274 .data = system_utsname.domainname, 316 .data = NULL,
275 .maxlen = sizeof(system_utsname.domainname), 317 .maxlen = FIELD_SIZEOF(struct new_utsname, domainname),
276 .mode = 0644, 318 .mode = 0644,
277 .proc_handler = &proc_doutsstring, 319 .proc_handler = &proc_do_uts_string,
278 .strategy = &sysctl_string, 320 .strategy = &sysctl_string,
279 }, 321 },
322#endif /* !CONFIG_UTS_NS */
280 { 323 {
281 .ctl_name = KERN_PANIC, 324 .ctl_name = KERN_PANIC,
282 .procname = "panic", 325 .procname = "panic",
@@ -297,7 +340,7 @@ static ctl_table kern_table[] = {
297 .ctl_name = KERN_CORE_PATTERN, 340 .ctl_name = KERN_CORE_PATTERN,
298 .procname = "core_pattern", 341 .procname = "core_pattern",
299 .data = core_pattern, 342 .data = core_pattern,
300 .maxlen = 64, 343 .maxlen = 128,
301 .mode = 0644, 344 .mode = 0644,
302 .proc_handler = &proc_dostring, 345 .proc_handler = &proc_dostring,
303 .strategy = &sysctl_string, 346 .strategy = &sysctl_string,
@@ -435,58 +478,58 @@ static ctl_table kern_table[] = {
435 { 478 {
436 .ctl_name = KERN_SHMMAX, 479 .ctl_name = KERN_SHMMAX,
437 .procname = "shmmax", 480 .procname = "shmmax",
438 .data = &shm_ctlmax, 481 .data = NULL,
439 .maxlen = sizeof (size_t), 482 .maxlen = sizeof (size_t),
440 .mode = 0644, 483 .mode = 0644,
441 .proc_handler = &proc_doulongvec_minmax, 484 .proc_handler = &proc_do_ipc_string,
442 }, 485 },
443 { 486 {
444 .ctl_name = KERN_SHMALL, 487 .ctl_name = KERN_SHMALL,
445 .procname = "shmall", 488 .procname = "shmall",
446 .data = &shm_ctlall, 489 .data = NULL,
447 .maxlen = sizeof (size_t), 490 .maxlen = sizeof (size_t),
448 .mode = 0644, 491 .mode = 0644,
449 .proc_handler = &proc_doulongvec_minmax, 492 .proc_handler = &proc_do_ipc_string,
450 }, 493 },
451 { 494 {
452 .ctl_name = KERN_SHMMNI, 495 .ctl_name = KERN_SHMMNI,
453 .procname = "shmmni", 496 .procname = "shmmni",
454 .data = &shm_ctlmni, 497 .data = NULL,
455 .maxlen = sizeof (int), 498 .maxlen = sizeof (int),
456 .mode = 0644, 499 .mode = 0644,
457 .proc_handler = &proc_dointvec, 500 .proc_handler = &proc_do_ipc_string,
458 }, 501 },
459 { 502 {
460 .ctl_name = KERN_MSGMAX, 503 .ctl_name = KERN_MSGMAX,
461 .procname = "msgmax", 504 .procname = "msgmax",
462 .data = &msg_ctlmax, 505 .data = NULL,
463 .maxlen = sizeof (int), 506 .maxlen = sizeof (int),
464 .mode = 0644, 507 .mode = 0644,
465 .proc_handler = &proc_dointvec, 508 .proc_handler = &proc_do_ipc_string,
466 }, 509 },
467 { 510 {
468 .ctl_name = KERN_MSGMNI, 511 .ctl_name = KERN_MSGMNI,
469 .procname = "msgmni", 512 .procname = "msgmni",
470 .data = &msg_ctlmni, 513 .data = NULL,
471 .maxlen = sizeof (int), 514 .maxlen = sizeof (int),
472 .mode = 0644, 515 .mode = 0644,
473 .proc_handler = &proc_dointvec, 516 .proc_handler = &proc_do_ipc_string,
474 }, 517 },
475 { 518 {
476 .ctl_name = KERN_MSGMNB, 519 .ctl_name = KERN_MSGMNB,
477 .procname = "msgmnb", 520 .procname = "msgmnb",
478 .data = &msg_ctlmnb, 521 .data = NULL,
479 .maxlen = sizeof (int), 522 .maxlen = sizeof (int),
480 .mode = 0644, 523 .mode = 0644,
481 .proc_handler = &proc_dointvec, 524 .proc_handler = &proc_do_ipc_string,
482 }, 525 },
483 { 526 {
484 .ctl_name = KERN_SEM, 527 .ctl_name = KERN_SEM,
485 .procname = "sem", 528 .procname = "sem",
486 .data = &sem_ctls, 529 .data = NULL,
487 .maxlen = 4*sizeof (int), 530 .maxlen = 4*sizeof (int),
488 .mode = 0644, 531 .mode = 0644,
489 .proc_handler = &proc_dointvec, 532 .proc_handler = &proc_do_ipc_string,
490 }, 533 },
491#endif 534#endif
492#ifdef CONFIG_MAGIC_SYSRQ 535#ifdef CONFIG_MAGIC_SYSRQ
@@ -502,10 +545,10 @@ static ctl_table kern_table[] = {
502 { 545 {
503 .ctl_name = KERN_CADPID, 546 .ctl_name = KERN_CADPID,
504 .procname = "cad_pid", 547 .procname = "cad_pid",
505 .data = &cad_pid, 548 .data = NULL,
506 .maxlen = sizeof (int), 549 .maxlen = sizeof (int),
507 .mode = 0600, 550 .mode = 0600,
508 .proc_handler = &proc_dointvec, 551 .proc_handler = &proc_do_cad_pid,
509 }, 552 },
510 { 553 {
511 .ctl_name = KERN_MAX_THREADS, 554 .ctl_name = KERN_MAX_THREADS,
@@ -1627,32 +1670,15 @@ static ssize_t proc_writesys(struct file * file, const char __user * buf,
1627 return do_rw_proc(1, file, (char __user *) buf, count, ppos); 1670 return do_rw_proc(1, file, (char __user *) buf, count, ppos);
1628} 1671}
1629 1672
1630/** 1673static int _proc_do_string(void* data, int maxlen, int write,
1631 * proc_dostring - read a string sysctl 1674 struct file *filp, void __user *buffer,
1632 * @table: the sysctl table 1675 size_t *lenp, loff_t *ppos)
1633 * @write: %TRUE if this is a write to the sysctl file
1634 * @filp: the file structure
1635 * @buffer: the user buffer
1636 * @lenp: the size of the user buffer
1637 * @ppos: file position
1638 *
1639 * Reads/writes a string from/to the user buffer. If the kernel
1640 * buffer provided is not large enough to hold the string, the
1641 * string is truncated. The copied string is %NULL-terminated.
1642 * If the string is being read by the user process, it is copied
1643 * and a newline '\n' is added. It is truncated if the buffer is
1644 * not large enough.
1645 *
1646 * Returns 0 on success.
1647 */
1648int proc_dostring(ctl_table *table, int write, struct file *filp,
1649 void __user *buffer, size_t *lenp, loff_t *ppos)
1650{ 1676{
1651 size_t len; 1677 size_t len;
1652 char __user *p; 1678 char __user *p;
1653 char c; 1679 char c;
1654 1680
1655 if (!table->data || !table->maxlen || !*lenp || 1681 if (!data || !maxlen || !*lenp ||
1656 (*ppos && !write)) { 1682 (*ppos && !write)) {
1657 *lenp = 0; 1683 *lenp = 0;
1658 return 0; 1684 return 0;
@@ -1668,20 +1694,20 @@ int proc_dostring(ctl_table *table, int write, struct file *filp,
1668 break; 1694 break;
1669 len++; 1695 len++;
1670 } 1696 }
1671 if (len >= table->maxlen) 1697 if (len >= maxlen)
1672 len = table->maxlen-1; 1698 len = maxlen-1;
1673 if(copy_from_user(table->data, buffer, len)) 1699 if(copy_from_user(data, buffer, len))
1674 return -EFAULT; 1700 return -EFAULT;
1675 ((char *) table->data)[len] = 0; 1701 ((char *) data)[len] = 0;
1676 *ppos += *lenp; 1702 *ppos += *lenp;
1677 } else { 1703 } else {
1678 len = strlen(table->data); 1704 len = strlen(data);
1679 if (len > table->maxlen) 1705 if (len > maxlen)
1680 len = table->maxlen; 1706 len = maxlen;
1681 if (len > *lenp) 1707 if (len > *lenp)
1682 len = *lenp; 1708 len = *lenp;
1683 if (len) 1709 if (len)
1684 if(copy_to_user(buffer, table->data, len)) 1710 if(copy_to_user(buffer, data, len))
1685 return -EFAULT; 1711 return -EFAULT;
1686 if (len < *lenp) { 1712 if (len < *lenp) {
1687 if(put_user('\n', ((char __user *) buffer) + len)) 1713 if(put_user('\n', ((char __user *) buffer) + len))
@@ -1694,12 +1720,38 @@ int proc_dostring(ctl_table *table, int write, struct file *filp,
1694 return 0; 1720 return 0;
1695} 1721}
1696 1722
1723/**
1724 * proc_dostring - read a string sysctl
1725 * @table: the sysctl table
1726 * @write: %TRUE if this is a write to the sysctl file
1727 * @filp: the file structure
1728 * @buffer: the user buffer
1729 * @lenp: the size of the user buffer
1730 * @ppos: file position
1731 *
1732 * Reads/writes a string from/to the user buffer. If the kernel
1733 * buffer provided is not large enough to hold the string, the
1734 * string is truncated. The copied string is %NULL-terminated.
1735 * If the string is being read by the user process, it is copied
1736 * and a newline '\n' is added. It is truncated if the buffer is
1737 * not large enough.
1738 *
1739 * Returns 0 on success.
1740 */
1741int proc_dostring(ctl_table *table, int write, struct file *filp,
1742 void __user *buffer, size_t *lenp, loff_t *ppos)
1743{
1744 return _proc_do_string(table->data, table->maxlen, write, filp,
1745 buffer, lenp, ppos);
1746}
1747
1697/* 1748/*
1698 * Special case of dostring for the UTS structure. This has locks 1749 * Special case of dostring for the UTS structure. This has locks
1699 * to observe. Should this be in kernel/sys.c ???? 1750 * to observe. Should this be in kernel/sys.c ????
1700 */ 1751 */
1701 1752
1702static int proc_doutsstring(ctl_table *table, int write, struct file *filp, 1753#ifndef CONFIG_UTS_NS
1754static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
1703 void __user *buffer, size_t *lenp, loff_t *ppos) 1755 void __user *buffer, size_t *lenp, loff_t *ppos)
1704{ 1756{
1705 int r; 1757 int r;
@@ -1715,6 +1767,48 @@ static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
1715 } 1767 }
1716 return r; 1768 return r;
1717} 1769}
1770#else /* !CONFIG_UTS_NS */
1771static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
1772 void __user *buffer, size_t *lenp, loff_t *ppos)
1773{
1774 int r;
1775 struct uts_namespace* uts_ns = current->nsproxy->uts_ns;
1776 char* which;
1777
1778 switch (table->ctl_name) {
1779 case KERN_OSTYPE:
1780 which = uts_ns->name.sysname;
1781 break;
1782 case KERN_NODENAME:
1783 which = uts_ns->name.nodename;
1784 break;
1785 case KERN_OSRELEASE:
1786 which = uts_ns->name.release;
1787 break;
1788 case KERN_VERSION:
1789 which = uts_ns->name.version;
1790 break;
1791 case KERN_DOMAINNAME:
1792 which = uts_ns->name.domainname;
1793 break;
1794 default:
1795 r = -EINVAL;
1796 goto out;
1797 }
1798
1799 if (!write) {
1800 down_read(&uts_sem);
1801 r=_proc_do_string(which,table->maxlen,0,filp,buffer,lenp, ppos);
1802 up_read(&uts_sem);
1803 } else {
1804 down_write(&uts_sem);
1805 r=_proc_do_string(which,table->maxlen,1,filp,buffer,lenp, ppos);
1806 up_write(&uts_sem);
1807 }
1808 out:
1809 return r;
1810}
1811#endif /* !CONFIG_UTS_NS */
1718 1812
1719static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp, 1813static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
1720 int *valp, 1814 int *valp,
@@ -1735,8 +1829,9 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
1735 return 0; 1829 return 0;
1736} 1830}
1737 1831
1738static int do_proc_dointvec(ctl_table *table, int write, struct file *filp, 1832static int __do_proc_dointvec(void *tbl_data, ctl_table *table,
1739 void __user *buffer, size_t *lenp, loff_t *ppos, 1833 int write, struct file *filp, void __user *buffer,
1834 size_t *lenp, loff_t *ppos,
1740 int (*conv)(int *negp, unsigned long *lvalp, int *valp, 1835 int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1741 int write, void *data), 1836 int write, void *data),
1742 void *data) 1837 void *data)
@@ -1749,13 +1844,13 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1749 char buf[TMPBUFLEN], *p; 1844 char buf[TMPBUFLEN], *p;
1750 char __user *s = buffer; 1845 char __user *s = buffer;
1751 1846
1752 if (!table->data || !table->maxlen || !*lenp || 1847 if (!tbl_data || !table->maxlen || !*lenp ||
1753 (*ppos && !write)) { 1848 (*ppos && !write)) {
1754 *lenp = 0; 1849 *lenp = 0;
1755 return 0; 1850 return 0;
1756 } 1851 }
1757 1852
1758 i = (int *) table->data; 1853 i = (int *) tbl_data;
1759 vleft = table->maxlen / sizeof(*i); 1854 vleft = table->maxlen / sizeof(*i);
1760 left = *lenp; 1855 left = *lenp;
1761 1856
@@ -1844,6 +1939,16 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1844#undef TMPBUFLEN 1939#undef TMPBUFLEN
1845} 1940}
1846 1941
1942static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1943 void __user *buffer, size_t *lenp, loff_t *ppos,
1944 int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1945 int write, void *data),
1946 void *data)
1947{
1948 return __do_proc_dointvec(table->data, table, write, filp,
1949 buffer, lenp, ppos, conv, data);
1950}
1951
1847/** 1952/**
1848 * proc_dointvec - read a vector of integers 1953 * proc_dointvec - read a vector of integers
1849 * @table: the sysctl table 1954 * @table: the sysctl table
@@ -1977,7 +2082,7 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
1977 do_proc_dointvec_minmax_conv, &param); 2082 do_proc_dointvec_minmax_conv, &param);
1978} 2083}
1979 2084
1980static int do_proc_doulongvec_minmax(ctl_table *table, int write, 2085static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write,
1981 struct file *filp, 2086 struct file *filp,
1982 void __user *buffer, 2087 void __user *buffer,
1983 size_t *lenp, loff_t *ppos, 2088 size_t *lenp, loff_t *ppos,
@@ -1991,13 +2096,13 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write,
1991 char buf[TMPBUFLEN], *p; 2096 char buf[TMPBUFLEN], *p;
1992 char __user *s = buffer; 2097 char __user *s = buffer;
1993 2098
1994 if (!table->data || !table->maxlen || !*lenp || 2099 if (!data || !table->maxlen || !*lenp ||
1995 (*ppos && !write)) { 2100 (*ppos && !write)) {
1996 *lenp = 0; 2101 *lenp = 0;
1997 return 0; 2102 return 0;
1998 } 2103 }
1999 2104
2000 i = (unsigned long *) table->data; 2105 i = (unsigned long *) data;
2001 min = (unsigned long *) table->extra1; 2106 min = (unsigned long *) table->extra1;
2002 max = (unsigned long *) table->extra2; 2107 max = (unsigned long *) table->extra2;
2003 vleft = table->maxlen / sizeof(unsigned long); 2108 vleft = table->maxlen / sizeof(unsigned long);
@@ -2082,6 +2187,17 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write,
2082#undef TMPBUFLEN 2187#undef TMPBUFLEN
2083} 2188}
2084 2189
2190static int do_proc_doulongvec_minmax(ctl_table *table, int write,
2191 struct file *filp,
2192 void __user *buffer,
2193 size_t *lenp, loff_t *ppos,
2194 unsigned long convmul,
2195 unsigned long convdiv)
2196{
2197 return __do_proc_doulongvec_minmax(table->data, table, write,
2198 filp, buffer, lenp, ppos, convmul, convdiv);
2199}
2200
2085/** 2201/**
2086 * proc_doulongvec_minmax - read a vector of long integers with min/max values 2202 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2087 * @table: the sysctl table 2203 * @table: the sysctl table
@@ -2270,6 +2386,71 @@ int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2270 do_proc_dointvec_ms_jiffies_conv, NULL); 2386 do_proc_dointvec_ms_jiffies_conv, NULL);
2271} 2387}
2272 2388
2389#ifdef CONFIG_SYSVIPC
2390static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp,
2391 void __user *buffer, size_t *lenp, loff_t *ppos)
2392{
2393 void *data;
2394 struct ipc_namespace *ns;
2395
2396 ns = current->nsproxy->ipc_ns;
2397
2398 switch (table->ctl_name) {
2399 case KERN_SHMMAX:
2400 data = &ns->shm_ctlmax;
2401 goto proc_minmax;
2402 case KERN_SHMALL:
2403 data = &ns->shm_ctlall;
2404 goto proc_minmax;
2405 case KERN_SHMMNI:
2406 data = &ns->shm_ctlmni;
2407 break;
2408 case KERN_MSGMAX:
2409 data = &ns->msg_ctlmax;
2410 break;
2411 case KERN_MSGMNI:
2412 data = &ns->msg_ctlmni;
2413 break;
2414 case KERN_MSGMNB:
2415 data = &ns->msg_ctlmnb;
2416 break;
2417 case KERN_SEM:
2418 data = &ns->sem_ctls;
2419 break;
2420 default:
2421 return -EINVAL;
2422 }
2423
2424 return __do_proc_dointvec(data, table, write, filp, buffer,
2425 lenp, ppos, NULL, NULL);
2426proc_minmax:
2427 return __do_proc_doulongvec_minmax(data, table, write, filp, buffer,
2428 lenp, ppos, 1l, 1l);
2429}
2430#endif
2431
2432static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
2433 void __user *buffer, size_t *lenp, loff_t *ppos)
2434{
2435 struct pid *new_pid;
2436 pid_t tmp;
2437 int r;
2438
2439 tmp = pid_nr(cad_pid);
2440
2441 r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2442 lenp, ppos, NULL, NULL);
2443 if (r || !write)
2444 return r;
2445
2446 new_pid = find_get_pid(tmp);
2447 if (!new_pid)
2448 return -ESRCH;
2449
2450 put_pid(xchg(&cad_pid, new_pid));
2451 return 0;
2452}
2453
2273#else /* CONFIG_PROC_FS */ 2454#else /* CONFIG_PROC_FS */
2274 2455
2275int proc_dostring(ctl_table *table, int write, struct file *filp, 2456int proc_dostring(ctl_table *table, int write, struct file *filp,
@@ -2278,12 +2459,20 @@ int proc_dostring(ctl_table *table, int write, struct file *filp,
2278 return -ENOSYS; 2459 return -ENOSYS;
2279} 2460}
2280 2461
2281static int proc_doutsstring(ctl_table *table, int write, struct file *filp, 2462static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
2282 void __user *buffer, size_t *lenp, loff_t *ppos) 2463 void __user *buffer, size_t *lenp, loff_t *ppos)
2283{ 2464{
2284 return -ENOSYS; 2465 return -ENOSYS;
2285} 2466}
2286 2467
2468#ifdef CONFIG_SYSVIPC
2469static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp,
2470 void __user *buffer, size_t *lenp, loff_t *ppos)
2471{
2472 return -ENOSYS;
2473}
2474#endif
2475
2287int proc_dointvec(ctl_table *table, int write, struct file *filp, 2476int proc_dointvec(ctl_table *table, int write, struct file *filp,
2288 void __user *buffer, size_t *lenp, loff_t *ppos) 2477 void __user *buffer, size_t *lenp, loff_t *ppos)
2289{ 2478{
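With CONFIG_UTS_NS and CONFIG_SYSVIPC namespaces in play, the kern_table entries above can no longer point .data at a single global, so they leave .data NULL and let the handler look the object up in current->nsproxy on each access; _proc_do_string(), __do_proc_dointvec() and __do_proc_doulongvec_minmax() are split out precisely so a handler can hand them that per-namespace pointer. A condensed sketch of the pattern (the ctl_name switch and uts_sem locking of the real proc_do_uts_string() are trimmed here):

    /* Sketch of the dispatch pattern: resolve the per-namespace buffer,
     * then reuse the generic string worker on it. */
    static int example_uts_handler(ctl_table *table, int write, struct file *filp,
                                   void __user *buffer, size_t *lenp, loff_t *ppos)
    {
        char *which = current->nsproxy->uts_ns->name.nodename;  /* KERN_NODENAME case */

        return _proc_do_string(which, table->maxlen, write, filp,
                               buffer, lenp, ppos);
    }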
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 2ed4040d0d..5d6a8c54ee 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -18,7 +18,9 @@
18 18
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/taskstats_kern.h> 20#include <linux/taskstats_kern.h>
21#include <linux/tsacct_kern.h>
21#include <linux/delayacct.h> 22#include <linux/delayacct.h>
23#include <linux/tsacct_kern.h>
22#include <linux/cpumask.h> 24#include <linux/cpumask.h>
23#include <linux/percpu.h> 25#include <linux/percpu.h>
24#include <net/genetlink.h> 26#include <net/genetlink.h>
@@ -75,7 +77,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
75 /* 77 /*
76 * If new attributes are added, please revisit this allocation 78 * If new attributes are added, please revisit this allocation
77 */ 79 */
78 skb = nlmsg_new(size, GFP_KERNEL); 80 skb = nlmsg_new(genlmsg_total_size(size), GFP_KERNEL);
79 if (!skb) 81 if (!skb)
80 return -ENOMEM; 82 return -ENOMEM;
81 83
@@ -198,7 +200,13 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk,
198 */ 200 */
199 201
200 delayacct_add_tsk(stats, tsk); 202 delayacct_add_tsk(stats, tsk);
203
204 /* fill in basic acct fields */
201 stats->version = TASKSTATS_VERSION; 205 stats->version = TASKSTATS_VERSION;
206 bacct_add_tsk(stats, tsk);
207
208 /* fill in extended acct fields */
209 xacct_add_tsk(stats, tsk);
202 210
203 /* Define err: label here if needed */ 211 /* Define err: label here if needed */
204 put_task_struct(tsk); 212 put_task_struct(tsk);
diff --git a/kernel/time.c b/kernel/time.c
index 5bd4897476..0e017bff4c 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -202,179 +202,6 @@ asmlinkage long sys_settimeofday(struct timeval __user *tv,
202 return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL); 202 return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL);
203} 203}
204 204
205/* we call this to notify the arch when the clock is being
206 * controlled. If no such arch routine, do nothing.
207 */
208void __attribute__ ((weak)) notify_arch_cmos_timer(void)
209{
210 return;
211}
212
213/* adjtimex mainly allows reading (and writing, if superuser) of
214 * kernel time-keeping variables. used by xntpd.
215 */
216int do_adjtimex(struct timex *txc)
217{
218 long ltemp, mtemp, save_adjust;
219 int result;
220
221 /* In order to modify anything, you gotta be super-user! */
222 if (txc->modes && !capable(CAP_SYS_TIME))
223 return -EPERM;
224
225 /* Now we validate the data before disabling interrupts */
226
227 if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
228 /* singleshot must not be used with any other mode bits */
229 if (txc->modes != ADJ_OFFSET_SINGLESHOT)
230 return -EINVAL;
231
232 if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET))
233 /* adjustment Offset limited to +- .512 seconds */
234 if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE )
235 return -EINVAL;
236
237 /* if the quartz is off by more than 10% something is VERY wrong ! */
238 if (txc->modes & ADJ_TICK)
239 if (txc->tick < 900000/USER_HZ ||
240 txc->tick > 1100000/USER_HZ)
241 return -EINVAL;
242
243 write_seqlock_irq(&xtime_lock);
244 result = time_state; /* mostly `TIME_OK' */
245
246 /* Save for later - semantics of adjtime is to return old value */
247 save_adjust = time_next_adjust ? time_next_adjust : time_adjust;
248
249#if 0 /* STA_CLOCKERR is never set yet */
250 time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */
251#endif
252 /* If there are input parameters, then process them */
253 if (txc->modes)
254 {
255 if (txc->modes & ADJ_STATUS) /* only set allowed bits */
256 time_status = (txc->status & ~STA_RONLY) |
257 (time_status & STA_RONLY);
258
259 if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */
260 if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) {
261 result = -EINVAL;
262 goto leave;
263 }
264 time_freq = txc->freq;
265 }
266
267 if (txc->modes & ADJ_MAXERROR) {
268 if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) {
269 result = -EINVAL;
270 goto leave;
271 }
272 time_maxerror = txc->maxerror;
273 }
274
275 if (txc->modes & ADJ_ESTERROR) {
276 if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) {
277 result = -EINVAL;
278 goto leave;
279 }
280 time_esterror = txc->esterror;
281 }
282
283 if (txc->modes & ADJ_TIMECONST) { /* p. 24 */
284 if (txc->constant < 0) { /* NTP v4 uses values > 6 */
285 result = -EINVAL;
286 goto leave;
287 }
288 time_constant = txc->constant;
289 }
290
291 if (txc->modes & ADJ_OFFSET) { /* values checked earlier */
292 if (txc->modes == ADJ_OFFSET_SINGLESHOT) {
293 /* adjtime() is independent from ntp_adjtime() */
294 if ((time_next_adjust = txc->offset) == 0)
295 time_adjust = 0;
296 }
297 else if (time_status & STA_PLL) {
298 ltemp = txc->offset;
299
300 /*
301 * Scale the phase adjustment and
302 * clamp to the operating range.
303 */
304 if (ltemp > MAXPHASE)
305 time_offset = MAXPHASE << SHIFT_UPDATE;
306 else if (ltemp < -MAXPHASE)
307 time_offset = -(MAXPHASE << SHIFT_UPDATE);
308 else
309 time_offset = ltemp << SHIFT_UPDATE;
310
311 /*
312 * Select whether the frequency is to be controlled
313 * and in which mode (PLL or FLL). Clamp to the operating
314 * range. Ugly multiply/divide should be replaced someday.
315 */
316
317 if (time_status & STA_FREQHOLD || time_reftime == 0)
318 time_reftime = xtime.tv_sec;
319 mtemp = xtime.tv_sec - time_reftime;
320 time_reftime = xtime.tv_sec;
321 if (time_status & STA_FLL) {
322 if (mtemp >= MINSEC) {
323 ltemp = (time_offset / mtemp) << (SHIFT_USEC -
324 SHIFT_UPDATE);
325 time_freq += shift_right(ltemp, SHIFT_KH);
326 } else /* calibration interval too short (p. 12) */
327 result = TIME_ERROR;
328 } else { /* PLL mode */
329 if (mtemp < MAXSEC) {
330 ltemp *= mtemp;
331 time_freq += shift_right(ltemp,(time_constant +
332 time_constant +
333 SHIFT_KF - SHIFT_USEC));
334 } else /* calibration interval too long (p. 12) */
335 result = TIME_ERROR;
336 }
337 time_freq = min(time_freq, time_tolerance);
338 time_freq = max(time_freq, -time_tolerance);
339 } /* STA_PLL */
340 } /* txc->modes & ADJ_OFFSET */
341 if (txc->modes & ADJ_TICK) {
342 tick_usec = txc->tick;
343 tick_nsec = TICK_USEC_TO_NSEC(tick_usec);
344 }
345 } /* txc->modes */
346leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0)
347 result = TIME_ERROR;
348
349 if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
350 txc->offset = save_adjust;
351 else {
352 txc->offset = shift_right(time_offset, SHIFT_UPDATE);
353 }
354 txc->freq = time_freq;
355 txc->maxerror = time_maxerror;
356 txc->esterror = time_esterror;
357 txc->status = time_status;
358 txc->constant = time_constant;
359 txc->precision = time_precision;
360 txc->tolerance = time_tolerance;
361 txc->tick = tick_usec;
362
363 /* PPS is not implemented, so these are zero */
364 txc->ppsfreq = 0;
365 txc->jitter = 0;
366 txc->shift = 0;
367 txc->stabil = 0;
368 txc->jitcnt = 0;
369 txc->calcnt = 0;
370 txc->errcnt = 0;
371 txc->stbcnt = 0;
372 write_sequnlock_irq(&xtime_lock);
373 do_gettimeofday(&txc->time);
374 notify_arch_cmos_timer();
375 return(result);
376}
377
378asmlinkage long sys_adjtimex(struct timex __user *txc_p) 205asmlinkage long sys_adjtimex(struct timex __user *txc_p)
379{ 206{
380 struct timex txc; /* Local copy of parameter */ 207 struct timex txc; /* Local copy of parameter */
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index e1dfd8e86c..61a3907d16 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1 +1 @@
obj-y += clocksource.o jiffies.o obj-y += ntp.o clocksource.o jiffies.o
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
new file mode 100644
index 0000000000..47195fa0ec
--- /dev/null
+++ b/kernel/time/ntp.c
@@ -0,0 +1,350 @@
1/*
2 * linux/kernel/time/ntp.c
3 *
4 * NTP state machine interfaces and logic.
5 *
6 * This code was mainly moved from kernel/timer.c and kernel/time.c
7 * Please see those files for relevant copyright info and historical
8 * changelogs.
9 */
10
11#include <linux/mm.h>
12#include <linux/time.h>
13#include <linux/timex.h>
14
15#include <asm/div64.h>
16#include <asm/timex.h>
17
18/*
19 * Timekeeping variables
20 */
21unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */
22unsigned long tick_nsec; /* ACTHZ period (nsec) */
23static u64 tick_length, tick_length_base;
24
25#define MAX_TICKADJ 500 /* microsecs */
26#define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \
27 TICK_LENGTH_SHIFT) / HZ)
28
29/*
30 * phase-lock loop variables
31 */
32/* TIME_ERROR prevents overwriting the CMOS clock */
33static int time_state = TIME_OK; /* clock synchronization status */
34int time_status = STA_UNSYNC; /* clock status bits */
35static long time_offset; /* time adjustment (ns) */
36static long time_constant = 2; /* pll time constant */
37long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */
38long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */
39long time_freq; /* frequency offset (scaled ppm)*/
40static long time_reftime; /* time at last adjustment (s) */
41long time_adjust;
42
43#define CLOCK_TICK_OVERFLOW (LATCH * HZ - CLOCK_TICK_RATE)
44#define CLOCK_TICK_ADJUST (((s64)CLOCK_TICK_OVERFLOW * NSEC_PER_SEC) / \
45 (s64)CLOCK_TICK_RATE)
46
47static void ntp_update_frequency(void)
48{
49 tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << TICK_LENGTH_SHIFT;
50 tick_length_base += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT;
51 tick_length_base += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC);
52
53 do_div(tick_length_base, HZ);
54
55 tick_nsec = tick_length_base >> TICK_LENGTH_SHIFT;
56}
57
58/**
59 * ntp_clear - Clears the NTP state variables
60 *
61 * Must be called while holding a write on the xtime_lock
62 */
63void ntp_clear(void)
64{
65 time_adjust = 0; /* stop active adjtime() */
66 time_status |= STA_UNSYNC;
67 time_maxerror = NTP_PHASE_LIMIT;
68 time_esterror = NTP_PHASE_LIMIT;
69
70 ntp_update_frequency();
71
72 tick_length = tick_length_base;
73 time_offset = 0;
74}
75
76/*
77 * this routine handles the overflow of the microsecond field
78 *
79 * The tricky bits of code to handle the accurate clock support
80 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
81 * They were originally developed for SUN and DEC kernels.
82 * All the kudos should go to Dave for this stuff.
83 */
84void second_overflow(void)
85{
86 long time_adj;
87
88 /* Bump the maxerror field */
89 time_maxerror += MAXFREQ >> SHIFT_USEC;
90 if (time_maxerror > NTP_PHASE_LIMIT) {
91 time_maxerror = NTP_PHASE_LIMIT;
92 time_status |= STA_UNSYNC;
93 }
94
95 /*
96 * Leap second processing. If in leap-insert state at the end of the
97 * day, the system clock is set back one second; if in leap-delete
98 * state, the system clock is set ahead one second. The microtime()
99 * routine or external clock driver will insure that reported time is
100 * always monotonic. The ugly divides should be replaced.
101 */
102 switch (time_state) {
103 case TIME_OK:
104 if (time_status & STA_INS)
105 time_state = TIME_INS;
106 else if (time_status & STA_DEL)
107 time_state = TIME_DEL;
108 break;
109 case TIME_INS:
110 if (xtime.tv_sec % 86400 == 0) {
111 xtime.tv_sec--;
112 wall_to_monotonic.tv_sec++;
113 /*
114 * The timer interpolator will make time change
115 * gradually instead of an immediate jump by one second
116 */
117 time_interpolator_update(-NSEC_PER_SEC);
118 time_state = TIME_OOP;
119 clock_was_set();
120 printk(KERN_NOTICE "Clock: inserting leap second "
121 "23:59:60 UTC\n");
122 }
123 break;
124 case TIME_DEL:
125 if ((xtime.tv_sec + 1) % 86400 == 0) {
126 xtime.tv_sec++;
127 wall_to_monotonic.tv_sec--;
128 /*
129 * Use of time interpolator for a gradual change of
130 * time
131 */
132 time_interpolator_update(NSEC_PER_SEC);
133 time_state = TIME_WAIT;
134 clock_was_set();
135 printk(KERN_NOTICE "Clock: deleting leap second "
136 "23:59:59 UTC\n");
137 }
138 break;
139 case TIME_OOP:
140 time_state = TIME_WAIT;
141 break;
142 case TIME_WAIT:
143 if (!(time_status & (STA_INS | STA_DEL)))
144 time_state = TIME_OK;
145 }
146
147 /*
148 * Compute the phase adjustment for the next second. The offset is
149 * reduced by a fixed factor times the time constant.
150 */
151 tick_length = tick_length_base;
152 time_adj = shift_right(time_offset, SHIFT_PLL + time_constant);
153 time_offset -= time_adj;
154 tick_length += (s64)time_adj << (TICK_LENGTH_SHIFT - SHIFT_UPDATE);
155
156 if (unlikely(time_adjust)) {
157 if (time_adjust > MAX_TICKADJ) {
158 time_adjust -= MAX_TICKADJ;
159 tick_length += MAX_TICKADJ_SCALED;
160 } else if (time_adjust < -MAX_TICKADJ) {
161 time_adjust += MAX_TICKADJ;
162 tick_length -= MAX_TICKADJ_SCALED;
163 } else {
164 time_adjust = 0;
165 tick_length += (s64)(time_adjust * NSEC_PER_USEC /
166 HZ) << TICK_LENGTH_SHIFT;
167 }
168 }
169}
170
171/*
172 * Return how long ticks are at the moment, that is, how much time
173 * update_wall_time_one_tick will add to xtime next time we call it
174 * (assuming no calls to do_adjtimex in the meantime).
175 * The return value is in fixed-point nanoseconds shifted by the
176 * specified number of bits to the right of the binary point.
177 * This function has no side-effects.
178 */
179u64 current_tick_length(void)
180{
181 return tick_length;
182}
183
184
185void __attribute__ ((weak)) notify_arch_cmos_timer(void)
186{
187 return;
188}
189
190/* adjtimex mainly allows reading (and writing, if superuser) of
191 * kernel time-keeping variables. used by xntpd.
192 */
193int do_adjtimex(struct timex *txc)
194{
195 long ltemp, mtemp, save_adjust;
196 s64 freq_adj, temp64;
197 int result;
198
199 /* In order to modify anything, you gotta be super-user! */
200 if (txc->modes && !capable(CAP_SYS_TIME))
201 return -EPERM;
202
203 /* Now we validate the data before disabling interrupts */
204
205 if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
206 /* singleshot must not be used with any other mode bits */
207 if (txc->modes != ADJ_OFFSET_SINGLESHOT)
208 return -EINVAL;
209
210 if (txc->modes != ADJ_OFFSET_SINGLESHOT && (txc->modes & ADJ_OFFSET))
211 /* adjustment Offset limited to +- .512 seconds */
212 if (txc->offset <= - MAXPHASE || txc->offset >= MAXPHASE )
213 return -EINVAL;
214
215 /* if the quartz is off by more than 10% something is VERY wrong ! */
216 if (txc->modes & ADJ_TICK)
217 if (txc->tick < 900000/USER_HZ ||
218 txc->tick > 1100000/USER_HZ)
219 return -EINVAL;
220
221 write_seqlock_irq(&xtime_lock);
222 result = time_state; /* mostly `TIME_OK' */
223
224 /* Save for later - semantics of adjtime is to return old value */
225 save_adjust = time_adjust;
226
227#if 0 /* STA_CLOCKERR is never set yet */
228 time_status &= ~STA_CLOCKERR; /* reset STA_CLOCKERR */
229#endif
230 /* If there are input parameters, then process them */
231 if (txc->modes)
232 {
233 if (txc->modes & ADJ_STATUS) /* only set allowed bits */
234 time_status = (txc->status & ~STA_RONLY) |
235 (time_status & STA_RONLY);
236
237 if (txc->modes & ADJ_FREQUENCY) { /* p. 22 */
238 if (txc->freq > MAXFREQ || txc->freq < -MAXFREQ) {
239 result = -EINVAL;
240 goto leave;
241 }
242 time_freq = ((s64)txc->freq * NSEC_PER_USEC) >> (SHIFT_USEC - SHIFT_NSEC);
243 }
244
245 if (txc->modes & ADJ_MAXERROR) {
246 if (txc->maxerror < 0 || txc->maxerror >= NTP_PHASE_LIMIT) {
247 result = -EINVAL;
248 goto leave;
249 }
250 time_maxerror = txc->maxerror;
251 }
252
253 if (txc->modes & ADJ_ESTERROR) {
254 if (txc->esterror < 0 || txc->esterror >= NTP_PHASE_LIMIT) {
255 result = -EINVAL;
256 goto leave;
257 }
258 time_esterror = txc->esterror;
259 }
260
261 if (txc->modes & ADJ_TIMECONST) { /* p. 24 */
262 if (txc->constant < 0) { /* NTP v4 uses values > 6 */
263 result = -EINVAL;
264 goto leave;
265 }
266 time_constant = min(txc->constant + 4, (long)MAXTC);
267 }
268
269 if (txc->modes & ADJ_OFFSET) { /* values checked earlier */
270 if (txc->modes == ADJ_OFFSET_SINGLESHOT) {
271 /* adjtime() is independent from ntp_adjtime() */
272 time_adjust = txc->offset;
273 }
274 else if (time_status & STA_PLL) {
275 ltemp = txc->offset * NSEC_PER_USEC;
276
277 /*
278 * Scale the phase adjustment and
279 * clamp to the operating range.
280 */
281 time_offset = min(ltemp, MAXPHASE * NSEC_PER_USEC);
282 time_offset = max(time_offset, -MAXPHASE * NSEC_PER_USEC);
283
284 /*
285 * Select whether the frequency is to be controlled
286 * and in which mode (PLL or FLL). Clamp to the operating
287 * range. Ugly multiply/divide should be replaced someday.
288 */
289
290 if (time_status & STA_FREQHOLD || time_reftime == 0)
291 time_reftime = xtime.tv_sec;
292 mtemp = xtime.tv_sec - time_reftime;
293 time_reftime = xtime.tv_sec;
294
295 freq_adj = (s64)time_offset * mtemp;
296 freq_adj = shift_right(freq_adj, time_constant * 2 +
297 (SHIFT_PLL + 2) * 2 - SHIFT_NSEC);
298 if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) {
299 temp64 = (s64)time_offset << (SHIFT_NSEC - SHIFT_FLL);
300 if (time_offset < 0) {
301 temp64 = -temp64;
302 do_div(temp64, mtemp);
303 freq_adj -= temp64;
304 } else {
305 do_div(temp64, mtemp);
306 freq_adj += temp64;
307 }
308 }
309 freq_adj += time_freq;
310 freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC);
311 time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC);
312 time_offset = (time_offset / HZ) << SHIFT_UPDATE;
313 } /* STA_PLL */
314 } /* txc->modes & ADJ_OFFSET */
315 if (txc->modes & ADJ_TICK)
316 tick_usec = txc->tick;
317
318 if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET))
319 ntp_update_frequency();
320 } /* txc->modes */
321leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0)
322 result = TIME_ERROR;
323
324 if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
325 txc->offset = save_adjust;
326 else
327 txc->offset = shift_right(time_offset, SHIFT_UPDATE) * HZ / 1000;
328 txc->freq = (time_freq / NSEC_PER_USEC) << (SHIFT_USEC - SHIFT_NSEC);
329 txc->maxerror = time_maxerror;
330 txc->esterror = time_esterror;
331 txc->status = time_status;
332 txc->constant = time_constant;
333 txc->precision = 1;
334 txc->tolerance = MAXFREQ;
335 txc->tick = tick_usec;
336
337 /* PPS is not implemented, so these are zero */
338 txc->ppsfreq = 0;
339 txc->jitter = 0;
340 txc->shift = 0;
341 txc->stabil = 0;
342 txc->jitcnt = 0;
343 txc->calcnt = 0;
344 txc->errcnt = 0;
345 txc->stbcnt = 0;
346 write_sequnlock_irq(&xtime_lock);
347 do_gettimeofday(&txc->time);
348 notify_arch_cmos_timer();
349 return(result);
350}
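tick_length_base in the new ntp.c is fixed-point: nanoseconds per tick shifted left by TICK_LENGTH_SHIFT, rebuilt by ntp_update_frequency() from tick_usec, CLOCK_TICK_ADJUST and time_freq. As an illustrative example with numbers chosen here rather than taken from the patch (USER_HZ=100 so tick_usec=10000, HZ=250, time_freq=0, CLOCK_TICK_ADJUST ignored):

    /* Illustrative arithmetic only, mirroring ntp_update_frequency(): */
    u64 base;
    base = (u64)(10000 * NSEC_PER_USEC * 100) << TICK_LENGTH_SHIFT; /* 1e9 ns, scaled */
    do_div(base, 250);                    /* HZ=250 -> 4,000,000 ns << TICK_LENGTH_SHIFT */
    /* tick_nsec = base >> TICK_LENGTH_SHIFT = 4,000,000 ns, i.e. 4 ms per tick */

second_overflow() then nudges tick_length away from this base each second by the scaled phase term (time_adj) and any outstanding adjtime() adjustment.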
diff --git a/kernel/timer.c b/kernel/timer.c
index 4f55622b0d..c1c7fbcffe 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -41,12 +41,6 @@
41#include <asm/timex.h> 41#include <asm/timex.h>
42#include <asm/io.h> 42#include <asm/io.h>
43 43
44#ifdef CONFIG_TIME_INTERPOLATION
45static void time_interpolator_update(long delta_nsec);
46#else
47#define time_interpolator_update(x)
48#endif
49
50u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; 44u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
51 45
52EXPORT_SYMBOL(jiffies_64); 46EXPORT_SYMBOL(jiffies_64);
@@ -568,12 +562,6 @@ found:
568 562
569/******************************************************************/ 563/******************************************************************/
570 564
571/*
572 * Timekeeping variables
573 */
574unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */
575unsigned long tick_nsec = TICK_NSEC; /* ACTHZ period (nsec) */
576
577/* 565/*
578 * The current time 566 * The current time
579 * wall_to_monotonic is what we need to add to xtime (or xtime corrected 567 * wall_to_monotonic is what we need to add to xtime (or xtime corrected
@@ -587,209 +575,6 @@ struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
587 575
588EXPORT_SYMBOL(xtime); 576EXPORT_SYMBOL(xtime);
589 577
590/* Don't completely fail for HZ > 500. */
591int tickadj = 500/HZ ? : 1; /* microsecs */
592
593
594/*
595 * phase-lock loop variables
596 */
597/* TIME_ERROR prevents overwriting the CMOS clock */
598int time_state = TIME_OK; /* clock synchronization status */
599int time_status = STA_UNSYNC; /* clock status bits */
600long time_offset; /* time adjustment (us) */
601long time_constant = 2; /* pll time constant */
602long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */
603long time_precision = 1; /* clock precision (us) */
604long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */
605long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */
606long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC;
607 /* frequency offset (scaled ppm)*/
608static long time_adj; /* tick adjust (scaled 1 / HZ) */
609long time_reftime; /* time at last adjustment (s) */
610long time_adjust;
611long time_next_adjust;
612
613/*
614 * this routine handles the overflow of the microsecond field
615 *
616 * The tricky bits of code to handle the accurate clock support
617 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
618 * They were originally developed for SUN and DEC kernels.
619 * All the kudos should go to Dave for this stuff.
620 *
621 */
622static void second_overflow(void)
623{
624 long ltemp;
625
626 /* Bump the maxerror field */
627 time_maxerror += time_tolerance >> SHIFT_USEC;
628 if (time_maxerror > NTP_PHASE_LIMIT) {
629 time_maxerror = NTP_PHASE_LIMIT;
630 time_status |= STA_UNSYNC;
631 }
632
633 /*
634 * Leap second processing. If in leap-insert state at the end of the
635 * day, the system clock is set back one second; if in leap-delete
636 * state, the system clock is set ahead one second. The microtime()
637 * routine or external clock driver will insure that reported time is
638 * always monotonic. The ugly divides should be replaced.
639 */
640 switch (time_state) {
641 case TIME_OK:
642 if (time_status & STA_INS)
643 time_state = TIME_INS;
644 else if (time_status & STA_DEL)
645 time_state = TIME_DEL;
646 break;
647 case TIME_INS:
648 if (xtime.tv_sec % 86400 == 0) {
649 xtime.tv_sec--;
650 wall_to_monotonic.tv_sec++;
651 /*
652 * The timer interpolator will make time change
653 * gradually instead of an immediate jump by one second
654 */
655 time_interpolator_update(-NSEC_PER_SEC);
656 time_state = TIME_OOP;
657 clock_was_set();
658 printk(KERN_NOTICE "Clock: inserting leap second "
659 "23:59:60 UTC\n");
660 }
661 break;
662 case TIME_DEL:
663 if ((xtime.tv_sec + 1) % 86400 == 0) {
664 xtime.tv_sec++;
665 wall_to_monotonic.tv_sec--;
666 /*
667 * Use of time interpolator for a gradual change of
668 * time
669 */
670 time_interpolator_update(NSEC_PER_SEC);
671 time_state = TIME_WAIT;
672 clock_was_set();
673 printk(KERN_NOTICE "Clock: deleting leap second "
674 "23:59:59 UTC\n");
675 }
676 break;
677 case TIME_OOP:
678 time_state = TIME_WAIT;
679 break;
680 case TIME_WAIT:
681 if (!(time_status & (STA_INS | STA_DEL)))
682 time_state = TIME_OK;
683 }
684
685 /*
686 * Compute the phase adjustment for the next second. In PLL mode, the
687 * offset is reduced by a fixed factor times the time constant. In FLL
688 * mode the offset is used directly. In either mode, the maximum phase
689 * adjustment for each second is clamped so as to spread the adjustment
690 * over not more than the number of seconds between updates.
691 */
692 ltemp = time_offset;
693 if (!(time_status & STA_FLL))
694 ltemp = shift_right(ltemp, SHIFT_KG + time_constant);
695 ltemp = min(ltemp, (MAXPHASE / MINSEC) << SHIFT_UPDATE);
696 ltemp = max(ltemp, -(MAXPHASE / MINSEC) << SHIFT_UPDATE);
697 time_offset -= ltemp;
698 time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
699
700 /*
701 * Compute the frequency estimate and additional phase adjustment due
702 * to frequency error for the next second.
703 */
704 ltemp = time_freq;
705 time_adj += shift_right(ltemp,(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE));
706
707#if HZ == 100
708 /*
709 * Compensate for (HZ==100) != (1 << SHIFT_HZ). Add 25% and 3.125% to
710 * get 128.125; => only 0.125% error (p. 14)
711 */
712 time_adj += shift_right(time_adj, 2) + shift_right(time_adj, 5);
713#endif
714#if HZ == 250
715 /*
716 * Compensate for (HZ==250) != (1 << SHIFT_HZ). Add 1.5625% and
717 * 0.78125% to get 255.85938; => only 0.05% error (p. 14)
718 */
719 time_adj += shift_right(time_adj, 6) + shift_right(time_adj, 7);
720#endif
721#if HZ == 1000
722 /*
723 * Compensate for (HZ==1000) != (1 << SHIFT_HZ). Add 1.5625% and
724 * 0.78125% to get 1023.4375; => only 0.05% error (p. 14)
725 */
726 time_adj += shift_right(time_adj, 6) + shift_right(time_adj, 7);
727#endif
728}
729
730/*
731 * Returns how many microseconds we need to add to xtime this tick
732 * in doing an adjustment requested with adjtime.
733 */
734static long adjtime_adjustment(void)
735{
736 long time_adjust_step;
737
738 time_adjust_step = time_adjust;
739 if (time_adjust_step) {
740 /*
741 * We are doing an adjtime thing. Prepare time_adjust_step to
742 * be within bounds. Note that a positive time_adjust means we
743 * want the clock to run faster.
744 *
745 * Limit the amount of the step to be in the range
746 * -tickadj .. +tickadj
747 */
748 time_adjust_step = min(time_adjust_step, (long)tickadj);
749 time_adjust_step = max(time_adjust_step, (long)-tickadj);
750 }
751 return time_adjust_step;
752}
753
754/* in the NTP reference this is called "hardclock()" */
755static void update_ntp_one_tick(void)
756{
757 long time_adjust_step;
758
759 time_adjust_step = adjtime_adjustment();
760 if (time_adjust_step)
761 /* Reduce by this step the amount of time left */
762 time_adjust -= time_adjust_step;
763
764 /* Changes by adjtime() do not take effect till next tick. */
765 if (time_next_adjust != 0) {
766 time_adjust = time_next_adjust;
767 time_next_adjust = 0;
768 }
769}
770
771/*
772 * Return how long ticks are at the moment, that is, how much time
773 * update_wall_time_one_tick will add to xtime next time we call it
774 * (assuming no calls to do_adjtimex in the meantime).
775 * The return value is in fixed-point nanoseconds shifted by the
776 * specified number of bits to the right of the binary point.
777 * This function has no side-effects.
778 */
779u64 current_tick_length(void)
780{
781 long delta_nsec;
782 u64 ret;
783
784 /* calculate the finest interval NTP will allow.
785 * ie: nanosecond value shifted by (SHIFT_SCALE - 10)
786 */
787 delta_nsec = tick_nsec + adjtime_adjustment() * 1000;
788 ret = (u64)delta_nsec << TICK_LENGTH_SHIFT;
789 ret += (s64)time_adj << (TICK_LENGTH_SHIFT - (SHIFT_SCALE - 10));
790
791 return ret;
792}
793 578
794/* XXX - all of this timekeeping code should be later moved to time.c */ 579/* XXX - all of this timekeeping code should be later moved to time.c */
795#include <linux/clocksource.h> 580#include <linux/clocksource.h>
@@ -966,10 +751,13 @@ void __init timekeeping_init(void)
966 unsigned long flags; 751 unsigned long flags;
967 752
968 write_seqlock_irqsave(&xtime_lock, flags); 753 write_seqlock_irqsave(&xtime_lock, flags);
754
755 ntp_clear();
756
969 clock = clocksource_get_next(); 757 clock = clocksource_get_next();
970 clocksource_calculate_interval(clock, tick_nsec); 758 clocksource_calculate_interval(clock, tick_nsec);
971 clock->cycle_last = clocksource_read(clock); 759 clock->cycle_last = clocksource_read(clock);
972 ntp_clear(); 760
973 write_sequnlock_irqrestore(&xtime_lock, flags); 761 write_sequnlock_irqrestore(&xtime_lock, flags);
974} 762}
975 763
@@ -980,7 +768,7 @@ static int timekeeping_suspended;
980 * @dev: unused 768 * @dev: unused
981 * 769 *
982 * This is for the generic clocksource timekeeping. 770 * This is for the generic clocksource timekeeping.
983 * xtime/wall_to_monotonic/jiffies/wall_jiffies/etc are 771 * xtime/wall_to_monotonic/jiffies/etc are
984 * still managed by arch specific suspend/resume code. 772 * still managed by arch specific suspend/resume code.
985 */ 773 */
986static int timekeeping_resume(struct sys_device *dev) 774static int timekeeping_resume(struct sys_device *dev)
@@ -1149,8 +937,6 @@ static void update_wall_time(void)
1149 /* interpolator bits */ 937 /* interpolator bits */
1150 time_interpolator_update(clock->xtime_interval 938 time_interpolator_update(clock->xtime_interval
1151 >> clock->shift); 939 >> clock->shift);
1152 /* increment the NTP state machine */
1153 update_ntp_one_tick();
1154 940
1155 /* accumulate error between NTP and clock interval */ 941 /* accumulate error between NTP and clock interval */
1156 clock->error += current_tick_length(); 942 clock->error += current_tick_length();
@@ -1230,9 +1016,6 @@ static inline void calc_load(unsigned long ticks)
1230 } 1016 }
1231} 1017}
1232 1018
1233/* jiffies at the most recent update of wall time */
1234unsigned long wall_jiffies = INITIAL_JIFFIES;
1235
1236/* 1019/*
1237 * This read-write spinlock protects us from races in SMP while 1020 * This read-write spinlock protects us from races in SMP while
1238 * playing with xtime and avenrun. 1021 * playing with xtime and avenrun.
@@ -1270,7 +1053,6 @@ void run_local_timers(void)
1270 */ 1053 */
1271static inline void update_times(unsigned long ticks) 1054static inline void update_times(unsigned long ticks)
1272{ 1055{
1273 wall_jiffies += ticks;
1274 update_wall_time(); 1056 update_wall_time();
1275 calc_load(ticks); 1057 calc_load(ticks);
1276} 1058}
@@ -1775,7 +1557,7 @@ unsigned long time_interpolator_get_offset(void)
1775#define INTERPOLATOR_ADJUST 65536 1557#define INTERPOLATOR_ADJUST 65536
1776#define INTERPOLATOR_MAX_SKIP 10*INTERPOLATOR_ADJUST 1558#define INTERPOLATOR_MAX_SKIP 10*INTERPOLATOR_ADJUST
1777 1559
1778static void time_interpolator_update(long delta_nsec) 1560void time_interpolator_update(long delta_nsec)
1779{ 1561{
1780 u64 counter; 1562 u64 counter;
1781 unsigned long offset; 1563 unsigned long offset;
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
new file mode 100644
index 0000000000..db443221ba
--- /dev/null
+++ b/kernel/tsacct.c
@@ -0,0 +1,124 @@
1/*
2 * tsacct.c - System accounting over taskstats interface
3 *
4 * Copyright (C) Jay Lan, <jlan@sgi.com>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 */
18
19#include <linux/kernel.h>
20#include <linux/sched.h>
21#include <linux/tsacct_kern.h>
22#include <linux/acct.h>
23#include <linux/jiffies.h>
24
25
26#define USEC_PER_TICK (USEC_PER_SEC/HZ)
27/*
28 * fill in basic accounting fields
29 */
30void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
31{
32 struct timespec uptime, ts;
33 s64 ac_etime;
34
35 BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN);
36
37 /* calculate task elapsed time in timespec */
38 do_posix_clock_monotonic_gettime(&uptime);
39 ts = timespec_sub(uptime, current->group_leader->start_time);
40 /* rebase elapsed time to usec */
41 ac_etime = timespec_to_ns(&ts);
42 do_div(ac_etime, NSEC_PER_USEC);
43 stats->ac_etime = ac_etime;
44 stats->ac_btime = xtime.tv_sec - ts.tv_sec;
45 if (thread_group_leader(tsk)) {
46 stats->ac_exitcode = tsk->exit_code;
47 if (tsk->flags & PF_FORKNOEXEC)
48 stats->ac_flag |= AFORK;
49 }
50 if (tsk->flags & PF_SUPERPRIV)
51 stats->ac_flag |= ASU;
52 if (tsk->flags & PF_DUMPCORE)
53 stats->ac_flag |= ACORE;
54 if (tsk->flags & PF_SIGNALED)
55 stats->ac_flag |= AXSIG;
56 stats->ac_nice = task_nice(tsk);
57 stats->ac_sched = tsk->policy;
58 stats->ac_uid = tsk->uid;
59 stats->ac_gid = tsk->gid;
60 stats->ac_pid = tsk->pid;
61 stats->ac_ppid = (tsk->parent) ? tsk->parent->pid : 0;
62 stats->ac_utime = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC;
63 stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC;
64 stats->ac_minflt = tsk->min_flt;
65 stats->ac_majflt = tsk->maj_flt;
66
67 strncpy(stats->ac_comm, tsk->comm, sizeof(stats->ac_comm));
68}
69
70
71#ifdef CONFIG_TASK_XACCT
72
73#define KB 1024
74#define MB (1024*KB)
75/*
76 * fill in extended accounting fields
77 */
78void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
79{
80 /* convert pages-jiffies to Mbyte-usec */
81 stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB;
82 stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB;
83 if (p->mm) {
84 /* adjust to KB unit */
85 stats->hiwater_rss = p->mm->hiwater_rss * PAGE_SIZE / KB;
86 stats->hiwater_vm = p->mm->hiwater_vm * PAGE_SIZE / KB;
87 }
88 stats->read_char = p->rchar;
89 stats->write_char = p->wchar;
90 stats->read_syscalls = p->syscr;
91 stats->write_syscalls = p->syscw;
92}
93#undef KB
94#undef MB
95
96/**
97 * acct_update_integrals - update mm integral fields in task_struct
98 * @tsk: task_struct for accounting
99 */
100void acct_update_integrals(struct task_struct *tsk)
101{
102 if (likely(tsk->mm)) {
103 long delta = cputime_to_jiffies(
104 cputime_sub(tsk->stime, tsk->acct_stimexpd));
105
106 if (delta == 0)
107 return;
108 tsk->acct_stimexpd = tsk->stime;
109 tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
110 tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
111 }
112}
113
114/**
115 * acct_clear_integrals - clear the mm integral fields in task_struct
116 * @tsk: task_struct whose accounting fields are cleared
117 */
118void acct_clear_integrals(struct task_struct *tsk)
119{
120 tsk->acct_stimexpd = 0;
121 tsk->acct_rss_mem1 = 0;
122 tsk->acct_vm_mem1 = 0;
123}
124#endif
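
A worked example of the unit flow in acct_update_integrals() and xacct_add_tsk() above: the integral accumulates RSS-in-pages times system-time deltas in jiffies, and is later rebased to MB-usec. The numbers, the HZ=1000 assumption, the 4 KiB page size, and the variable names are made up for illustration; this is an ordinary userspace program, not kernel code.

#include <stdio.h>

int main(void)
{
	/* assumptions for illustration: HZ = 1000, PAGE_SIZE = 4 KiB */
	unsigned long hz = 1000, page_size = 4096;
	unsigned long long rss_integral = 0;	/* stands in for acct_rss_mem1 */

	/* acct_update_integrals(): accumulate RSS (pages) * delta (jiffies) */
	rss_integral += 2000ULL * 50;		/* 2000 pages for 50 jiffies */

	/* xacct_add_tsk(): rebase page-jiffies to MB-usec */
	unsigned long long coremem =
		rss_integral * (1000000UL / hz)	/* jiffies_to_usecs() */
		* page_size / (1024 * 1024);	/* pages to MB */

	printf("coremem = %llu MB-usec\n", coremem);	/* prints 390625 */
	return 0;
}
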
diff --git a/kernel/utsname.c b/kernel/utsname.c
new file mode 100644
index 0000000000..c859164a69
--- /dev/null
+++ b/kernel/utsname.c
@@ -0,0 +1,95 @@
1/*
2 * Copyright (C) 2004 IBM Corporation
3 *
4 * Author: Serge Hallyn <serue@us.ibm.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation, version 2 of the
9 * License.
10 */
11
12#include <linux/module.h>
13#include <linux/uts.h>
14#include <linux/utsname.h>
15#include <linux/version.h>
16
17/*
18 * Clone a new ns copying an original utsname, setting refcount to 1
19 * @old_ns: namespace to clone
20 * Return NULL on error (failure to kmalloc), new ns otherwise
21 */
22static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
23{
24 struct uts_namespace *ns;
25
26 ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
27 if (ns) {
28 memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
29 kref_init(&ns->kref);
30 }
31 return ns;
32}
33
34/*
35 * unshare the current process' utsname namespace.
36 * called only in sys_unshare()
37 */
38int unshare_utsname(unsigned long unshare_flags, struct uts_namespace **new_uts)
39{
40 if (unshare_flags & CLONE_NEWUTS) {
41 if (!capable(CAP_SYS_ADMIN))
42 return -EPERM;
43
44 *new_uts = clone_uts_ns(current->nsproxy->uts_ns);
45 if (!*new_uts)
46 return -ENOMEM;
47 }
48
49 return 0;
50}
51
52/*
53 * Copy task tsk's utsname namespace, or clone it if flags
54 * specifies CLONE_NEWUTS. In the latter case, changes to the
55 * utsname of this process won't be seen by the parent, and
56 * vice versa.
57 */
58int copy_utsname(int flags, struct task_struct *tsk)
59{
60 struct uts_namespace *old_ns = tsk->nsproxy->uts_ns;
61 struct uts_namespace *new_ns;
62 int err = 0;
63
64 if (!old_ns)
65 return 0;
66
67 get_uts_ns(old_ns);
68
69 if (!(flags & CLONE_NEWUTS))
70 return 0;
71
72 if (!capable(CAP_SYS_ADMIN)) {
73 err = -EPERM;
74 goto out;
75 }
76
77 new_ns = clone_uts_ns(old_ns);
78 if (!new_ns) {
79 err = -ENOMEM;
80 goto out;
81 }
82 tsk->nsproxy->uts_ns = new_ns;
83
84out:
85 put_uts_ns(old_ns);
86 return err;
87}
88
89void free_uts_ns(struct kref *kref)
90{
91 struct uts_namespace *ns;
92
93 ns = container_of(kref, struct uts_namespace, kref);
94 kfree(ns);
95}
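
free_uts_ns() above is the kref release callback; the get_uts_ns()/put_uts_ns() helpers called from copy_utsname() live outside this file. A sketch of how such helpers would wrap kref, assuming they follow the usual pattern (the real definitions belong in include/linux/utsname.h and may differ):

#include <linux/kref.h>
#include <linux/utsname.h>

/* Assumed sketch, not taken from this patch. */
static inline void get_uts_ns(struct uts_namespace *ns)
{
	kref_get(&ns->kref);
}

static inline void put_uts_ns(struct uts_namespace *ns)
{
	/* dropping the last reference frees the namespace via free_uts_ns() */
	kref_put(&ns->kref, free_uts_ns);
}
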