author     Adrian Bunk <bunk@r063144.stusta.swh.mhn.de>   2006-03-20 12:30:36 -0500
committer  Adrian Bunk <bunk@r063144.stusta.swh.mhn.de>   2006-03-20 12:30:36 -0500
commit     0f76ee451484d02c7405d92e7bceb39b415abb01 (patch)
tree       9722f84281f786ba48971dde057f5171a49969e4 /kernel
parent     01d206a7c1167639f6ca6dac22140fbdca017558 (diff)
parent     7705a8792b0fc82fd7d4dd923724606bbfd9fb20 (diff)
Merge with git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/auditsc.c        |   6
-rw-r--r--  kernel/compat.c         |   1
-rw-r--r--  kernel/cpuset.c         |  35
-rw-r--r--  kernel/exit.c           |   3
-rw-r--r--  kernel/fork.c           | 411
-rw-r--r--  kernel/hrtimer.c        |  48
-rw-r--r--  kernel/intermodule.c    |   3
-rw-r--r--  kernel/module.c         |   3
-rw-r--r--  kernel/panic.c          |   1
-rw-r--r--  kernel/posix-timers.c   |   1
-rw-r--r--  kernel/power/console.c  |   4
-rw-r--r--  kernel/power/power.h    |  16
-rw-r--r--  kernel/power/snapshot.c |   4
-rw-r--r--  kernel/power/swsusp.c   |  10
-rw-r--r--  kernel/ptrace.c         |  28
-rw-r--r--  kernel/rcupdate.c       |  76
-rw-r--r--  kernel/sched.c          | 178
-rw-r--r--  kernel/signal.c         |   9
-rw-r--r--  kernel/sys.c            |   2
-rw-r--r--  kernel/sys_ni.c         |   2
-rw-r--r--  kernel/sysctl.c         |  35
-rw-r--r--  kernel/timer.c          |  67
22 files changed, 664 insertions(+), 279 deletions(-)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 685c25175d96..d7e7e637b92a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -841,7 +841,7 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
841 841
842 for (aux = context->aux; aux; aux = aux->next) { 842 for (aux = context->aux; aux; aux = aux->next) {
843 843
844 ab = audit_log_start(context, GFP_KERNEL, aux->type); 844 ab = audit_log_start(context, gfp_mask, aux->type);
845 if (!ab) 845 if (!ab)
846 continue; /* audit_panic has been called */ 846 continue; /* audit_panic has been called */
847 847
@@ -878,14 +878,14 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
878 } 878 }
879 879
880 if (context->pwd && context->pwdmnt) { 880 if (context->pwd && context->pwdmnt) {
881 ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD); 881 ab = audit_log_start(context, gfp_mask, AUDIT_CWD);
882 if (ab) { 882 if (ab) {
883 audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt); 883 audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt);
884 audit_log_end(ab); 884 audit_log_end(ab);
885 } 885 }
886 } 886 }
887 for (i = 0; i < context->name_count; i++) { 887 for (i = 0; i < context->name_count; i++) {
888 ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); 888 ab = audit_log_start(context, gfp_mask, AUDIT_PATH);
889 if (!ab) 889 if (!ab)
890 continue; /* audit_panic has been called */ 890 continue; /* audit_panic has been called */
891 891
diff --git a/kernel/compat.c b/kernel/compat.c
index 1867290c37e3..8c9cd88b6785 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -23,7 +23,6 @@
23#include <linux/security.h> 23#include <linux/security.h>
24 24
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include <asm/bug.h>
27 26
28int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts) 27int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
29{ 28{
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index ba42b0a76961..12815d3f1a05 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1977,6 +1977,39 @@ void cpuset_fork(struct task_struct *child)
1977 * We don't need to task_lock() this reference to tsk->cpuset, 1977 * We don't need to task_lock() this reference to tsk->cpuset,
1978 * because tsk is already marked PF_EXITING, so attach_task() won't 1978 * because tsk is already marked PF_EXITING, so attach_task() won't
1979 * mess with it, or task is a failed fork, never visible to attach_task. 1979 * mess with it, or task is a failed fork, never visible to attach_task.
1980 *
1981 * Hack:
1982 *
1983 * Set the exiting tasks cpuset to the root cpuset (top_cpuset).
1984 *
1985 * Don't leave a task unable to allocate memory, as that is an
1986 * accident waiting to happen should someone add a callout in
1987 * do_exit() after the cpuset_exit() call that might allocate.
1988 * If a task tries to allocate memory with an invalid cpuset,
1989 * it will oops in cpuset_update_task_memory_state().
1990 *
1991 * We call cpuset_exit() while the task is still competent to
1992 * handle notify_on_release(), then leave the task attached to
1993 * the root cpuset (top_cpuset) for the remainder of its exit.
1994 *
1995 * To do this properly, we would increment the reference count on
1996 * top_cpuset, and near the very end of the kernel/exit.c do_exit()
1997 * code we would add a second cpuset function call, to drop that
1998 * reference. This would just create an unnecessary hot spot on
1999 * the top_cpuset reference count, to no avail.
2000 *
2001 * Normally, holding a reference to a cpuset without bumping its
2002 * count is unsafe. The cpuset could go away, or someone could
2003 * attach us to a different cpuset, decrementing the count on
2004 * the first cpuset that we never incremented. But in this case,
2005 * top_cpuset isn't going away, and either task has PF_EXITING set,
2006 * which wards off any attach_task() attempts, or task is a failed
2007 * fork, never visible to attach_task.
2008 *
2009 * Another way to do this would be to set the cpuset pointer
2010 * to NULL here, and check in cpuset_update_task_memory_state()
2011 * for a NULL pointer. This hack avoids that NULL check, for no
2012 * cost (other than this way too long comment ;).
1980 **/ 2013 **/
1981 2014
1982void cpuset_exit(struct task_struct *tsk) 2015void cpuset_exit(struct task_struct *tsk)
@@ -1984,7 +2017,7 @@ void cpuset_exit(struct task_struct *tsk)
1984 struct cpuset *cs; 2017 struct cpuset *cs;
1985 2018
1986 cs = tsk->cpuset; 2019 cs = tsk->cpuset;
1987 tsk->cpuset = NULL; 2020 tsk->cpuset = &top_cpuset; /* Hack - see comment above */
1988 2021
1989 if (notify_on_release(cs)) { 2022 if (notify_on_release(cs)) {
1990 char *pathbuf = NULL; 2023 char *pathbuf = NULL;
diff --git a/kernel/exit.c b/kernel/exit.c
index 93cee3671332..531aadca5530 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -360,6 +360,9 @@ void daemonize(const char *name, ...)
360 fs = init_task.fs; 360 fs = init_task.fs;
361 current->fs = fs; 361 current->fs = fs;
362 atomic_inc(&fs->count); 362 atomic_inc(&fs->count);
363 exit_namespace(current);
364 current->namespace = init_task.namespace;
365 get_namespace(current->namespace);
363 exit_files(current); 366 exit_files(current);
364 current->files = init_task.files; 367 current->files = init_task.files;
365 atomic_inc(&current->files->count); 368 atomic_inc(&current->files->count);
diff --git a/kernel/fork.c b/kernel/fork.c
index 7f0ab5ee948c..b373322ca497 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -108,8 +108,10 @@ void free_task(struct task_struct *tsk)
108} 108}
109EXPORT_SYMBOL(free_task); 109EXPORT_SYMBOL(free_task);
110 110
111void __put_task_struct(struct task_struct *tsk) 111void __put_task_struct_cb(struct rcu_head *rhp)
112{ 112{
113 struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
114
113 WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE))); 115 WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
114 WARN_ON(atomic_read(&tsk->usage)); 116 WARN_ON(atomic_read(&tsk->usage));
115 WARN_ON(tsk == current); 117 WARN_ON(tsk == current);
@@ -446,6 +448,55 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
446 } 448 }
447} 449}
448 450
451/*
452 * Allocate a new mm structure and copy contents from the
453 * mm structure of the passed in task structure.
454 */
455static struct mm_struct *dup_mm(struct task_struct *tsk)
456{
457 struct mm_struct *mm, *oldmm = current->mm;
458 int err;
459
460 if (!oldmm)
461 return NULL;
462
463 mm = allocate_mm();
464 if (!mm)
465 goto fail_nomem;
466
467 memcpy(mm, oldmm, sizeof(*mm));
468
469 if (!mm_init(mm))
470 goto fail_nomem;
471
472 if (init_new_context(tsk, mm))
473 goto fail_nocontext;
474
475 err = dup_mmap(mm, oldmm);
476 if (err)
477 goto free_pt;
478
479 mm->hiwater_rss = get_mm_rss(mm);
480 mm->hiwater_vm = mm->total_vm;
481
482 return mm;
483
484free_pt:
485 mmput(mm);
486
487fail_nomem:
488 return NULL;
489
490fail_nocontext:
491 /*
492 * If init_new_context() failed, we cannot use mmput() to free the mm
493 * because it calls destroy_context()
494 */
495 mm_free_pgd(mm);
496 free_mm(mm);
497 return NULL;
498}
499
449static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) 500static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
450{ 501{
451 struct mm_struct * mm, *oldmm; 502 struct mm_struct * mm, *oldmm;
@@ -473,43 +524,17 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
473 } 524 }
474 525
475 retval = -ENOMEM; 526 retval = -ENOMEM;
476 mm = allocate_mm(); 527 mm = dup_mm(tsk);
477 if (!mm) 528 if (!mm)
478 goto fail_nomem; 529 goto fail_nomem;
479 530
480 /* Copy the current MM stuff.. */
481 memcpy(mm, oldmm, sizeof(*mm));
482 if (!mm_init(mm))
483 goto fail_nomem;
484
485 if (init_new_context(tsk,mm))
486 goto fail_nocontext;
487
488 retval = dup_mmap(mm, oldmm);
489 if (retval)
490 goto free_pt;
491
492 mm->hiwater_rss = get_mm_rss(mm);
493 mm->hiwater_vm = mm->total_vm;
494
495good_mm: 531good_mm:
496 tsk->mm = mm; 532 tsk->mm = mm;
497 tsk->active_mm = mm; 533 tsk->active_mm = mm;
498 return 0; 534 return 0;
499 535
500free_pt:
501 mmput(mm);
502fail_nomem: 536fail_nomem:
503 return retval; 537 return retval;
504
505fail_nocontext:
506 /*
507 * If init_new_context() failed, we cannot use mmput() to free the mm
508 * because it calls destroy_context()
509 */
510 mm_free_pgd(mm);
511 free_mm(mm);
512 return retval;
513} 538}
514 539
515static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old) 540static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
@@ -597,32 +622,17 @@ out:
597 return newf; 622 return newf;
598} 623}
599 624
600static int copy_files(unsigned long clone_flags, struct task_struct * tsk) 625/*
626 * Allocate a new files structure and copy contents from the
627 * passed in files structure.
628 */
629static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
601{ 630{
602 struct files_struct *oldf, *newf; 631 struct files_struct *newf;
603 struct file **old_fds, **new_fds; 632 struct file **old_fds, **new_fds;
604 int open_files, size, i, error = 0, expand; 633 int open_files, size, i, expand;
605 struct fdtable *old_fdt, *new_fdt; 634 struct fdtable *old_fdt, *new_fdt;
606 635
607 /*
608 * A background process may not have any files ...
609 */
610 oldf = current->files;
611 if (!oldf)
612 goto out;
613
614 if (clone_flags & CLONE_FILES) {
615 atomic_inc(&oldf->count);
616 goto out;
617 }
618
619 /*
620 * Note: we may be using current for both targets (See exec.c)
621 * This works because we cache current->files (old) as oldf. Don't
622 * break this.
623 */
624 tsk->files = NULL;
625 error = -ENOMEM;
626 newf = alloc_files(); 636 newf = alloc_files();
627 if (!newf) 637 if (!newf)
628 goto out; 638 goto out;
@@ -651,9 +661,9 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
651 if (expand) { 661 if (expand) {
652 spin_unlock(&oldf->file_lock); 662 spin_unlock(&oldf->file_lock);
653 spin_lock(&newf->file_lock); 663 spin_lock(&newf->file_lock);
654 error = expand_files(newf, open_files-1); 664 *errorp = expand_files(newf, open_files-1);
655 spin_unlock(&newf->file_lock); 665 spin_unlock(&newf->file_lock);
656 if (error < 0) 666 if (*errorp < 0)
657 goto out_release; 667 goto out_release;
658 new_fdt = files_fdtable(newf); 668 new_fdt = files_fdtable(newf);
659 /* 669 /*
@@ -702,10 +712,8 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
702 memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); 712 memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
703 } 713 }
704 714
705 tsk->files = newf;
706 error = 0;
707out: 715out:
708 return error; 716 return newf;
709 717
710out_release: 718out_release:
711 free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset); 719 free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset);
@@ -715,6 +723,40 @@ out_release:
715 goto out; 723 goto out;
716} 724}
717 725
726static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
727{
728 struct files_struct *oldf, *newf;
729 int error = 0;
730
731 /*
732 * A background process may not have any files ...
733 */
734 oldf = current->files;
735 if (!oldf)
736 goto out;
737
738 if (clone_flags & CLONE_FILES) {
739 atomic_inc(&oldf->count);
740 goto out;
741 }
742
743 /*
744 * Note: we may be using current for both targets (See exec.c)
745 * This works because we cache current->files (old) as oldf. Don't
746 * break this.
747 */
748 tsk->files = NULL;
749 error = -ENOMEM;
750 newf = dup_fd(oldf, &error);
751 if (!newf)
752 goto out;
753
754 tsk->files = newf;
755 error = 0;
756out:
757 return error;
758}
759
718/* 760/*
719 * Helper to unshare the files of the current task. 761 * Helper to unshare the files of the current task.
720 * We don't want to expose copy_files internals to 762 * We don't want to expose copy_files internals to
@@ -1020,6 +1062,12 @@ static task_t *copy_process(unsigned long clone_flags,
1020 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; 1062 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
1021 1063
1022 /* 1064 /*
1065 * sigaltstack should be cleared when sharing the same VM
1066 */
1067 if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
1068 p->sas_ss_sp = p->sas_ss_size = 0;
1069
1070 /*
1023 * Syscall tracing should be turned off in the child regardless 1071 * Syscall tracing should be turned off in the child regardless
1024 * of CLONE_PTRACE. 1072 * of CLONE_PTRACE.
1025 */ 1073 */
@@ -1083,8 +1131,8 @@ static task_t *copy_process(unsigned long clone_flags,
1083 p->real_parent = current; 1131 p->real_parent = current;
1084 p->parent = p->real_parent; 1132 p->parent = p->real_parent;
1085 1133
1134 spin_lock(&current->sighand->siglock);
1086 if (clone_flags & CLONE_THREAD) { 1135 if (clone_flags & CLONE_THREAD) {
1087 spin_lock(&current->sighand->siglock);
1088 /* 1136 /*
1089 * Important: if an exit-all has been started then 1137 * Important: if an exit-all has been started then
1090 * do not create this new thread - the whole thread 1138 * do not create this new thread - the whole thread
@@ -1122,8 +1170,6 @@ static task_t *copy_process(unsigned long clone_flags,
1122 */ 1170 */
1123 p->it_prof_expires = jiffies_to_cputime(1); 1171 p->it_prof_expires = jiffies_to_cputime(1);
1124 } 1172 }
1125
1126 spin_unlock(&current->sighand->siglock);
1127 } 1173 }
1128 1174
1129 /* 1175 /*
@@ -1135,8 +1181,6 @@ static task_t *copy_process(unsigned long clone_flags,
1135 if (unlikely(p->ptrace & PT_PTRACED)) 1181 if (unlikely(p->ptrace & PT_PTRACED))
1136 __ptrace_link(p, current->parent); 1182 __ptrace_link(p, current->parent);
1137 1183
1138 attach_pid(p, PIDTYPE_PID, p->pid);
1139 attach_pid(p, PIDTYPE_TGID, p->tgid);
1140 if (thread_group_leader(p)) { 1184 if (thread_group_leader(p)) {
1141 p->signal->tty = current->signal->tty; 1185 p->signal->tty = current->signal->tty;
1142 p->signal->pgrp = process_group(current); 1186 p->signal->pgrp = process_group(current);
@@ -1146,9 +1190,12 @@ static task_t *copy_process(unsigned long clone_flags,
1146 if (p->pid) 1190 if (p->pid)
1147 __get_cpu_var(process_counts)++; 1191 __get_cpu_var(process_counts)++;
1148 } 1192 }
1193 attach_pid(p, PIDTYPE_TGID, p->tgid);
1194 attach_pid(p, PIDTYPE_PID, p->pid);
1149 1195
1150 nr_threads++; 1196 nr_threads++;
1151 total_forks++; 1197 total_forks++;
1198 spin_unlock(&current->sighand->siglock);
1152 write_unlock_irq(&tasklist_lock); 1199 write_unlock_irq(&tasklist_lock);
1153 proc_fork_connector(p); 1200 proc_fork_connector(p);
1154 return p; 1201 return p;
@@ -1323,3 +1370,247 @@ void __init proc_caches_init(void)
1323 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, 1370 sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
1324 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 1371 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1325} 1372}
1373
1374
1375/*
1376 * Check constraints on flags passed to the unshare system call and
1377 * force unsharing of additional process context as appropriate.
1378 */
1379static inline void check_unshare_flags(unsigned long *flags_ptr)
1380{
1381 /*
1382 * If unsharing a thread from a thread group, must also
1383 * unshare vm.
1384 */
1385 if (*flags_ptr & CLONE_THREAD)
1386 *flags_ptr |= CLONE_VM;
1387
1388 /*
1389 * If unsharing vm, must also unshare signal handlers.
1390 */
1391 if (*flags_ptr & CLONE_VM)
1392 *flags_ptr |= CLONE_SIGHAND;
1393
1394 /*
1395 * If unsharing signal handlers and the task was created
1396 * using CLONE_THREAD, then must unshare the thread
1397 */
1398 if ((*flags_ptr & CLONE_SIGHAND) &&
1399 (atomic_read(&current->signal->count) > 1))
1400 *flags_ptr |= CLONE_THREAD;
1401
1402 /*
1403 * If unsharing namespace, must also unshare filesystem information.
1404 */
1405 if (*flags_ptr & CLONE_NEWNS)
1406 *flags_ptr |= CLONE_FS;
1407}
1408
1409/*
1410 * Unsharing of tasks created with CLONE_THREAD is not supported yet
1411 */
1412static int unshare_thread(unsigned long unshare_flags)
1413{
1414 if (unshare_flags & CLONE_THREAD)
1415 return -EINVAL;
1416
1417 return 0;
1418}
1419
1420/*
1421 * Unshare the filesystem structure if it is being shared
1422 */
1423static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
1424{
1425 struct fs_struct *fs = current->fs;
1426
1427 if ((unshare_flags & CLONE_FS) &&
1428 (fs && atomic_read(&fs->count) > 1)) {
1429 *new_fsp = __copy_fs_struct(current->fs);
1430 if (!*new_fsp)
1431 return -ENOMEM;
1432 }
1433
1434 return 0;
1435}
1436
1437/*
1438 * Unshare the namespace structure if it is being shared
1439 */
1440static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
1441{
1442 struct namespace *ns = current->namespace;
1443
1444 if ((unshare_flags & CLONE_NEWNS) &&
1445 (ns && atomic_read(&ns->count) > 1)) {
1446 if (!capable(CAP_SYS_ADMIN))
1447 return -EPERM;
1448
1449 *new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs);
1450 if (!*new_nsp)
1451 return -ENOMEM;
1452 }
1453
1454 return 0;
1455}
1456
1457/*
1458 * Unsharing of sighand for tasks created with CLONE_SIGHAND is not
1459 * supported yet
1460 */
1461static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp)
1462{
1463 struct sighand_struct *sigh = current->sighand;
1464
1465 if ((unshare_flags & CLONE_SIGHAND) &&
1466 (sigh && atomic_read(&sigh->count) > 1))
1467 return -EINVAL;
1468 else
1469 return 0;
1470}
1471
1472/*
1473 * Unshare vm if it is being shared
1474 */
1475static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp)
1476{
1477 struct mm_struct *mm = current->mm;
1478
1479 if ((unshare_flags & CLONE_VM) &&
1480 (mm && atomic_read(&mm->mm_users) > 1)) {
1481 return -EINVAL;
1482 }
1483
1484 return 0;
1485}
1486
1487/*
1488 * Unshare file descriptor table if it is being shared
1489 */
1490static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
1491{
1492 struct files_struct *fd = current->files;
1493 int error = 0;
1494
1495 if ((unshare_flags & CLONE_FILES) &&
1496 (fd && atomic_read(&fd->count) > 1)) {
1497 *new_fdp = dup_fd(fd, &error);
1498 if (!*new_fdp)
1499 return error;
1500 }
1501
1502 return 0;
1503}
1504
1505/*
1506 * Unsharing of semundo for tasks created with CLONE_SYSVSEM is not
1507 * supported yet
1508 */
1509static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp)
1510{
1511 if (unshare_flags & CLONE_SYSVSEM)
1512 return -EINVAL;
1513
1514 return 0;
1515}
1516
1517/*
1518 * unshare allows a process to 'unshare' part of the process
1519 * context which was originally shared using clone. copy_*
1520 * functions used by do_fork() cannot be used here directly
1521 * because they modify an inactive task_struct that is being
1522 * constructed. Here we are modifying the current, active,
1523 * task_struct.
1524 */
1525asmlinkage long sys_unshare(unsigned long unshare_flags)
1526{
1527 int err = 0;
1528 struct fs_struct *fs, *new_fs = NULL;
1529 struct namespace *ns, *new_ns = NULL;
1530 struct sighand_struct *sigh, *new_sigh = NULL;
1531 struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
1532 struct files_struct *fd, *new_fd = NULL;
1533 struct sem_undo_list *new_ulist = NULL;
1534
1535 check_unshare_flags(&unshare_flags);
1536
1537 if ((err = unshare_thread(unshare_flags)))
1538 goto bad_unshare_out;
1539 if ((err = unshare_fs(unshare_flags, &new_fs)))
1540 goto bad_unshare_cleanup_thread;
1541 if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs)))
1542 goto bad_unshare_cleanup_fs;
1543 if ((err = unshare_sighand(unshare_flags, &new_sigh)))
1544 goto bad_unshare_cleanup_ns;
1545 if ((err = unshare_vm(unshare_flags, &new_mm)))
1546 goto bad_unshare_cleanup_sigh;
1547 if ((err = unshare_fd(unshare_flags, &new_fd)))
1548 goto bad_unshare_cleanup_vm;
1549 if ((err = unshare_semundo(unshare_flags, &new_ulist)))
1550 goto bad_unshare_cleanup_fd;
1551
1552 if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) {
1553
1554 task_lock(current);
1555
1556 if (new_fs) {
1557 fs = current->fs;
1558 current->fs = new_fs;
1559 new_fs = fs;
1560 }
1561
1562 if (new_ns) {
1563 ns = current->namespace;
1564 current->namespace = new_ns;
1565 new_ns = ns;
1566 }
1567
1568 if (new_sigh) {
1569 sigh = current->sighand;
1570 rcu_assign_pointer(current->sighand, new_sigh);
1571 new_sigh = sigh;
1572 }
1573
1574 if (new_mm) {
1575 mm = current->mm;
1576 active_mm = current->active_mm;
1577 current->mm = new_mm;
1578 current->active_mm = new_mm;
1579 activate_mm(active_mm, new_mm);
1580 new_mm = mm;
1581 }
1582
1583 if (new_fd) {
1584 fd = current->files;
1585 current->files = new_fd;
1586 new_fd = fd;
1587 }
1588
1589 task_unlock(current);
1590 }
1591
1592bad_unshare_cleanup_fd:
1593 if (new_fd)
1594 put_files_struct(new_fd);
1595
1596bad_unshare_cleanup_vm:
1597 if (new_mm)
1598 mmput(new_mm);
1599
1600bad_unshare_cleanup_sigh:
1601 if (new_sigh)
1602 if (atomic_dec_and_test(&new_sigh->count))
1603 kmem_cache_free(sighand_cachep, new_sigh);
1604
1605bad_unshare_cleanup_ns:
1606 if (new_ns)
1607 put_namespace(new_ns);
1608
1609bad_unshare_cleanup_fs:
1610 if (new_fs)
1611 put_fs_struct(new_fs);
1612
1613bad_unshare_cleanup_thread:
1614bad_unshare_out:
1615 return err;
1616}
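
For context, a minimal userspace sketch of exercising the sys_unshare() entry point added above. The __NR_unshare constant and the header choices are assumptions about how the syscall ends up being wired on a given architecture; they are not part of this patch.

	/* Hypothetical example: give this process a private mount namespace. */
	#include <sched.h>         /* CLONE_NEWNS, CLONE_FS */
	#include <sys/syscall.h>   /* __NR_unshare -- assumed to be defined here */
	#include <unistd.h>
	#include <stdio.h>

	int main(void)
	{
		/* check_unshare_flags() above forces CLONE_FS on when CLONE_NEWNS is set. */
		if (syscall(__NR_unshare, CLONE_NEWNS) == -1) {
			perror("unshare");   /* e.g. EPERM without CAP_SYS_ADMIN */
			return 1;
		}
		/* From here on, mounts made by this process are no longer shared. */
		return 0;
	}

In this version, CLONE_THREAD and CLONE_SYSVSEM requests are rejected outright, and CLONE_SIGHAND or CLONE_VM requests fail with -EINVAL whenever that context is actually shared, so only the CLONE_FS, CLONE_NEWNS and CLONE_FILES cases can take effect.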
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 2b6e1757aedd..14bc9cfa6399 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -418,8 +418,19 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
418 /* Switch the timer base, if necessary: */ 418 /* Switch the timer base, if necessary: */
419 new_base = switch_hrtimer_base(timer, base); 419 new_base = switch_hrtimer_base(timer, base);
420 420
421 if (mode == HRTIMER_REL) 421 if (mode == HRTIMER_REL) {
422 tim = ktime_add(tim, new_base->get_time()); 422 tim = ktime_add(tim, new_base->get_time());
423 /*
424 * CONFIG_TIME_LOW_RES is a temporary way for architectures
425 * to signal that they simply return xtime in
426 * do_gettimeoffset(). In this case we want to round up by
427 * resolution when starting a relative timer, to avoid short
428 * timeouts. This will go away with the GTOD framework.
429 */
430#ifdef CONFIG_TIME_LOW_RES
431 tim = ktime_add(tim, base->resolution);
432#endif
433 }
423 timer->expires = tim; 434 timer->expires = tim;
424 435
425 enqueue_hrtimer(timer, new_base); 436 enqueue_hrtimer(timer, new_base);
@@ -494,6 +505,41 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
494 return rem; 505 return rem;
495} 506}
496 507
508#ifdef CONFIG_NO_IDLE_HZ
509/**
510 * hrtimer_get_next_event - get the time until next expiry event
511 *
512 * Returns the delta to the next expiry event or KTIME_MAX if no timer
513 * is pending.
514 */
515ktime_t hrtimer_get_next_event(void)
516{
517 struct hrtimer_base *base = __get_cpu_var(hrtimer_bases);
518 ktime_t delta, mindelta = { .tv64 = KTIME_MAX };
519 unsigned long flags;
520 int i;
521
522 for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) {
523 struct hrtimer *timer;
524
525 spin_lock_irqsave(&base->lock, flags);
526 if (!base->first) {
527 spin_unlock_irqrestore(&base->lock, flags);
528 continue;
529 }
530 timer = rb_entry(base->first, struct hrtimer, node);
531 delta.tv64 = timer->expires.tv64;
532 spin_unlock_irqrestore(&base->lock, flags);
533 delta = ktime_sub(delta, base->get_time());
534 if (delta.tv64 < mindelta.tv64)
535 mindelta.tv64 = delta.tv64;
536 }
537 if (mindelta.tv64 < 0)
538 mindelta.tv64 = 0;
539 return mindelta;
540}
541#endif
542
497/** 543/**
498 * hrtimer_init - initialize a timer to the given clock 544 * hrtimer_init - initialize a timer to the given clock
499 * 545 *
diff --git a/kernel/intermodule.c b/kernel/intermodule.c
index 0cbe633420fb..55b1e5b85db9 100644
--- a/kernel/intermodule.c
+++ b/kernel/intermodule.c
@@ -179,3 +179,6 @@ EXPORT_SYMBOL(inter_module_register);
179EXPORT_SYMBOL(inter_module_unregister); 179EXPORT_SYMBOL(inter_module_unregister);
180EXPORT_SYMBOL(inter_module_get_request); 180EXPORT_SYMBOL(inter_module_get_request);
181EXPORT_SYMBOL(inter_module_put); 181EXPORT_SYMBOL(inter_module_put);
182
183MODULE_LICENSE("GPL");
184
diff --git a/kernel/module.c b/kernel/module.c
index e058aedf6b93..5aad477ddc79 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1670,6 +1670,9 @@ static struct module *load_module(void __user *umod,
1670 goto free_mod; 1670 goto free_mod;
1671 } 1671 }
1672 1672
1673 /* Userspace could have altered the string after the strlen_user() */
1674 args[arglen - 1] = '\0';
1675
1673 if (find_module(mod->name)) { 1676 if (find_module(mod->name)) {
1674 err = -EEXIST; 1677 err = -EEXIST;
1675 goto free_mod; 1678 goto free_mod;
diff --git a/kernel/panic.c b/kernel/panic.c
index c5c4ab255834..126dc43f1c74 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -130,6 +130,7 @@ NORET_TYPE void panic(const char * fmt, ...)
130#endif 130#endif
131 local_irq_enable(); 131 local_irq_enable();
132 for (i = 0;;) { 132 for (i = 0;;) {
133 touch_softlockup_watchdog();
133 i += panic_blink(i); 134 i += panic_blink(i);
134 mdelay(1); 135 mdelay(1);
135 i++; 136 i++;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 216f574b5ffb..fa895fc2ecf5 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -353,6 +353,7 @@ static int posix_timer_fn(void *data)
353 hrtimer_forward(&timr->it.real.timer, 353 hrtimer_forward(&timr->it.real.timer,
354 timr->it.real.interval); 354 timr->it.real.interval);
355 ret = HRTIMER_RESTART; 355 ret = HRTIMER_RESTART;
356 ++timr->it_requeue_pending;
356 } 357 }
357 } 358 }
358 359
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 579d239d129f..623786d44159 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -9,7 +9,9 @@
9#include <linux/console.h> 9#include <linux/console.h>
10#include "power.h" 10#include "power.h"
11 11
12#ifdef SUSPEND_CONSOLE 12#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
13#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
14
13static int orig_fgconsole, orig_kmsg; 15static int orig_fgconsole, orig_kmsg;
14 16
15int pm_prepare_console(void) 17int pm_prepare_console(void)
diff --git a/kernel/power/power.h b/kernel/power/power.h
index d8f0d1a76bae..388dba680841 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -1,14 +1,6 @@
1#include <linux/suspend.h> 1#include <linux/suspend.h>
2#include <linux/utsname.h> 2#include <linux/utsname.h>
3 3
4/* With SUSPEND_CONSOLE defined suspend looks *really* cool, but
5 we probably do not take enough locks for switching consoles, etc,
6 so bad things might happen.
7*/
8#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
9#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
10#endif
11
12struct swsusp_info { 4struct swsusp_info {
13 struct new_utsname uts; 5 struct new_utsname uts;
14 u32 version_code; 6 u32 version_code;
@@ -42,14 +34,6 @@ static struct subsys_attribute _name##_attr = { \
42 34
43extern struct subsystem power_subsys; 35extern struct subsystem power_subsys;
44 36
45#ifdef SUSPEND_CONSOLE
46extern int pm_prepare_console(void);
47extern void pm_restore_console(void);
48#else
49static int pm_prepare_console(void) { return 0; }
50static void pm_restore_console(void) {}
51#endif
52
53/* References to section boundaries */ 37/* References to section boundaries */
54extern const void __nosave_begin, __nosave_end; 38extern const void __nosave_begin, __nosave_end;
55 39
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 41f66365f0d8..8d5a5986d621 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -91,10 +91,8 @@ static int save_highmem_zone(struct zone *zone)
91 * corrected eventually when the cases giving rise to this 91 * corrected eventually when the cases giving rise to this
92 * are better understood. 92 * are better understood.
93 */ 93 */
94 if (PageReserved(page)) { 94 if (PageReserved(page))
95 printk("highmem reserved page?!\n");
96 continue; 95 continue;
97 }
98 BUG_ON(PageNosave(page)); 96 BUG_ON(PageNosave(page));
99 if (PageNosaveFree(page)) 97 if (PageNosaveFree(page))
100 continue; 98 continue;
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 59c91c148e82..2d9d08f72f76 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -153,13 +153,11 @@ static int swsusp_swap_check(void) /* This is called before saving image */
153{ 153{
154 int i; 154 int i;
155 155
156 if (!swsusp_resume_device)
157 return -ENODEV;
158 spin_lock(&swap_lock); 156 spin_lock(&swap_lock);
159 for (i = 0; i < MAX_SWAPFILES; i++) { 157 for (i = 0; i < MAX_SWAPFILES; i++) {
160 if (!(swap_info[i].flags & SWP_WRITEOK)) 158 if (!(swap_info[i].flags & SWP_WRITEOK))
161 continue; 159 continue;
162 if (is_resume_device(swap_info + i)) { 160 if (!swsusp_resume_device || is_resume_device(swap_info + i)) {
163 spin_unlock(&swap_lock); 161 spin_unlock(&swap_lock);
164 root_swap = i; 162 root_swap = i;
165 return 0; 163 return 0;
@@ -743,7 +741,6 @@ static int submit(int rw, pgoff_t page_off, void *page)
743 if (!bio) 741 if (!bio)
744 return -ENOMEM; 742 return -ENOMEM;
745 bio->bi_sector = page_off * (PAGE_SIZE >> 9); 743 bio->bi_sector = page_off * (PAGE_SIZE >> 9);
746 bio_get(bio);
747 bio->bi_bdev = resume_bdev; 744 bio->bi_bdev = resume_bdev;
748 bio->bi_end_io = end_io; 745 bio->bi_end_io = end_io;
749 746
@@ -753,14 +750,13 @@ static int submit(int rw, pgoff_t page_off, void *page)
753 goto Done; 750 goto Done;
754 } 751 }
755 752
756 if (rw == WRITE)
757 bio_set_pages_dirty(bio);
758 753
759 atomic_set(&io_done, 1); 754 atomic_set(&io_done, 1);
760 submit_bio(rw | (1 << BIO_RW_SYNC), bio); 755 submit_bio(rw | (1 << BIO_RW_SYNC), bio);
761 while (atomic_read(&io_done)) 756 while (atomic_read(&io_done))
762 yield(); 757 yield();
763 758 if (rw == READ)
759 bio_set_pages_dirty(bio);
764 Done: 760 Done:
765 bio_put(bio); 761 bio_put(bio);
766 return error; 762 return error;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 5f33cdb6fff5..d95a72c9279d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -72,8 +72,8 @@ void ptrace_untrace(task_t *child)
72 */ 72 */
73void __ptrace_unlink(task_t *child) 73void __ptrace_unlink(task_t *child)
74{ 74{
75 if (!child->ptrace) 75 BUG_ON(!child->ptrace);
76 BUG(); 76
77 child->ptrace = 0; 77 child->ptrace = 0;
78 if (!list_empty(&child->ptrace_list)) { 78 if (!list_empty(&child->ptrace_list)) {
79 list_del_init(&child->ptrace_list); 79 list_del_init(&child->ptrace_list);
@@ -184,22 +184,27 @@ bad:
184 return retval; 184 return retval;
185} 185}
186 186
187void __ptrace_detach(struct task_struct *child, unsigned int data)
188{
189 child->exit_code = data;
190 /* .. re-parent .. */
191 __ptrace_unlink(child);
192 /* .. and wake it up. */
193 if (child->exit_state != EXIT_ZOMBIE)
194 wake_up_process(child);
195}
196
187int ptrace_detach(struct task_struct *child, unsigned int data) 197int ptrace_detach(struct task_struct *child, unsigned int data)
188{ 198{
189 if (!valid_signal(data)) 199 if (!valid_signal(data))
190 return -EIO; 200 return -EIO;
191 201
192 /* Architecture-specific hardware disable .. */ 202 /* Architecture-specific hardware disable .. */
193 ptrace_disable(child); 203 ptrace_disable(child);
194 204
195 /* .. re-parent .. */
196 child->exit_code = data;
197
198 write_lock_irq(&tasklist_lock); 205 write_lock_irq(&tasklist_lock);
199 __ptrace_unlink(child); 206 if (child->ptrace)
200 /* .. and wake it up. */ 207 __ptrace_detach(child, data);
201 if (child->exit_state != EXIT_ZOMBIE)
202 wake_up_process(child);
203 write_unlock_irq(&tasklist_lock); 208 write_unlock_irq(&tasklist_lock);
204 209
205 return 0; 210 return 0;
@@ -242,8 +247,7 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
242 if (write) { 247 if (write) {
243 copy_to_user_page(vma, page, addr, 248 copy_to_user_page(vma, page, addr,
244 maddr + offset, buf, bytes); 249 maddr + offset, buf, bytes);
245 if (!PageCompound(page)) 250 set_page_dirty_lock(page);
246 set_page_dirty_lock(page);
247 } else { 251 } else {
248 copy_from_user_page(vma, page, addr, 252 copy_from_user_page(vma, page, addr,
249 buf, maddr + offset, bytes); 253 buf, maddr + offset, bytes);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 0cf8146bd585..8cf15a569fcd 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -67,7 +67,43 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
67 67
68/* Fake initialization required by compiler */ 68/* Fake initialization required by compiler */
69static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; 69static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
70static int maxbatch = 10000; 70static int blimit = 10;
71static int qhimark = 10000;
72static int qlowmark = 100;
73#ifdef CONFIG_SMP
74static int rsinterval = 1000;
75#endif
76
77static atomic_t rcu_barrier_cpu_count;
78static struct semaphore rcu_barrier_sema;
79static struct completion rcu_barrier_completion;
80
81#ifdef CONFIG_SMP
82static void force_quiescent_state(struct rcu_data *rdp,
83 struct rcu_ctrlblk *rcp)
84{
85 int cpu;
86 cpumask_t cpumask;
87 set_need_resched();
88 if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) {
89 rdp->last_rs_qlen = rdp->qlen;
90 /*
91 * Don't send IPI to itself. With irqs disabled,
92 * rdp->cpu is the current cpu.
93 */
94 cpumask = rcp->cpumask;
95 cpu_clear(rdp->cpu, cpumask);
96 for_each_cpu_mask(cpu, cpumask)
97 smp_send_reschedule(cpu);
98 }
99}
100#else
101static inline void force_quiescent_state(struct rcu_data *rdp,
102 struct rcu_ctrlblk *rcp)
103{
104 set_need_resched();
105}
106#endif
71 107
72/** 108/**
73 * call_rcu - Queue an RCU callback for invocation after a grace period. 109 * call_rcu - Queue an RCU callback for invocation after a grace period.
@@ -92,17 +128,13 @@ void fastcall call_rcu(struct rcu_head *head,
92 rdp = &__get_cpu_var(rcu_data); 128 rdp = &__get_cpu_var(rcu_data);
93 *rdp->nxttail = head; 129 *rdp->nxttail = head;
94 rdp->nxttail = &head->next; 130 rdp->nxttail = &head->next;
95 131 if (unlikely(++rdp->qlen > qhimark)) {
96 if (unlikely(++rdp->count > 10000)) 132 rdp->blimit = INT_MAX;
97 set_need_resched(); 133 force_quiescent_state(rdp, &rcu_ctrlblk);
98 134 }
99 local_irq_restore(flags); 135 local_irq_restore(flags);
100} 136}
101 137
102static atomic_t rcu_barrier_cpu_count;
103static struct semaphore rcu_barrier_sema;
104static struct completion rcu_barrier_completion;
105
106/** 138/**
107 * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. 139 * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
108 * @head: structure to be used for queueing the RCU updates. 140 * @head: structure to be used for queueing the RCU updates.
@@ -131,12 +163,12 @@ void fastcall call_rcu_bh(struct rcu_head *head,
131 rdp = &__get_cpu_var(rcu_bh_data); 163 rdp = &__get_cpu_var(rcu_bh_data);
132 *rdp->nxttail = head; 164 *rdp->nxttail = head;
133 rdp->nxttail = &head->next; 165 rdp->nxttail = &head->next;
134 rdp->count++; 166
135/* 167 if (unlikely(++rdp->qlen > qhimark)) {
136 * Should we directly call rcu_do_batch() here ? 168 rdp->blimit = INT_MAX;
137 * if (unlikely(rdp->count > 10000)) 169 force_quiescent_state(rdp, &rcu_bh_ctrlblk);
138 * rcu_do_batch(rdp); 170 }
139 */ 171
140 local_irq_restore(flags); 172 local_irq_restore(flags);
141} 173}
142 174
@@ -199,10 +231,12 @@ static void rcu_do_batch(struct rcu_data *rdp)
199 next = rdp->donelist = list->next; 231 next = rdp->donelist = list->next;
200 list->func(list); 232 list->func(list);
201 list = next; 233 list = next;
202 rdp->count--; 234 rdp->qlen--;
203 if (++count >= maxbatch) 235 if (++count >= rdp->blimit)
204 break; 236 break;
205 } 237 }
238 if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
239 rdp->blimit = blimit;
206 if (!rdp->donelist) 240 if (!rdp->donelist)
207 rdp->donetail = &rdp->donelist; 241 rdp->donetail = &rdp->donelist;
208 else 242 else
@@ -473,6 +507,7 @@ static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
473 rdp->quiescbatch = rcp->completed; 507 rdp->quiescbatch = rcp->completed;
474 rdp->qs_pending = 0; 508 rdp->qs_pending = 0;
475 rdp->cpu = cpu; 509 rdp->cpu = cpu;
510 rdp->blimit = blimit;
476} 511}
477 512
478static void __devinit rcu_online_cpu(int cpu) 513static void __devinit rcu_online_cpu(int cpu)
@@ -567,7 +602,12 @@ void synchronize_kernel(void)
567 synchronize_rcu(); 602 synchronize_rcu();
568} 603}
569 604
570module_param(maxbatch, int, 0); 605module_param(blimit, int, 0);
606module_param(qhimark, int, 0);
607module_param(qlowmark, int, 0);
608#ifdef CONFIG_SMP
609module_param(rsinterval, int, 0);
610#endif
571EXPORT_SYMBOL_GPL(rcu_batches_completed); 611EXPORT_SYMBOL_GPL(rcu_batches_completed);
572EXPORT_SYMBOL(call_rcu); /* WARNING: GPL-only in April 2006. */ 612EXPORT_SYMBOL(call_rcu); /* WARNING: GPL-only in April 2006. */
573EXPORT_SYMBOL(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ 613EXPORT_SYMBOL(call_rcu_bh); /* WARNING: GPL-only in April 2006. */
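
The hunk above replaces the single maxbatch module parameter with blimit, qhimark and qlowmark (plus rsinterval on SMP). As a sketch of how they could be tuned, assuming rcupdate.c is built in so module_param() exposes them with the usual "rcupdate." prefix on the kernel command line (that prefix is an assumption, not something this patch defines):

	rcupdate.blimit=20 rcupdate.qhimark=20000 rcupdate.qlowmark=200 rcupdate.rsinterval=2000

Raising qhimark delays the point at which call_rcu() switches to unlimited batching and forces a quiescent state, while qlowmark controls when rcu_do_batch() drops the batch limit back to blimit.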
diff --git a/kernel/sched.c b/kernel/sched.c
index f77f23f8f479..4d46e90f59c3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -178,13 +178,6 @@ static unsigned int task_timeslice(task_t *p)
178#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \ 178#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \
179 < (long long) (sd)->cache_hot_time) 179 < (long long) (sd)->cache_hot_time)
180 180
181void __put_task_struct_cb(struct rcu_head *rhp)
182{
183 __put_task_struct(container_of(rhp, struct task_struct, rcu));
184}
185
186EXPORT_SYMBOL_GPL(__put_task_struct_cb);
187
188/* 181/*
189 * These are the runqueue data structures: 182 * These are the runqueue data structures:
190 */ 183 */
@@ -215,7 +208,6 @@ struct runqueue {
215 */ 208 */
216 unsigned long nr_running; 209 unsigned long nr_running;
217#ifdef CONFIG_SMP 210#ifdef CONFIG_SMP
218 unsigned long prio_bias;
219 unsigned long cpu_load[3]; 211 unsigned long cpu_load[3];
220#endif 212#endif
221 unsigned long long nr_switches; 213 unsigned long long nr_switches;
@@ -669,68 +661,13 @@ static int effective_prio(task_t *p)
669 return prio; 661 return prio;
670} 662}
671 663
672#ifdef CONFIG_SMP
673static inline void inc_prio_bias(runqueue_t *rq, int prio)
674{
675 rq->prio_bias += MAX_PRIO - prio;
676}
677
678static inline void dec_prio_bias(runqueue_t *rq, int prio)
679{
680 rq->prio_bias -= MAX_PRIO - prio;
681}
682
683static inline void inc_nr_running(task_t *p, runqueue_t *rq)
684{
685 rq->nr_running++;
686 if (rt_task(p)) {
687 if (p != rq->migration_thread)
688 /*
689 * The migration thread does the actual balancing. Do
690 * not bias by its priority as the ultra high priority
691 * will skew balancing adversely.
692 */
693 inc_prio_bias(rq, p->prio);
694 } else
695 inc_prio_bias(rq, p->static_prio);
696}
697
698static inline void dec_nr_running(task_t *p, runqueue_t *rq)
699{
700 rq->nr_running--;
701 if (rt_task(p)) {
702 if (p != rq->migration_thread)
703 dec_prio_bias(rq, p->prio);
704 } else
705 dec_prio_bias(rq, p->static_prio);
706}
707#else
708static inline void inc_prio_bias(runqueue_t *rq, int prio)
709{
710}
711
712static inline void dec_prio_bias(runqueue_t *rq, int prio)
713{
714}
715
716static inline void inc_nr_running(task_t *p, runqueue_t *rq)
717{
718 rq->nr_running++;
719}
720
721static inline void dec_nr_running(task_t *p, runqueue_t *rq)
722{
723 rq->nr_running--;
724}
725#endif
726
727/* 664/*
728 * __activate_task - move a task to the runqueue. 665 * __activate_task - move a task to the runqueue.
729 */ 666 */
730static inline void __activate_task(task_t *p, runqueue_t *rq) 667static inline void __activate_task(task_t *p, runqueue_t *rq)
731{ 668{
732 enqueue_task(p, rq->active); 669 enqueue_task(p, rq->active);
733 inc_nr_running(p, rq); 670 rq->nr_running++;
734} 671}
735 672
736/* 673/*
@@ -739,7 +676,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq)
739static inline void __activate_idle_task(task_t *p, runqueue_t *rq) 676static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
740{ 677{
741 enqueue_task_head(p, rq->active); 678 enqueue_task_head(p, rq->active);
742 inc_nr_running(p, rq); 679 rq->nr_running++;
743} 680}
744 681
745static int recalc_task_prio(task_t *p, unsigned long long now) 682static int recalc_task_prio(task_t *p, unsigned long long now)
@@ -863,7 +800,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
863 */ 800 */
864static void deactivate_task(struct task_struct *p, runqueue_t *rq) 801static void deactivate_task(struct task_struct *p, runqueue_t *rq)
865{ 802{
866 dec_nr_running(p, rq); 803 rq->nr_running--;
867 dequeue_task(p, p->array); 804 dequeue_task(p, p->array);
868 p->array = NULL; 805 p->array = NULL;
869} 806}
@@ -1007,61 +944,27 @@ void kick_process(task_t *p)
1007 * We want to under-estimate the load of migration sources, to 944 * We want to under-estimate the load of migration sources, to
1008 * balance conservatively. 945 * balance conservatively.
1009 */ 946 */
1010static unsigned long __source_load(int cpu, int type, enum idle_type idle) 947static inline unsigned long source_load(int cpu, int type)
1011{ 948{
1012 runqueue_t *rq = cpu_rq(cpu); 949 runqueue_t *rq = cpu_rq(cpu);
1013 unsigned long running = rq->nr_running; 950 unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
1014 unsigned long source_load, cpu_load = rq->cpu_load[type-1],
1015 load_now = running * SCHED_LOAD_SCALE;
1016
1017 if (type == 0) 951 if (type == 0)
1018 source_load = load_now; 952 return load_now;
1019 else
1020 source_load = min(cpu_load, load_now);
1021
1022 if (running > 1 || (idle == NOT_IDLE && running))
1023 /*
1024 * If we are busy rebalancing the load is biased by
1025 * priority to create 'nice' support across cpus. When
1026 * idle rebalancing we should only bias the source_load if
1027 * there is more than one task running on that queue to
1028 * prevent idle rebalance from trying to pull tasks from a
1029 * queue with only one running task.
1030 */
1031 source_load = source_load * rq->prio_bias / running;
1032
1033 return source_load;
1034}
1035 953
1036static inline unsigned long source_load(int cpu, int type) 954 return min(rq->cpu_load[type-1], load_now);
1037{
1038 return __source_load(cpu, type, NOT_IDLE);
1039} 955}
1040 956
1041/* 957/*
1042 * Return a high guess at the load of a migration-target cpu 958 * Return a high guess at the load of a migration-target cpu
1043 */ 959 */
1044static inline unsigned long __target_load(int cpu, int type, enum idle_type idle) 960static inline unsigned long target_load(int cpu, int type)
1045{ 961{
1046 runqueue_t *rq = cpu_rq(cpu); 962 runqueue_t *rq = cpu_rq(cpu);
1047 unsigned long running = rq->nr_running; 963 unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
1048 unsigned long target_load, cpu_load = rq->cpu_load[type-1],
1049 load_now = running * SCHED_LOAD_SCALE;
1050
1051 if (type == 0) 964 if (type == 0)
1052 target_load = load_now; 965 return load_now;
1053 else
1054 target_load = max(cpu_load, load_now);
1055
1056 if (running > 1 || (idle == NOT_IDLE && running))
1057 target_load = target_load * rq->prio_bias / running;
1058 966
1059 return target_load; 967 return max(rq->cpu_load[type-1], load_now);
1060}
1061
1062static inline unsigned long target_load(int cpu, int type)
1063{
1064 return __target_load(cpu, type, NOT_IDLE);
1065} 968}
1066 969
1067/* 970/*
@@ -1294,9 +1197,6 @@ static int try_to_wake_up(task_t *p, unsigned int state, int sync)
1294 } 1197 }
1295 } 1198 }
1296 1199
1297 if (p->last_waker_cpu != this_cpu)
1298 goto out_set_cpu;
1299
1300 if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) 1200 if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
1301 goto out_set_cpu; 1201 goto out_set_cpu;
1302 1202
@@ -1367,8 +1267,6 @@ out_set_cpu:
1367 cpu = task_cpu(p); 1267 cpu = task_cpu(p);
1368 } 1268 }
1369 1269
1370 p->last_waker_cpu = this_cpu;
1371
1372out_activate: 1270out_activate:
1373#endif /* CONFIG_SMP */ 1271#endif /* CONFIG_SMP */
1374 if (old_state == TASK_UNINTERRUPTIBLE) { 1272 if (old_state == TASK_UNINTERRUPTIBLE) {
@@ -1450,12 +1348,9 @@ void fastcall sched_fork(task_t *p, int clone_flags)
1450#ifdef CONFIG_SCHEDSTATS 1348#ifdef CONFIG_SCHEDSTATS
1451 memset(&p->sched_info, 0, sizeof(p->sched_info)); 1349 memset(&p->sched_info, 0, sizeof(p->sched_info));
1452#endif 1350#endif
1453#if defined(CONFIG_SMP) 1351#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
1454 p->last_waker_cpu = cpu;
1455#if defined(__ARCH_WANT_UNLOCKED_CTXSW)
1456 p->oncpu = 0; 1352 p->oncpu = 0;
1457#endif 1353#endif
1458#endif
1459#ifdef CONFIG_PREEMPT 1354#ifdef CONFIG_PREEMPT
1460 /* Want to start with kernel preemption disabled. */ 1355 /* Want to start with kernel preemption disabled. */
1461 task_thread_info(p)->preempt_count = 1; 1356 task_thread_info(p)->preempt_count = 1;
@@ -1530,7 +1425,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
1530 list_add_tail(&p->run_list, &current->run_list); 1425 list_add_tail(&p->run_list, &current->run_list);
1531 p->array = current->array; 1426 p->array = current->array;
1532 p->array->nr_active++; 1427 p->array->nr_active++;
1533 inc_nr_running(p, rq); 1428 rq->nr_running++;
1534 } 1429 }
1535 set_need_resched(); 1430 set_need_resched();
1536 } else 1431 } else
@@ -1875,9 +1770,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
1875 runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) 1770 runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
1876{ 1771{
1877 dequeue_task(p, src_array); 1772 dequeue_task(p, src_array);
1878 dec_nr_running(p, src_rq); 1773 src_rq->nr_running--;
1879 set_task_cpu(p, this_cpu); 1774 set_task_cpu(p, this_cpu);
1880 inc_nr_running(p, this_rq); 1775 this_rq->nr_running++;
1881 enqueue_task(p, this_array); 1776 enqueue_task(p, this_array);
1882 p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) 1777 p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
1883 + this_rq->timestamp_last_tick; 1778 + this_rq->timestamp_last_tick;
@@ -2056,9 +1951,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2056 1951
2057 /* Bias balancing toward cpus of our domain */ 1952 /* Bias balancing toward cpus of our domain */
2058 if (local_group) 1953 if (local_group)
2059 load = __target_load(i, load_idx, idle); 1954 load = target_load(i, load_idx);
2060 else 1955 else
2061 load = __source_load(i, load_idx, idle); 1956 load = source_load(i, load_idx);
2062 1957
2063 avg_load += load; 1958 avg_load += load;
2064 } 1959 }
@@ -2171,7 +2066,7 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,
2171 int i; 2066 int i;
2172 2067
2173 for_each_cpu_mask(i, group->cpumask) { 2068 for_each_cpu_mask(i, group->cpumask) {
2174 load = __source_load(i, 0, idle); 2069 load = source_load(i, 0);
2175 2070
2176 if (load > max_load) { 2071 if (load > max_load) {
2177 max_load = load; 2072 max_load = load;
@@ -3571,10 +3466,8 @@ void set_user_nice(task_t *p, long nice)
3571 goto out_unlock; 3466 goto out_unlock;
3572 } 3467 }
3573 array = p->array; 3468 array = p->array;
3574 if (array) { 3469 if (array)
3575 dequeue_task(p, array); 3470 dequeue_task(p, array);
3576 dec_prio_bias(rq, p->static_prio);
3577 }
3578 3471
3579 old_prio = p->prio; 3472 old_prio = p->prio;
3580 new_prio = NICE_TO_PRIO(nice); 3473 new_prio = NICE_TO_PRIO(nice);
@@ -3584,7 +3477,6 @@ void set_user_nice(task_t *p, long nice)
3584 3477
3585 if (array) { 3478 if (array) {
3586 enqueue_task(p, array); 3479 enqueue_task(p, array);
3587 inc_prio_bias(rq, p->static_prio);
3588 /* 3480 /*
3589 * If the task increased its priority or is running and 3481 * If the task increased its priority or is running and
3590 * lowered its priority, then reschedule its CPU: 3482 * lowered its priority, then reschedule its CPU:
@@ -4129,6 +4021,8 @@ static inline void __cond_resched(void)
4129 */ 4021 */
4130 if (unlikely(preempt_count())) 4022 if (unlikely(preempt_count()))
4131 return; 4023 return;
4024 if (unlikely(system_state != SYSTEM_RUNNING))
4025 return;
4132 do { 4026 do {
4133 add_preempt_count(PREEMPT_ACTIVE); 4027 add_preempt_count(PREEMPT_ACTIVE);
4134 schedule(); 4028 schedule();
@@ -4434,6 +4328,7 @@ void __devinit init_idle(task_t *idle, int cpu)
4434 runqueue_t *rq = cpu_rq(cpu); 4328 runqueue_t *rq = cpu_rq(cpu);
4435 unsigned long flags; 4329 unsigned long flags;
4436 4330
4331 idle->timestamp = sched_clock();
4437 idle->sleep_avg = 0; 4332 idle->sleep_avg = 0;
4438 idle->array = NULL; 4333 idle->array = NULL;
4439 idle->prio = MAX_PRIO; 4334 idle->prio = MAX_PRIO;
@@ -5159,7 +5054,18 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
5159#define MAX_DOMAIN_DISTANCE 32 5054#define MAX_DOMAIN_DISTANCE 32
5160 5055
5161static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] = 5056static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] =
5162 { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] = -1LL }; 5057 { [ 0 ... MAX_DOMAIN_DISTANCE-1 ] =
5058/*
5059 * Architectures may override the migration cost and thus avoid
5060 * boot-time calibration. Unit is nanoseconds. Mostly useful for
5061 * virtualized hardware:
5062 */
5063#ifdef CONFIG_DEFAULT_MIGRATION_COST
5064 CONFIG_DEFAULT_MIGRATION_COST
5065#else
5066 -1LL
5067#endif
5068};
5163 5069
5164/* 5070/*
5165 * Allow override of migration cost - in units of microseconds. 5071 * Allow override of migration cost - in units of microseconds.
@@ -5551,13 +5457,15 @@ static void calibrate_migration_costs(const cpumask_t *cpu_map)
5551 -1 5457 -1
5552#endif 5458#endif
5553 ); 5459 );
5554 printk("migration_cost="); 5460 if (system_state == SYSTEM_BOOTING) {
5555 for (distance = 0; distance <= max_distance; distance++) { 5461 printk("migration_cost=");
5556 if (distance) 5462 for (distance = 0; distance <= max_distance; distance++) {
5557 printk(","); 5463 if (distance)
5558 printk("%ld", (long)migration_cost[distance] / 1000); 5464 printk(",");
5465 printk("%ld", (long)migration_cost[distance] / 1000);
5466 }
5467 printk("\n");
5559 } 5468 }
5560 printk("\n");
5561 j1 = jiffies; 5469 j1 = jiffies;
5562 if (migration_debug) 5470 if (migration_debug)
5563 printk("migration: %ld seconds\n", (j1-j0)/HZ); 5471 printk("migration: %ld seconds\n", (j1-j0)/HZ);
@@ -6109,7 +6017,7 @@ void __init sched_init(void)
6109 runqueue_t *rq; 6017 runqueue_t *rq;
6110 int i, j, k; 6018 int i, j, k;
6111 6019
6112 for (i = 0; i < NR_CPUS; i++) { 6020 for_each_cpu(i) {
6113 prio_array_t *array; 6021 prio_array_t *array;
6114 6022
6115 rq = cpu_rq(i); 6023 rq = cpu_rq(i);
diff --git a/kernel/signal.c b/kernel/signal.c
index b373fc2420da..ea154104a00b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2430,7 +2430,7 @@ sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo)
2430} 2430}
2431 2431
2432int 2432int
2433do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) 2433do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
2434{ 2434{
2435 struct k_sigaction *k; 2435 struct k_sigaction *k;
2436 sigset_t mask; 2436 sigset_t mask;
@@ -2454,6 +2454,8 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
2454 *oact = *k; 2454 *oact = *k;
2455 2455
2456 if (act) { 2456 if (act) {
2457 sigdelsetmask(&act->sa.sa_mask,
2458 sigmask(SIGKILL) | sigmask(SIGSTOP));
2457 /* 2459 /*
2458 * POSIX 3.3.1.3: 2460 * POSIX 3.3.1.3:
2459 * "Setting a signal action to SIG_IGN for a signal that is 2461 * "Setting a signal action to SIG_IGN for a signal that is
@@ -2479,8 +2481,6 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
2479 read_lock(&tasklist_lock); 2481 read_lock(&tasklist_lock);
2480 spin_lock_irq(&t->sighand->siglock); 2482 spin_lock_irq(&t->sighand->siglock);
2481 *k = *act; 2483 *k = *act;
2482 sigdelsetmask(&k->sa.sa_mask,
2483 sigmask(SIGKILL) | sigmask(SIGSTOP));
2484 sigemptyset(&mask); 2484 sigemptyset(&mask);
2485 sigaddset(&mask, sig); 2485 sigaddset(&mask, sig);
2486 rm_from_queue_full(&mask, &t->signal->shared_pending); 2486 rm_from_queue_full(&mask, &t->signal->shared_pending);
@@ -2495,8 +2495,6 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
2495 } 2495 }
2496 2496
2497 *k = *act; 2497 *k = *act;
2498 sigdelsetmask(&k->sa.sa_mask,
2499 sigmask(SIGKILL) | sigmask(SIGSTOP));
2500 } 2498 }
2501 2499
2502 spin_unlock_irq(&current->sighand->siglock); 2500 spin_unlock_irq(&current->sighand->siglock);
@@ -2702,6 +2700,7 @@ sys_signal(int sig, __sighandler_t handler)
2702 2700
2703 new_sa.sa.sa_handler = handler; 2701 new_sa.sa.sa_handler = handler;
2704 new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK; 2702 new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK;
2703 sigemptyset(&new_sa.sa.sa_mask);
2705 2704
2706 ret = do_sigaction(sig, &new_sa, &old_sa); 2705 ret = do_sigaction(sig, &new_sa, &old_sa);
2707 2706
diff --git a/kernel/sys.c b/kernel/sys.c
index 0929c698affc..f91218a5463e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -428,7 +428,7 @@ void kernel_kexec(void)
428{ 428{
429#ifdef CONFIG_KEXEC 429#ifdef CONFIG_KEXEC
430 struct kimage *image; 430 struct kimage *image;
431 image = xchg(&kexec_image, 0); 431 image = xchg(&kexec_image, NULL);
432 if (!image) { 432 if (!image) {
433 return; 433 return;
434 } 434 }
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 17313b99e53d..1067090db6b1 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -104,6 +104,8 @@ cond_syscall(sys_setreuid16);
104cond_syscall(sys_setuid16); 104cond_syscall(sys_setuid16);
105cond_syscall(sys_vm86old); 105cond_syscall(sys_vm86old);
106cond_syscall(sys_vm86); 106cond_syscall(sys_vm86);
107cond_syscall(compat_sys_ipc);
108cond_syscall(compat_sys_sysctl);
107 109
108/* arch-specific weak syscall entries */ 110/* arch-specific weak syscall entries */
109cond_syscall(sys_pciconfig_read); 111cond_syscall(sys_pciconfig_read);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 71dd6f62efec..32b48e8ee36e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -44,13 +44,14 @@
44#include <linux/limits.h> 44#include <linux/limits.h>
45#include <linux/dcache.h> 45#include <linux/dcache.h>
46#include <linux/syscalls.h> 46#include <linux/syscalls.h>
47#include <linux/nfs_fs.h>
48#include <linux/acpi.h>
47 49
48#include <asm/uaccess.h> 50#include <asm/uaccess.h>
49#include <asm/processor.h> 51#include <asm/processor.h>
50 52
51#ifdef CONFIG_ROOT_NFS 53extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
52#include <linux/nfs_fs.h> 54 void __user *buffer, size_t *lenp, loff_t *ppos);
53#endif
54 55
55#if defined(CONFIG_SYSCTL) 56#if defined(CONFIG_SYSCTL)
56 57
@@ -126,7 +127,9 @@ extern int sysctl_hz_timer;
126extern int acct_parm[]; 127extern int acct_parm[];
127#endif 128#endif
128 129
129int randomize_va_space = 1; 130#ifdef CONFIG_IA64
131extern int no_unaligned_warning;
132#endif
130 133
131static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t, 134static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
132 ctl_table *, void **); 135 ctl_table *, void **);
@@ -640,6 +643,7 @@ static ctl_table kern_table[] = {
640 .proc_handler = &proc_dointvec, 643 .proc_handler = &proc_dointvec,
641 }, 644 },
642#endif 645#endif
646#if defined(CONFIG_MMU)
643 { 647 {
644 .ctl_name = KERN_RANDOMIZE, 648 .ctl_name = KERN_RANDOMIZE,
645 .procname = "randomize_va_space", 649 .procname = "randomize_va_space",
@@ -648,6 +652,7 @@ static ctl_table kern_table[] = {
648 .mode = 0644, 652 .mode = 0644,
649 .proc_handler = &proc_dointvec, 653 .proc_handler = &proc_dointvec,
650 }, 654 },
655#endif
651#if defined(CONFIG_S390) && defined(CONFIG_SMP) 656#if defined(CONFIG_S390) && defined(CONFIG_SMP)
652 { 657 {
653 .ctl_name = KERN_SPIN_RETRY, 658 .ctl_name = KERN_SPIN_RETRY,
@@ -658,6 +663,26 @@ static ctl_table kern_table[] = {
658 .proc_handler = &proc_dointvec, 663 .proc_handler = &proc_dointvec,
659 }, 664 },
660#endif 665#endif
666#ifdef CONFIG_ACPI_SLEEP
667 {
668 .ctl_name = KERN_ACPI_VIDEO_FLAGS,
669 .procname = "acpi_video_flags",
670 .data = &acpi_video_flags,
671 .maxlen = sizeof (unsigned long),
672 .mode = 0644,
673 .proc_handler = &proc_doulongvec_minmax,
674 },
675#endif
676#ifdef CONFIG_IA64
677 {
678 .ctl_name = KERN_IA64_UNALIGNED,
679 .procname = "ignore-unaligned-usertrap",
680 .data = &no_unaligned_warning,
681 .maxlen = sizeof (int),
682 .mode = 0644,
683 .proc_handler = &proc_dointvec,
684 },
685#endif
661 { .ctl_name = 0 } 686 { .ctl_name = 0 }
662}; 687};
663 688
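The new acpi_video_flags and ignore-unaligned-usertrap entries follow the usual ctl_table pattern: point .data at the variable, size .maxlen to match, and choose a proc handler of the same type. For kern_table the registration already exists; a hedged sketch of wiring up a comparable entry with the two-argument register_sysctl_table() used in this kernel series (example_flags and the numeric ctl_name are illustrative only):

/* sketch: exposing a variable under /proc/sys/kernel, 2.6.16-era API */
#include <linux/sysctl.h>
#include <linux/init.h>
#include <linux/errno.h>

static unsigned long example_flags;

static ctl_table example_entries[] = {
	{
		.ctl_name	= 9999,		/* binary-sysctl id, illustrative */
		.procname	= "example_flags",
		.data		= &example_flags,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= &proc_doulongvec_minmax,
	},
	{ .ctl_name = 0 }
};

static ctl_table example_root[] = {
	{
		.ctl_name	= CTL_KERN,
		.procname	= "kernel",
		.mode		= 0555,
		.child		= example_entries,
	},
	{ .ctl_name = 0 }
};

static struct ctl_table_header *example_header;

static int __init example_sysctl_init(void)
{
	/* second argument: insert_at_head, still present in this series */
	example_header = register_sysctl_table(example_root, 0);
	return example_header ? 0 : -ENOMEM;
}
__initcall(example_sysctl_init);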
@@ -921,7 +946,7 @@ static ctl_table fs_table[] = {
921 .data = &files_stat, 946 .data = &files_stat,
922 .maxlen = 3*sizeof(int), 947 .maxlen = 3*sizeof(int),
923 .mode = 0444, 948 .mode = 0444,
924 .proc_handler = &proc_dointvec, 949 .proc_handler = &proc_nr_files,
925 }, 950 },
926 { 951 {
927 .ctl_name = FS_MAXFILE, 952 .ctl_name = FS_MAXFILE,
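Switching the nr_files entry from proc_dointvec to proc_nr_files lets the kernel refresh files_stat.nr_files (now maintained lazily) before the value is copied to user space; the table stays read-only (mode 0444). The handler itself lives in fs/file_table.c; a plausible shape, matching the extern declaration added earlier in this diff (get_nr_files() stands in for however the count is recomputed):

/* plausible sketch of proc_nr_files(); get_nr_files() is a stand-in */
#include <linux/fs.h>
#include <linux/sysctl.h>

extern int get_nr_files(void);		/* assumed helper, not in this diff */

int proc_nr_files(ctl_table *table, int write, struct file *filp,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	files_stat.nr_files = get_nr_files();	/* refresh before reporting */
	return proc_dointvec(table, write, filp, buffer, lenp, ppos);
}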
diff --git a/kernel/timer.c b/kernel/timer.c
index 4f1cb0ab5251..2410c18dbeb1 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -489,13 +489,25 @@ unsigned long next_timer_interrupt(void)
489 struct list_head *list; 489 struct list_head *list;
490 struct timer_list *nte; 490 struct timer_list *nte;
491 unsigned long expires; 491 unsigned long expires;
492 unsigned long hr_expires = MAX_JIFFY_OFFSET;
493 ktime_t hr_delta;
492 tvec_t *varray[4]; 494 tvec_t *varray[4];
493 int i, j; 495 int i, j;
494 496
497 hr_delta = hrtimer_get_next_event();
498 if (hr_delta.tv64 != KTIME_MAX) {
499 struct timespec tsdelta;
500 tsdelta = ktime_to_timespec(hr_delta);
501 hr_expires = timespec_to_jiffies(&tsdelta);
502 if (hr_expires < 3)
503 return hr_expires + jiffies;
504 }
505 hr_expires += jiffies;
506
495 base = &__get_cpu_var(tvec_bases); 507 base = &__get_cpu_var(tvec_bases);
496 spin_lock(&base->t_base.lock); 508 spin_lock(&base->t_base.lock);
497 expires = base->timer_jiffies + (LONG_MAX >> 1); 509 expires = base->timer_jiffies + (LONG_MAX >> 1);
498 list = 0; 510 list = NULL;
499 511
500 /* Look for timer events in tv1. */ 512 /* Look for timer events in tv1. */
501 j = base->timer_jiffies & TVR_MASK; 513 j = base->timer_jiffies & TVR_MASK;
@@ -542,6 +554,10 @@ found:
542 } 554 }
543 } 555 }
544 spin_unlock(&base->t_base.lock); 556 spin_unlock(&base->t_base.lock);
557
558 if (time_before(hr_expires, expires))
559 return hr_expires;
560
545 return expires; 561 return expires;
546} 562}
547#endif 563#endif
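With these two hunks next_timer_interrupt() also consults the high-resolution timer queue: the next hrtimer expiry is converted from a ktime_t delta into jiffies, an event due within a couple of ticks short-circuits the timer-wheel scan, and otherwise the earlier of the two deadlines is returned. A reduced sketch of that selection logic (the function and parameter names are illustrative, not kernel symbols):

/* sketch: choose the earlier of the timer-wheel and hrtimer deadlines */
unsigned long next_event(unsigned long now,		/* current jiffies  */
			 unsigned long wheel_expires,	/* absolute jiffies */
			 long hr_delta_ns,		/* <0: no hrtimer   */
			 long nsec_per_tick)
{
	unsigned long hr_expires;

	if (hr_delta_ns < 0)
		return wheel_expires;

	/* round the delta up to whole ticks, like timespec_to_jiffies() */
	hr_expires = (hr_delta_ns + nsec_per_tick - 1) / nsec_per_tick;
	if (hr_expires < 3)			/* due almost immediately */
		return now + hr_expires;

	hr_expires += now;
	/* wrap-safe "earlier of the two", as time_before() does */
	return (long)(wheel_expires - hr_expires) > 0 ? hr_expires : wheel_expires;
}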
@@ -717,12 +733,16 @@ static void second_overflow(void)
717#endif 733#endif
718} 734}
719 735
720/* in the NTP reference this is called "hardclock()" */ 736/*
721static void update_wall_time_one_tick(void) 737 * Returns how many microseconds we need to add to xtime this tick
738 * in doing an adjustment requested with adjtime.
739 */
740static long adjtime_adjustment(void)
722{ 741{
723 long time_adjust_step, delta_nsec; 742 long time_adjust_step;
724 743
725 if ((time_adjust_step = time_adjust) != 0 ) { 744 time_adjust_step = time_adjust;
745 if (time_adjust_step) {
726 /* 746 /*
727 * We are doing an adjtime thing. Prepare time_adjust_step to 747 * We are doing an adjtime thing. Prepare time_adjust_step to
728 * be within bounds. Note that a positive time_adjust means we 748 * be within bounds. Note that a positive time_adjust means we
@@ -733,10 +753,19 @@ static void update_wall_time_one_tick(void)
733 */ 753 */
734 time_adjust_step = min(time_adjust_step, (long)tickadj); 754 time_adjust_step = min(time_adjust_step, (long)tickadj);
735 time_adjust_step = max(time_adjust_step, (long)-tickadj); 755 time_adjust_step = max(time_adjust_step, (long)-tickadj);
756 }
757 return time_adjust_step;
758}
736 759
760/* in the NTP reference this is called "hardclock()" */
761static void update_wall_time_one_tick(void)
762{
763 long time_adjust_step, delta_nsec;
764
765 time_adjust_step = adjtime_adjustment();
766 if (time_adjust_step)
737 /* Reduce by this step the amount of time left */ 767 /* Reduce by this step the amount of time left */
738 time_adjust -= time_adjust_step; 768 time_adjust -= time_adjust_step;
739 }
740 delta_nsec = tick_nsec + time_adjust_step * 1000; 769 delta_nsec = tick_nsec + time_adjust_step * 1000;
741 /* 770 /*
742 * Advance the phase, once it gets to one microsecond, then 771 * Advance the phase, once it gets to one microsecond, then
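The refactoring above pulls the adjtime step calculation into adjtime_adjustment(): the outstanding time_adjust (in microseconds) is consumed at most tickadj microseconds per tick, so a slew requested via adjtime() is spread across many ticks rather than applied at once. A small sketch of the clamp with worked numbers (the tickadj value is illustrative):

/* sketch: clamp the per-tick slew the way adjtime_adjustment() does */
static long adjtime_step(long time_adjust_us, long tickadj_us)
{
	long step = time_adjust_us;

	if (step > tickadj_us)
		step = tickadj_us;
	if (step < -tickadj_us)
		step = -tickadj_us;
	return step;
}

/* e.g. a pending adjustment of +5000 us with an (illustrative) tickadj of
 * 500 us yields 500 us per tick, so the slew completes after 10 ticks */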
@@ -759,6 +788,22 @@ static void update_wall_time_one_tick(void)
759} 788}
760 789
761/* 790/*
791 * Return how long ticks are at the moment, that is, how much time
792 * update_wall_time_one_tick will add to xtime next time we call it
793 * (assuming no calls to do_adjtimex in the meantime).
794 * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10
795 * bits to the right of the binary point.
796 * This function has no side-effects.
797 */
798u64 current_tick_length(void)
799{
800 long delta_nsec;
801
802 delta_nsec = tick_nsec + adjtime_adjustment() * 1000;
803 return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj;
804}
805
806/*
762 * Using a loop looks inefficient, but "ticks" is 807 * Using a loop looks inefficient, but "ticks" is
763 * usually just one (we shouldn't be losing ticks, 808 * usually just one (we shouldn't be losing ticks,
764 * we're doing this this way mainly for interrupt 809 * we're doing this this way mainly for interrupt
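current_tick_length() reports the length of the current tick as fixed-point nanoseconds with SHIFT_SCALE-10 fractional bits, so the already-scaled NTP phase term time_adj can be added directly; it deliberately has no side effects, unlike update_wall_time_one_tick(). A sketch of the arithmetic, assuming the <linux/timex.h> value SHIFT_SCALE == 22 of this era (i.e. 12 fractional bits):

/* sketch: fixed-point tick length with SHIFT_SCALE - 10 fractional bits */
#include <stdint.h>

#define SHIFT_SCALE 22		/* assumed <linux/timex.h> value of this era */

uint64_t tick_length_fp(long tick_nsec, long adj_step_us, int64_t time_adj)
{
	long delta_nsec = tick_nsec + adj_step_us * 1000;

	return ((uint64_t)delta_nsec << (SHIFT_SCALE - 10)) + time_adj;
}

/* e.g. tick_nsec = 1000000 (HZ=1000) and no adjustment gives
 * 1000000 << 12 = 4096000000, i.e. exactly 1 ms with 12 fraction bits */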
@@ -896,6 +941,8 @@ static inline void update_times(void)
896void do_timer(struct pt_regs *regs) 941void do_timer(struct pt_regs *regs)
897{ 942{
898 jiffies_64++; 943 jiffies_64++;
944 /* prevent loading jiffies before storing new jiffies_64 value. */
945 barrier();
899 update_times(); 946 update_times();
900 softlockup_tick(regs); 947 softlockup_tick(regs);
901} 948}
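The barrier() added to do_timer() is a compiler-only barrier: jiffies aliases the low word of jiffies_64 via the linker script, and without it the compiler could hoist a read of jiffies in update_times() above the 64-bit increment. A sketch of the idea (the barrier definition mirrors the kernel's GCC version; the aliasing is emulated here with a cast and assumes little-endian layout):

/* sketch: a GCC compiler barrier - it emits no instructions, but the
 * "memory" clobber stops the compiler from caching or reordering memory
 * accesses across it */
#define compiler_barrier() __asm__ __volatile__("" : : : "memory")

unsigned long long counter64;
/* stand-in for the linker-script aliasing of jiffies to jiffies_64 */
#define counter_lo (*(volatile unsigned long *)&counter64)

unsigned long tick(void)
{
	counter64++;
	compiler_barrier();	/* keep the load below after the increment */
	return counter_lo;
}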
@@ -1307,8 +1354,8 @@ void __init init_timers(void)
1307 1354
1308#ifdef CONFIG_TIME_INTERPOLATION 1355#ifdef CONFIG_TIME_INTERPOLATION
1309 1356
1310struct time_interpolator *time_interpolator; 1357struct time_interpolator *time_interpolator __read_mostly;
1311static struct time_interpolator *time_interpolator_list; 1358static struct time_interpolator *time_interpolator_list __read_mostly;
1312static DEFINE_SPINLOCK(time_interpolator_lock); 1359static DEFINE_SPINLOCK(time_interpolator_lock);
1313 1360
1314static inline u64 time_interpolator_get_cycles(unsigned int src) 1361static inline u64 time_interpolator_get_cycles(unsigned int src)
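__read_mostly only changes placement: rarely-written variables are grouped into a dedicated data section so they do not share cache lines with frequently-written data, avoiding false sharing on SMP. A sketch of the kind of definition behind it (the section name matches this era's x86 headers; other architectures may define the annotation as empty):

/* sketch: group a rarely-written variable into its own data section */
#define my_read_mostly __attribute__((__section__(".data.read_mostly")))

struct time_interpolator;	/* opaque in this sketch */
static struct time_interpolator *interp_list my_read_mostly;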
@@ -1322,10 +1369,10 @@ static inline u64 time_interpolator_get_cycles(unsigned int src)
1322 return x(); 1369 return x();
1323 1370
1324 case TIME_SOURCE_MMIO64 : 1371 case TIME_SOURCE_MMIO64 :
1325 return readq((void __iomem *) time_interpolator->addr); 1372 return readq_relaxed((void __iomem *)time_interpolator->addr);
1326 1373
1327 case TIME_SOURCE_MMIO32 : 1374 case TIME_SOURCE_MMIO32 :
1328 return readl((void __iomem *) time_interpolator->addr); 1375 return readl_relaxed((void __iomem *)time_interpolator->addr);
1329 1376
1330 default: return get_cycles(); 1377 default: return get_cycles();
1331 } 1378 }
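The _relaxed MMIO readers return the raw device value without the ordering that the plain readl()/readq() imply on architectures such as ia64; that is acceptable here because the interpolator merely samples a free-running counter and does not need the load ordered against other I/O. A conceptual sketch of the relationship, not the ia64 implementation:

/* conceptual sketch: an ordered MMIO read behaves like a relaxed (raw)
 * read followed by a fence */
#include <linux/types.h>
#include <linux/compiler.h>
#include <asm/system.h>		/* rmb() in this kernel series */

static inline u32 sample_readl_relaxed(const volatile void __iomem *addr)
{
	return *(const volatile u32 __force *)addr;	/* raw load, no fence */
}

static inline u32 sample_readl(const volatile void __iomem *addr)
{
	u32 val = sample_readl_relaxed(addr);

	rmb();			/* order the load before later accesses */
	return val;
}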