author     David S. Miller <davem@davemloft.net>    2013-04-22 20:32:51 -0400
committer  David S. Miller <davem@davemloft.net>    2013-04-22 20:32:51 -0400
commit     6e0895c2ea326cc4bb11e8fa2f654628d5754c31 (patch)
tree       7089303ac11a12edc43a8c4fa1b23974e10937ea /kernel
parent     55fbbe46e9eb3cbe6c335503f5550855a1128dce (diff)
parent     60d509fa6a9c4653a86ad830e4c4b30360b23f0e (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Conflicts:
	drivers/net/ethernet/emulex/benet/be_main.c
	drivers/net/ethernet/intel/igb/igb_main.c
	drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
	include/net/scm.h
	net/batman-adv/routing.c
	net/ipv4/tcp_input.c

The e{uid,gid} --> {uid,gid} credentials fix conflicted with the cleanup
in net-next to now pass cred structs around.

The be2net driver had a bug fix in 'net' that overlapped with the VLAN
interface changes by Patrick McHardy in net-next.

An IGB conflict existed because in 'net' the build_skb() support was
reverted, and in 'net-next' there was a comment style fix within that
code.

Several batman-adv conflicts were resolved by making sure that all calls
to batadv_is_my_mac() are changed to have a new bat_priv first argument.

Eric Dumazet's TS ECR fix in TCP in 'net' conflicted with the F-RTO
rewrite in 'net-next', mostly overlapping changes.

Thanks to Stephen Rothwell and Antonio Quartulli for help with several
of these merge resolutions.

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/capability.c           24
-rw-r--r--   kernel/events/core.c           6
-rw-r--r--   kernel/events/internal.h       2
-rw-r--r--   kernel/events/ring_buffer.c   22
-rw-r--r--   kernel/hrtimer.c               3
-rw-r--r--   kernel/kexec.c               118
-rw-r--r--   kernel/kprobes.c              19
-rw-r--r--   kernel/kthread.c              52
-rw-r--r--   kernel/sched/clock.c          26
-rw-r--r--   kernel/sched/core.c            8
-rw-r--r--   kernel/sched/cputime.c         2
-rw-r--r--   kernel/signal.c                2
-rw-r--r--   kernel/smpboot.c              14
-rw-r--r--   kernel/sys.c                   3
-rw-r--r--   kernel/trace/blktrace.c       26
-rw-r--r--   kernel/trace/ftrace.c         54
-rw-r--r--   kernel/trace/trace.c           9
-rw-r--r--   kernel/trace/trace_stack.c     2
-rw-r--r--   kernel/user_namespace.c       22
19 files changed, 289 insertions, 125 deletions
diff --git a/kernel/capability.c b/kernel/capability.c
index 493d97259484..f6c2ce5701e1 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -393,6 +393,30 @@ bool ns_capable(struct user_namespace *ns, int cap)
 EXPORT_SYMBOL(ns_capable);
 
 /**
+ * file_ns_capable - Determine if the file's opener had a capability in effect
+ * @file: The file we want to check
+ * @ns: The usernamespace we want the capability in
+ * @cap: The capability to be tested for
+ *
+ * Return true if task that opened the file had a capability in effect
+ * when the file was opened.
+ *
+ * This does not set PF_SUPERPRIV because the caller may not
+ * actually be privileged.
+ */
+bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap)
+{
+	if (WARN_ON_ONCE(!cap_valid(cap)))
+		return false;
+
+	if (security_capable(file->f_cred, ns, cap) == 0)
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(file_ns_capable);
+
+/**
  * capable - Determine if the current task has a superior capability in effect
  * @cap: The capability to be tested for
  *
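
The helper added above consults the credentials captured at open time
(file->f_cred) instead of the credentials of the task currently acting on
the file. A minimal sketch of how a caller might use it; the ioctl handler
and the CAP_NET_ADMIN/init_user_ns choice are illustrative assumptions,
not part of this commit:

	/* Hypothetical driver check: require that whoever *opened* the
	 * file had CAP_NET_ADMIN, regardless of who is calling now.
	 */
	static long example_ioctl(struct file *file, unsigned int cmd)
	{
		if (!file_ns_capable(file, &init_user_ns, CAP_NET_ADMIN))
			return -EPERM;
		/* ... privileged work ... */
		return 0;
	}

The real users introduced by this series are the uid_map/gid_map write
paths in kernel/user_namespace.c further down.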
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 59412d037eed..4d3124b39277 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4737,7 +4737,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	} else {
 		if (arch_vma_name(mmap_event->vma)) {
 			name = strncpy(tmp, arch_vma_name(mmap_event->vma),
-				       sizeof(tmp));
+				       sizeof(tmp) - 1);
+			tmp[sizeof(tmp) - 1] = '\0';
 			goto got_name;
 		}
 
@@ -5330,7 +5331,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
 
 static int perf_swevent_init(struct perf_event *event)
 {
-	int event_id = event->attr.config;
+	u64 event_id = event->attr.config;
 
 	if (event->attr.type != PERF_TYPE_SOFTWARE)
 		return -ENOENT;
@@ -5986,6 +5987,7 @@ skip_type:
 	if (pmu->pmu_cpu_context)
 		goto got_cpu_context;
 
+	ret = -ENOMEM;
 	pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
 	if (!pmu->pmu_cpu_context)
 		goto free_dev;
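
The strncpy() change above matters because strncpy() does not
NUL-terminate the destination when the source is at least as long as the
given count. A standalone userspace demonstration (not kernel code) of
the bug and the fix:

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char tmp[8];

		/* Before: count == sizeof(tmp); if the name is longer,
		 * no '\0' is written and later reads run off the end. */
		strncpy(tmp, "0123456789", sizeof(tmp));

		/* After: leave room and terminate explicitly. */
		strncpy(tmp, "0123456789", sizeof(tmp) - 1);
		tmp[sizeof(tmp) - 1] = '\0';
		printf("%s\n", tmp);	/* prints "0123456" */
		return 0;
	}

The strlcpy() conversions in kernel/trace/ftrace.c and
kernel/trace/trace.c later in this diff fix the same class of problem.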
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index d56a64c99a8b..eb675c4d59df 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -16,7 +16,7 @@ struct ring_buffer {
 	int			page_order;	/* allocation order */
 #endif
 	int			nr_pages;	/* nr of data pages */
-	int			writable;	/* are we writable */
+	int			overwrite;	/* can overwrite itself */
 
 	atomic_t		poll;		/* POLL_ for wakeups */
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 23cb34ff3973..97fddb09762b 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -18,12 +18,24 @@
 static bool perf_output_space(struct ring_buffer *rb, unsigned long tail,
 			      unsigned long offset, unsigned long head)
 {
-	unsigned long mask;
+	unsigned long sz = perf_data_size(rb);
+	unsigned long mask = sz - 1;
 
-	if (!rb->writable)
+	/*
+	 * check if user-writable
+	 * overwrite : over-write its own tail
+	 * !overwrite: buffer possibly drops events.
+	 */
+	if (rb->overwrite)
 		return true;
 
-	mask = perf_data_size(rb) - 1;
+	/*
+	 * verify that payload is not bigger than buffer
+	 * otherwise masking logic may fail to detect
+	 * the "not enough space" condition
+	 */
+	if ((head - offset) > sz)
+		return false;
 
 	offset = (offset - tail) & mask;
 	head   = (head - tail) & mask;
@@ -212,7 +224,9 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 	rb->watermark = max_size / 2;
 
 	if (flags & RING_BUFFER_WRITABLE)
-		rb->writable = 1;
+		rb->overwrite = 0;
+	else
+		rb->overwrite = 1;
 
 	atomic_set(&rb->refcount, 1);
 
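
The new "(head - offset) > sz" test above guards the masking logic: head
and offset are free-running counters, so their difference is the payload
size even across wrap-around, but once both are reduced modulo the buffer
size that information is gone. A standalone sketch (not kernel code) of
the check, assuming sz is a power of two:

	#include <stdbool.h>

	/* Reject a payload larger than the buffer *before* masking;
	 * afterwards (x & (sz - 1)) can no longer distinguish "fits
	 * exactly" from "wrapped past the whole buffer".
	 */
	static bool payload_fits(unsigned long sz, unsigned long offset,
				 unsigned long head)
	{
		return (head - offset) <= sz;
	}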
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index cc47812d3feb..14be27feda49 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -63,6 +63,7 @@
 DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
 {
 
+	.lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
 	.clock_base =
 	{
 		{
@@ -1642,8 +1643,6 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
 	struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
 	int i;
 
-	raw_spin_lock_init(&cpu_base->lock);
-
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		cpu_base->clock_base[i].cpu_base = cpu_base;
 		timerqueue_init_head(&cpu_base->clock_base[i].active);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index bddd3d7a74b6..ffd4e111fd67 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -55,7 +55,7 @@ struct resource crashk_res = {
 	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
 };
 struct resource crashk_low_res = {
-	.name  = "Crash kernel low",
+	.name  = "Crash kernel",
 	.start = 0,
 	.end   = 0,
 	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
@@ -1368,35 +1368,114 @@ static int __init parse_crashkernel_simple(char *cmdline,
 	return 0;
 }
 
+#define SUFFIX_HIGH 0
+#define SUFFIX_LOW  1
+#define SUFFIX_NULL 2
+static __initdata char *suffix_tbl[] = {
+	[SUFFIX_HIGH] = ",high",
+	[SUFFIX_LOW]  = ",low",
+	[SUFFIX_NULL] = NULL,
+};
+
 /*
- * That function is the entry point for command line parsing and should be
- * called from the arch-specific code.
+ * That function parses "suffix" crashkernel command lines like
+ *
+ *	crashkernel=size,[high|low]
+ *
+ * It returns 0 on success and -EINVAL on failure.
  */
+static int __init parse_crashkernel_suffix(char *cmdline,
+					   unsigned long long *crash_size,
+					   unsigned long long *crash_base,
+					   const char *suffix)
+{
+	char *cur = cmdline;
+
+	*crash_size = memparse(cmdline, &cur);
+	if (cmdline == cur) {
+		pr_warn("crashkernel: memory value expected\n");
+		return -EINVAL;
+	}
+
+	/* check with suffix */
+	if (strncmp(cur, suffix, strlen(suffix))) {
+		pr_warn("crashkernel: unrecognized char\n");
+		return -EINVAL;
+	}
+	cur += strlen(suffix);
+	if (*cur != ' ' && *cur != '\0') {
+		pr_warn("crashkernel: unrecognized char\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static __init char *get_last_crashkernel(char *cmdline,
+					 const char *name,
+					 const char *suffix)
+{
+	char *p = cmdline, *ck_cmdline = NULL;
+
+	/* find crashkernel and use the last one if there are more */
+	p = strstr(p, name);
+	while (p) {
+		char *end_p = strchr(p, ' ');
+		char *q;
+
+		if (!end_p)
+			end_p = p + strlen(p);
+
+		if (!suffix) {
+			int i;
+
+			/* skip the one with any known suffix */
+			for (i = 0; suffix_tbl[i]; i++) {
+				q = end_p - strlen(suffix_tbl[i]);
+				if (!strncmp(q, suffix_tbl[i],
+					     strlen(suffix_tbl[i])))
+					goto next;
+			}
+			ck_cmdline = p;
+		} else {
+			q = end_p - strlen(suffix);
+			if (!strncmp(q, suffix, strlen(suffix)))
+				ck_cmdline = p;
+		}
+next:
+		p = strstr(p+1, name);
+	}
+
+	if (!ck_cmdline)
+		return NULL;
+
+	return ck_cmdline;
+}
+
 static int __init __parse_crashkernel(char *cmdline,
 			     unsigned long long system_ram,
 			     unsigned long long *crash_size,
 			     unsigned long long *crash_base,
-			     const char *name)
+			     const char *name,
+			     const char *suffix)
 {
-	char	*p = cmdline, *ck_cmdline = NULL;
 	char	*first_colon, *first_space;
+	char	*ck_cmdline;
 
 	BUG_ON(!crash_size || !crash_base);
 	*crash_size = 0;
 	*crash_base = 0;
 
-	/* find crashkernel and use the last one if there are more */
-	p = strstr(p, name);
-	while (p) {
-		ck_cmdline = p;
-		p = strstr(p+1, name);
-	}
+	ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
 
 	if (!ck_cmdline)
 		return -EINVAL;
 
 	ck_cmdline += strlen(name);
 
+	if (suffix)
+		return parse_crashkernel_suffix(ck_cmdline, crash_size,
+				crash_base, suffix);
 	/*
 	 * if the commandline contains a ':', then that's the extended
 	 * syntax -- if not, it must be the classic syntax
@@ -1413,13 +1492,26 @@ static int __init __parse_crashkernel(char *cmdline,
 	return 0;
 }
 
+/*
+ * That function is the entry point for command line parsing and should be
+ * called from the arch-specific code.
+ */
 int __init parse_crashkernel(char *cmdline,
 			     unsigned long long system_ram,
 			     unsigned long long *crash_size,
 			     unsigned long long *crash_base)
 {
 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
-					"crashkernel=");
+					"crashkernel=", NULL);
+}
+
+int __init parse_crashkernel_high(char *cmdline,
+			     unsigned long long system_ram,
+			     unsigned long long *crash_size,
+			     unsigned long long *crash_base)
+{
+	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
+				"crashkernel=", suffix_tbl[SUFFIX_HIGH]);
 }
 
 int __init parse_crashkernel_low(char *cmdline,
@@ -1428,7 +1520,7 @@ int __init parse_crashkernel_low(char *cmdline,
 				unsigned long long *crash_base)
 {
 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
-					"crashkernel_low=");
+				"crashkernel=", suffix_tbl[SUFFIX_LOW]);
 }
 
 static void update_vmcoreinfo_note(void)
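
With the suffix table above, both reservations are spelled with the same
parameter name, e.g. "crashkernel=512M,high" and "crashkernel=256M,low",
and the old "crashkernel_low=" form goes away (see the hunk just above).
A standalone sketch (not kernel code) of the suffix validation performed
by parse_crashkernel_suffix() after memparse() has consumed the size:

	#include <stdbool.h>
	#include <string.h>

	/* The remainder must be exactly the suffix, followed by a
	 * space or the end of the string.
	 */
	static bool suffix_ok(const char *cur, const char *suffix)
	{
		if (strncmp(cur, suffix, strlen(suffix)))
			return false;
		cur += strlen(suffix);
		return *cur == ' ' || *cur == '\0';
	}

	/* suffix_ok(",high", ",high")       -> true
	 * suffix_ok(",highmem", ",high")    -> false
	 * suffix_ok(",high quiet", ",high") -> true
	 */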
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index e35be53f6613..3fed7f0cbcdf 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -794,16 +794,16 @@ out:
 }
 
 #ifdef CONFIG_SYSCTL
-/* This should be called with kprobe_mutex locked */
 static void __kprobes optimize_all_kprobes(void)
 {
 	struct hlist_head *head;
 	struct kprobe *p;
 	unsigned int i;
 
+	mutex_lock(&kprobe_mutex);
 	/* If optimization is already allowed, just return */
 	if (kprobes_allow_optimization)
-		return;
+		goto out;
 
 	kprobes_allow_optimization = true;
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
@@ -813,18 +813,22 @@ static void __kprobes optimize_all_kprobes(void)
 			optimize_kprobe(p);
 	}
 	printk(KERN_INFO "Kprobes globally optimized\n");
+out:
+	mutex_unlock(&kprobe_mutex);
 }
 
-/* This should be called with kprobe_mutex locked */
 static void __kprobes unoptimize_all_kprobes(void)
 {
 	struct hlist_head *head;
 	struct kprobe *p;
 	unsigned int i;
 
+	mutex_lock(&kprobe_mutex);
 	/* If optimization is already prohibited, just return */
-	if (!kprobes_allow_optimization)
+	if (!kprobes_allow_optimization) {
+		mutex_unlock(&kprobe_mutex);
 		return;
+	}
 
 	kprobes_allow_optimization = false;
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
@@ -834,11 +838,14 @@ static void __kprobes unoptimize_all_kprobes(void)
 			unoptimize_kprobe(p, false);
 		}
 	}
+	mutex_unlock(&kprobe_mutex);
+
 	/* Wait for unoptimizing completion */
 	wait_for_kprobe_optimizer();
 	printk(KERN_INFO "Kprobes globally unoptimized\n");
 }
 
+static DEFINE_MUTEX(kprobe_sysctl_mutex);
 int sysctl_kprobes_optimization;
 int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
 				      void __user *buffer, size_t *length,
@@ -846,7 +853,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
 {
 	int ret;
 
-	mutex_lock(&kprobe_mutex);
+	mutex_lock(&kprobe_sysctl_mutex);
 	sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0;
 	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
 
@@ -854,7 +861,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
 		optimize_all_kprobes();
 	else
 		unoptimize_all_kprobes();
-	mutex_unlock(&kprobe_mutex);
+	mutex_unlock(&kprobe_sysctl_mutex);
 
 	return ret;
 }
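
The reshuffle above takes kprobe_mutex inside the two helpers and
serializes the sysctl handler with its own kprobe_sysctl_mutex, so that
unoptimize_all_kprobes() can drop kprobe_mutex before the potentially
slow wait_for_kprobe_optimizer() step. A standalone pthread sketch (not
kernel code) of that locking shape; the names and the rationale for the
outer mutex are assumptions drawn from the hunks above:

	#include <pthread.h>

	static pthread_mutex_t state_lock  = PTHREAD_MUTEX_INITIALIZER;
	static pthread_mutex_t sysctl_lock = PTHREAD_MUTEX_INITIALIZER;
	static int allow_optimization = 1;

	/* Stub: the real optimizer wait may itself need state_lock,
	 * which is why the caller must not hold it here. */
	static void wait_for_optimizer(void) { }

	static void unoptimize_all(void)
	{
		pthread_mutex_lock(&state_lock);
		if (!allow_optimization) {
			pthread_mutex_unlock(&state_lock);
			return;
		}
		allow_optimization = 0;
		/* ... walk the tables under state_lock ... */
		pthread_mutex_unlock(&state_lock);

		wait_for_optimizer();	/* safe: state_lock released */
	}

	static void sysctl_handler(int enable)
	{
		pthread_mutex_lock(&sysctl_lock);	/* serialize writers */
		if (!enable)
			unoptimize_all();
		pthread_mutex_unlock(&sysctl_lock);
	}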
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 691dc2ef9baf..9eb7fed0bbaa 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -124,12 +124,12 @@ void *kthread_data(struct task_struct *task)
 
 static void __kthread_parkme(struct kthread *self)
 {
-	__set_current_state(TASK_INTERRUPTIBLE);
+	__set_current_state(TASK_PARKED);
 	while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) {
 		if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
 			complete(&self->parked);
 		schedule();
-		__set_current_state(TASK_INTERRUPTIBLE);
+		__set_current_state(TASK_PARKED);
 	}
 	clear_bit(KTHREAD_IS_PARKED, &self->flags);
 	__set_current_state(TASK_RUNNING);
@@ -256,8 +256,13 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 }
 EXPORT_SYMBOL(kthread_create_on_node);
 
-static void __kthread_bind(struct task_struct *p, unsigned int cpu)
+static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
 {
+	/* Must have done schedule() in kthread() before we set_task_cpu */
+	if (!wait_task_inactive(p, state)) {
+		WARN_ON(1);
+		return;
+	}
 	/* It's safe because the task is inactive. */
 	do_set_cpus_allowed(p, cpumask_of(cpu));
 	p->flags |= PF_THREAD_BOUND;
@@ -274,12 +279,7 @@ static void __kthread_bind(struct task_struct *p, unsigned int cpu)
  */
 void kthread_bind(struct task_struct *p, unsigned int cpu)
 {
-	/* Must have done schedule() in kthread() before we set_task_cpu */
-	if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
-		WARN_ON(1);
-		return;
-	}
-	__kthread_bind(p, cpu);
+	__kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(kthread_bind);
 
@@ -324,6 +324,22 @@ static struct kthread *task_get_live_kthread(struct task_struct *k)
 	return NULL;
 }
 
+static void __kthread_unpark(struct task_struct *k, struct kthread *kthread)
+{
+	clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+	/*
+	 * We clear the IS_PARKED bit here as we don't wait
+	 * until the task has left the park code. So if we'd
+	 * park before that happens we'd see the IS_PARKED bit
+	 * which might be about to be cleared.
+	 */
+	if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
+		if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
+			__kthread_bind(k, kthread->cpu, TASK_PARKED);
+		wake_up_state(k, TASK_PARKED);
+	}
+}
+
 /**
  * kthread_unpark - unpark a thread created by kthread_create().
  * @k:		thread created by kthread_create().
@@ -336,20 +352,8 @@ void kthread_unpark(struct task_struct *k)
 {
 	struct kthread *kthread = task_get_live_kthread(k);
 
-	if (kthread) {
-		clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
-		/*
-		 * We clear the IS_PARKED bit here as we don't wait
-		 * until the task has left the park code. So if we'd
-		 * park before that happens we'd see the IS_PARKED bit
-		 * which might be about to be cleared.
-		 */
-		if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
-			if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
-				__kthread_bind(k, kthread->cpu);
-			wake_up_process(k);
-		}
-	}
+	if (kthread)
+		__kthread_unpark(k, kthread);
 	put_task_struct(k);
 }
 
@@ -407,7 +411,7 @@ int kthread_stop(struct task_struct *k)
 	trace_sched_kthread_stop(k);
 	if (kthread) {
 		set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
-		clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+		__kthread_unpark(k, kthread);
 		wake_up_process(k);
 		wait_for_completion(&kthread->exited);
 	}
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index c685e31492df..c3ae1446461c 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -176,10 +176,36 @@ static u64 sched_clock_remote(struct sched_clock_data *scd)
 	u64 this_clock, remote_clock;
 	u64 *ptr, old_val, val;
 
+#if BITS_PER_LONG != 64
+again:
+	/*
+	 * Careful here: The local and the remote clock values need to
+	 * be read out atomic as we need to compare the values and
+	 * then update either the local or the remote side. So the
+	 * cmpxchg64 below only protects one readout.
+	 *
+	 * We must reread via sched_clock_local() in the retry case on
+	 * 32bit as an NMI could use sched_clock_local() via the
+	 * tracer and hit between the readout of
+	 * the low32bit and the high 32bit portion.
+	 */
+	this_clock = sched_clock_local(my_scd);
+	/*
+	 * We must enforce atomic readout on 32bit, otherwise the
+	 * update on the remote cpu can hit inbetween the readout of
+	 * the low32bit and the high 32bit portion.
+	 */
+	remote_clock = cmpxchg64(&scd->clock, 0, 0);
+#else
+	/*
+	 * On 64bit the read of [my]scd->clock is atomic versus the
+	 * update, so we can avoid the above 32bit dance.
+	 */
 	sched_clock_local(my_scd);
 again:
 	this_clock = my_scd->clock;
 	remote_clock = scd->clock;
+#endif
 
 	/*
 	 * Use the opportunity that we have both locks
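
The trick above is that cmpxchg64(&scd->clock, 0, 0) never changes the
stored value (it writes 0 only when the value already is 0) but always
returns an atomically-read 64-bit snapshot, which a plain load cannot
guarantee on 32-bit. A standalone userspace analog (not kernel code)
using the GCC/Clang __sync builtin:

	#include <stdint.h>

	/* Atomic 64-bit read via compare-and-swap with old == new:
	 * the swap is a no-op either way, but the returned value was
	 * read in one atomic operation (cmpxchg8b on 32-bit x86).
	 */
	static uint64_t atomic_read64(uint64_t *p)
	{
		return __sync_val_compare_and_swap(p, 0, 0);
	}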
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7f12624a393c..67d04651f44b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1498,8 +1498,10 @@ static void try_to_wake_up_local(struct task_struct *p)
 {
 	struct rq *rq = task_rq(p);
 
-	BUG_ON(rq != this_rq());
-	BUG_ON(p == current);
+	if (WARN_ON_ONCE(rq != this_rq()) ||
+	    WARN_ON_ONCE(p == current))
+		return;
+
 	lockdep_assert_held(&rq->lock);
 
 	if (!raw_spin_trylock(&p->pi_lock)) {
@@ -4999,7 +5001,7 @@ static void sd_free_ctl_entry(struct ctl_table **tablep)
 }
 
 static int min_load_idx = 0;
-static int max_load_idx = CPU_LOAD_IDX_MAX;
+static int max_load_idx = CPU_LOAD_IDX_MAX-1;
 
 static void
 set_table_entry(struct ctl_table *entry,
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index ed12cbb135f4..e93cca92f38b 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -310,7 +310,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 
 	t = tsk;
 	do {
-		task_cputime(tsk, &utime, &stime);
+		task_cputime(t, &utime, &stime);
 		times->utime += utime;
 		times->stime += stime;
 		times->sum_exec_runtime += task_sched_runtime(t);
diff --git a/kernel/signal.c b/kernel/signal.c
index 497330ec2ae9..06ff7764ab7c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2950,7 +2950,7 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
 
 static int do_tkill(pid_t tgid, pid_t pid, int sig)
 {
-	struct siginfo info;
+	struct siginfo info = {};
 
 	info.si_signo = sig;
 	info.si_errno = 0;
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 8eaed9aa9cf0..02fc5c933673 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -185,8 +185,18 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
 	}
 	get_task_struct(tsk);
 	*per_cpu_ptr(ht->store, cpu) = tsk;
-	if (ht->create)
-		ht->create(cpu);
+	if (ht->create) {
+		/*
+		 * Make sure that the task has actually scheduled out
+		 * into park position, before calling the create
+		 * callback. At least the migration thread callback
+		 * requires that the task is off the runqueue.
+		 */
+		if (!wait_task_inactive(tsk, TASK_PARKED))
+			WARN_ON(1);
+		else
+			ht->create(cpu);
+	}
 	return 0;
 }
 
diff --git a/kernel/sys.c b/kernel/sys.c
index 39c9c4a2949f..0da73cf73e60 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -324,7 +324,6 @@ void kernel_restart_prepare(char *cmd)
 	system_state = SYSTEM_RESTART;
 	usermodehelper_disable();
 	device_shutdown();
-	syscore_shutdown();
 }
 
 /**
@@ -370,6 +369,7 @@ void kernel_restart(char *cmd)
 {
 	kernel_restart_prepare(cmd);
 	disable_nonboot_cpus();
+	syscore_shutdown();
 	if (!cmd)
 		printk(KERN_EMERG "Restarting system.\n");
 	else
@@ -395,6 +395,7 @@ static void kernel_shutdown_prepare(enum system_states state)
 void kernel_halt(void)
 {
 	kernel_shutdown_prepare(SYSTEM_HALT);
+	disable_nonboot_cpus();
 	syscore_shutdown();
 	printk(KERN_EMERG "System halted.\n");
 	kmsg_dump(KMSG_DUMP_HALT);
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 9e5b8c272eec..5a0f781cd729 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -739,12 +739,6 @@ static void blk_add_trace_rq_complete(void *ignore,
 					     struct request_queue *q,
 					     struct request *rq)
 {
-	struct blk_trace *bt = q->blk_trace;
-
-	/* if control ever passes through here, it's a request based driver */
-	if (unlikely(bt && !bt->rq_based))
-		bt->rq_based = true;
-
 	blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
 }
 
@@ -780,24 +774,10 @@ static void blk_add_trace_bio_bounce(void *ignore,
 	blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0);
 }
 
-static void blk_add_trace_bio_complete(void *ignore, struct bio *bio, int error)
+static void blk_add_trace_bio_complete(void *ignore,
+				       struct request_queue *q, struct bio *bio,
+				       int error)
 {
-	struct request_queue *q;
-	struct blk_trace *bt;
-
-	if (!bio->bi_bdev)
-		return;
-
-	q = bdev_get_queue(bio->bi_bdev);
-	bt = q->blk_trace;
-
-	/*
-	 * Request based drivers will generate both rq and bio completions.
-	 * Ignore bio ones.
-	 */
-	if (likely(!bt) || bt->rq_based)
-		return;
-
 	blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
 }
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6893d5a2bf08..b3fde6d7b7fc 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -66,7 +66,7 @@
 
 static struct ftrace_ops ftrace_list_end __read_mostly = {
 	.func		= ftrace_stub,
-	.flags		= FTRACE_OPS_FL_RECURSION_SAFE,
+	.flags		= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
 };
 
 /* ftrace_enabled is a method to turn ftrace on or off */
@@ -694,7 +694,6 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
 		free_page(tmp);
 	}
 
-	free_page((unsigned long)stat->pages);
 	stat->pages = NULL;
 	stat->start = NULL;
 
@@ -1053,6 +1052,19 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
 
 static struct pid * const ftrace_swapper_pid = &init_struct_pid;
 
+loff_t
+ftrace_filter_lseek(struct file *file, loff_t offset, int whence)
+{
+	loff_t ret;
+
+	if (file->f_mode & FMODE_READ)
+		ret = seq_lseek(file, offset, whence);
+	else
+		file->f_pos = ret = 1;
+
+	return ret;
+}
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 #ifndef CONFIG_FTRACE_MCOUNT_RECORD
@@ -2613,7 +2625,7 @@ static void ftrace_filter_reset(struct ftrace_hash *hash)
  * routine, you can use ftrace_filter_write() for the write
  * routine if @flag has FTRACE_ITER_FILTER set, or
  * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set.
- * ftrace_regex_lseek() should be used as the lseek routine, and
+ * ftrace_filter_lseek() should be used as the lseek routine, and
  * release must call ftrace_regex_release().
  */
 int
@@ -2697,19 +2709,6 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
 				 inode, file);
 }
 
-loff_t
-ftrace_regex_lseek(struct file *file, loff_t offset, int whence)
-{
-	loff_t ret;
-
-	if (file->f_mode & FMODE_READ)
-		ret = seq_lseek(file, offset, whence);
-	else
-		file->f_pos = ret = 1;
-
-	return ret;
-}
-
 static int ftrace_match(char *str, char *regex, int len, int type)
 {
 	int matched = 0;
@@ -3441,14 +3440,14 @@ static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;
 
 static int __init set_ftrace_notrace(char *str)
 {
-	strncpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
+	strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
 	return 1;
 }
 __setup("ftrace_notrace=", set_ftrace_notrace);
 
 static int __init set_ftrace_filter(char *str)
 {
-	strncpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
+	strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
 	return 1;
 }
 __setup("ftrace_filter=", set_ftrace_filter);
@@ -3571,7 +3570,7 @@ static const struct file_operations ftrace_filter_fops = {
 	.open = ftrace_filter_open,
 	.read = seq_read,
 	.write = ftrace_filter_write,
-	.llseek = ftrace_regex_lseek,
+	.llseek = ftrace_filter_lseek,
 	.release = ftrace_regex_release,
 };
 
@@ -3579,7 +3578,7 @@ static const struct file_operations ftrace_notrace_fops = {
 	.open = ftrace_notrace_open,
 	.read = seq_read,
 	.write = ftrace_notrace_write,
-	.llseek = ftrace_regex_lseek,
+	.llseek = ftrace_filter_lseek,
 	.release = ftrace_regex_release,
 };
 
@@ -3784,8 +3783,8 @@ static const struct file_operations ftrace_graph_fops = {
 	.open = ftrace_graph_open,
 	.read = seq_read,
 	.write = ftrace_graph_write,
+	.llseek = ftrace_filter_lseek,
 	.release = ftrace_graph_release,
-	.llseek = seq_lseek,
 };
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
@@ -4131,7 +4130,8 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
 	preempt_disable_notrace();
 	trace_recursion_set(TRACE_CONTROL_BIT);
 	do_for_each_ftrace_op(op, ftrace_control_list) {
-		if (!ftrace_function_local_disabled(op) &&
+		if (!(op->flags & FTRACE_OPS_FL_STUB) &&
+		    !ftrace_function_local_disabled(op) &&
 		    ftrace_ops_test(op, ip))
 			op->func(ip, parent_ip, op, regs);
 	} while_for_each_ftrace_op(op);
@@ -4439,7 +4439,7 @@ static const struct file_operations ftrace_pid_fops = {
 	.open = ftrace_pid_open,
 	.write = ftrace_pid_write,
 	.read = seq_read,
-	.llseek = seq_lseek,
+	.llseek = ftrace_filter_lseek,
 	.release = ftrace_pid_release,
 };
 
@@ -4555,12 +4555,8 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 		ftrace_startup_sysctl();
 
 		/* we are starting ftrace again */
-		if (ftrace_ops_list != &ftrace_list_end) {
-			if (ftrace_ops_list->next == &ftrace_list_end)
-				ftrace_trace_function = ftrace_ops_list->func;
-			else
-				ftrace_trace_function = ftrace_ops_list_func;
-		}
+		if (ftrace_ops_list != &ftrace_list_end)
+			update_ftrace_function();
 
 	} else {
 		/* stopping ftrace calls (just send to ftrace_stub) */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4f1dade56981..66338c4f7f4b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -132,7 +132,7 @@ static char *default_bootup_tracer;
 
 static int __init set_cmdline_ftrace(char *str)
 {
-	strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
+	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
 	default_bootup_tracer = bootup_tracer_buf;
 	/* We are using ftrace early, expand it */
 	ring_buffer_expanded = 1;
@@ -162,7 +162,7 @@ static char *trace_boot_options __initdata;
 
 static int __init set_trace_boot_options(char *str)
 {
-	strncpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
+	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
 	trace_boot_options = trace_boot_options_buf;
 	return 0;
 }
@@ -744,8 +744,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 		return;
 
 	WARN_ON_ONCE(!irqs_disabled());
-	if (WARN_ON_ONCE(!current_trace->allocated_snapshot))
+	if (!current_trace->allocated_snapshot) {
+		/* Only the nop tracer should hit this when disabling */
+		WARN_ON_ONCE(current_trace != &nop_trace);
 		return;
+	}
 
 	arch_spin_lock(&ftrace_max_lock);
 
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 42ca822fc701..83a8b5b7bd35 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -322,7 +322,7 @@ static const struct file_operations stack_trace_filter_fops = {
 	.open = stack_trace_filter_open,
 	.read = seq_read,
 	.write = ftrace_filter_write,
-	.llseek = ftrace_regex_lseek,
+	.llseek = ftrace_filter_lseek,
 	.release = ftrace_regex_release,
 };
 
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index a54f26f82eb2..e134d8f365dd 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -25,7 +25,8 @@
 
 static struct kmem_cache *user_ns_cachep __read_mostly;
 
-static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
+static bool new_idmap_permitted(const struct file *file,
+				struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *map);
 
 static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
@@ -612,10 +613,10 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 	if (map->nr_extents != 0)
 		goto out;
 
-	/* Require the appropriate privilege CAP_SETUID or CAP_SETGID
-	 * over the user namespace in order to set the id mapping.
+	/*
+	 * Adjusting namespace settings requires capabilities on the target.
 	 */
-	if (cap_valid(cap_setid) && !ns_capable(ns, cap_setid))
+	if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
 		goto out;
 
 	/* Get a buffer */
@@ -700,7 +701,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 
 	ret = -EPERM;
 	/* Validate the user is allowed to use user id's mapped to. */
-	if (!new_idmap_permitted(ns, cap_setid, &new_map))
+	if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
 		goto out;
 
 	/* Map the lower ids from the parent user namespace to the
@@ -787,7 +788,8 @@ ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t
 			 &ns->projid_map, &ns->parent->projid_map);
 }
 
-static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
+static bool new_idmap_permitted(const struct file *file,
+				struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *new_map)
 {
 	/* Allow mapping to your own filesystem ids */
@@ -795,12 +797,12 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
 		u32 id = new_map->extent[0].lower_first;
 		if (cap_setid == CAP_SETUID) {
 			kuid_t uid = make_kuid(ns->parent, id);
-			if (uid_eq(uid, current_fsuid()))
+			if (uid_eq(uid, file->f_cred->fsuid))
 				return true;
 		}
 		else if (cap_setid == CAP_SETGID) {
 			kgid_t gid = make_kgid(ns->parent, id);
-			if (gid_eq(gid, current_fsgid()))
+			if (gid_eq(gid, file->f_cred->fsgid))
 				return true;
 		}
 	}
@@ -811,8 +813,10 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
 
 	/* Allow the specified ids if we have the appropriate capability
 	 * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
+	 * And the opener of the id file also had the appropriate capability.
 	 */
-	if (ns_capable(ns->parent, cap_setid))
+	if (ns_capable(ns->parent, cap_setid) &&
+	    file_ns_capable(file, ns->parent, cap_setid))
 		return true;
 
 	return false;
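
After this change the uid_map/gid_map checks are applied to the
credentials of whoever opened the map file, closing the hole where a more
privileged process could be tricked into writing a mapping through an
attacker-supplied file descriptor. A standalone userspace example
(Linux-specific, not from this commit) where opener and writer are the
same process, which remains permitted:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <sched.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char map[64];
		uid_t outer = getuid();
		int fd, n;

		if (unshare(CLONE_NEWUSER) < 0) {
			perror("unshare");
			return 1;
		}

		/* Format: "<inside> <outside> <count>"; mapping our own
		 * uid satisfies new_idmap_permitted() without any
		 * capability over the parent namespace. */
		n = snprintf(map, sizeof(map), "0 %u 1", (unsigned)outer);

		fd = open("/proc/self/uid_map", O_WRONLY);
		if (fd < 0 || write(fd, map, n) != n) {
			perror("uid_map");
			return 1;
		}
		close(fd);

		printf("uid inside: %u\n", (unsigned)getuid());	/* 0 */
		return 0;
	}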