author    Ingo Molnar <mingo@kernel.org>  2012-06-20 08:22:32 -0400
committer Ingo Molnar <mingo@kernel.org>  2012-06-20 08:22:34 -0400
commit    6a991acceedce3ca93caef8ba7af2468c9451614 (patch)
tree      1997d71fc57bdebd12fc70a73070281614b52f15 /arch/x86/kernel
parent    70fb74a5420f9caa3e001d65004e4b669124283e (diff)
parent    485802a6c524e62b5924849dd727ddbb1497cc71 (diff)
Merge commit 'v3.5-rc3' into x86/debug
Merge it in to pick up a fix that we are going to clean up in this branch.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/aperture_64.c                6
-rw-r--r--  arch/x86/kernel/apic/io_apic.c               4
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c             4
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c            11
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h             2
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c     145
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c    9
-rw-r--r--  arch/x86/kernel/kvmclock.c                   5
-rw-r--r--  arch/x86/kernel/nmi_selftest.c               4
-rw-r--r--  arch/x86/kernel/pci-dma.c                    3
-rw-r--r--  arch/x86/kernel/reboot.c                     6
-rw-r--r--  arch/x86/kernel/smpboot.c                   16
12 files changed, 146 insertions, 69 deletions
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 6e76c191a83..d5fd66f0d4c 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -20,7 +20,6 @@
 #include <linux/bitops.h>
 #include <linux/ioport.h>
 #include <linux/suspend.h>
-#include <linux/kmemleak.h>
 #include <asm/e820.h>
 #include <asm/io.h>
 #include <asm/iommu.h>
@@ -95,11 +94,6 @@ static u32 __init allocate_aperture(void)
 		return 0;
 	}
 	memblock_reserve(addr, aper_size);
-	/*
-	 * Kmemleak should not scan this block as it may not be mapped via the
-	 * kernel direct mapping.
-	 */
-	kmemleak_ignore(phys_to_virt(addr));
 	printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
 			aper_size >> 10, addr);
 	insert_aperture_resource((u32)addr, aper_size);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 5155d6f806f..8704918514d 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1195,7 +1195,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 	BUG_ON(!cfg->vector);
 
 	vector = cfg->vector;
-	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
+	for_each_cpu(cpu, cfg->domain)
 		per_cpu(vector_irq, cpu)[vector] = -1;
 
 	cfg->vector = 0;
@@ -1203,7 +1203,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
 
 	if (likely(!cfg->move_in_progress))
 		return;
-	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
+	for_each_cpu(cpu, cfg->old_domain) {
 		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
 								vector++) {
 			if (per_cpu(vector_irq, cpu)[vector] != irq)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5623b4b5d51..5a5a5dc1ff1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1278,7 +1278,7 @@ static void mce_timer_fn(unsigned long data)
 	 */
 	iv = __this_cpu_read(mce_next_interval);
 	if (mce_notify_irq())
-		iv = max(iv, (unsigned long) HZ/100);
+		iv = max(iv / 2, (unsigned long) HZ/100);
 	else
 		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
 	__this_cpu_write(mce_next_interval, iv);
@@ -1560,7 +1560,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 static void __mcheck_cpu_init_timer(void)
 {
 	struct timer_list *t = &__get_cpu_var(mce_timer);
-	unsigned long iv = __this_cpu_read(mce_next_interval);
+	unsigned long iv = check_interval * HZ;
 
 	setup_timer(t, mce_timer_fn, smp_processor_id());
 
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e049d6da018..c4706cf9c01 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)
 		if (!cpuc->shared_regs)
 			goto error;
 	}
+	cpuc->is_fake = 1;
 	return cpuc;
 error:
 	free_fake_cpuc(cpuc);
@@ -1756,6 +1757,12 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
 }
 
+static inline int
+valid_user_frame(const void __user *fp, unsigned long size)
+{
+	return (__range_not_ok(fp, size, TASK_SIZE) == 0);
+}
+
 #ifdef CONFIG_COMPAT
 
 #include <asm/compat.h>
@@ -1780,7 +1787,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 		if (bytes != sizeof(frame))
 			break;
 
-		if (fp < compat_ptr(regs->sp))
+		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
 
 		perf_callchain_store(entry, frame.return_address);
@@ -1826,7 +1833,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 		if (bytes != sizeof(frame))
 			break;
 
-		if ((unsigned long)fp < regs->sp)
+		if (!valid_user_frame(fp, sizeof(frame)))
 			break;
 
 		perf_callchain_store(entry, frame.return_address);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6638aaf5449..7241e2fc3c1 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -117,6 +117,7 @@ struct cpu_hw_events {
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 
 	unsigned int		group_flag;
+	int			is_fake;
 
 	/*
 	 * Intel DebugStore bits
@@ -364,6 +365,7 @@ struct x86_pmu {
 	int		pebs_record_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
+	void		(*pebs_aliases)(struct perf_event *event);
 
 	/*
 	 * Intel LBR
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 9e3f5d6e3d2..5073bf1c1d8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1121,27 +1121,33 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
-static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+static int intel_alt_er(int idx)
 {
 	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
-		return false;
+		return idx;
 
-	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
-		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
-		event->hw.config |= 0x01bb;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
-		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
-	} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+	if (idx == EXTRA_REG_RSP_0)
+		return EXTRA_REG_RSP_1;
+
+	if (idx == EXTRA_REG_RSP_1)
+		return EXTRA_REG_RSP_0;
+
+	return idx;
+}
+
+static void intel_fixup_er(struct perf_event *event, int idx)
+{
+	event->hw.extra_reg.idx = idx;
+
+	if (idx == EXTRA_REG_RSP_0) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
 		event->hw.config |= 0x01b7;
-		event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+	} else if (idx == EXTRA_REG_RSP_1) {
+		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+		event->hw.config |= 0x01bb;
+		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
 	}
-
-	if (event->hw.extra_reg.idx == orig_idx)
-		return false;
-
-	return true;
 }
 
 /*
@@ -1159,14 +1165,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 	struct event_constraint *c = &emptyconstraint;
 	struct er_account *era;
 	unsigned long flags;
-	int orig_idx = reg->idx;
+	int idx = reg->idx;
 
-	/* already allocated shared msr */
-	if (reg->alloc)
+	/*
+	 * reg->alloc can be set due to existing state, so for fake cpuc we
+	 * need to ignore this, otherwise we might fail to allocate proper fake
+	 * state for this extra reg constraint. Also see the comment below.
+	 */
+	if (reg->alloc && !cpuc->is_fake)
 		return NULL; /* call x86_get_event_constraint() */
 
 again:
-	era = &cpuc->shared_regs->regs[reg->idx];
+	era = &cpuc->shared_regs->regs[idx];
 	/*
 	 * we use spin_lock_irqsave() to avoid lockdep issues when
 	 * passing a fake cpuc
@@ -1175,6 +1185,29 @@ again:
 
 	if (!atomic_read(&era->ref) || era->config == reg->config) {
 
+		/*
+		 * If its a fake cpuc -- as per validate_{group,event}() we
+		 * shouldn't touch event state and we can avoid doing so
+		 * since both will only call get_event_constraints() once
+		 * on each event, this avoids the need for reg->alloc.
+		 *
+		 * Not doing the ER fixup will only result in era->reg being
+		 * wrong, but since we won't actually try and program hardware
+		 * this isn't a problem either.
+		 */
+		if (!cpuc->is_fake) {
+			if (idx != reg->idx)
+				intel_fixup_er(event, idx);
+
+			/*
+			 * x86_schedule_events() can call get_event_constraints()
+			 * multiple times on events in the case of incremental
+			 * scheduling(). reg->alloc ensures we only do the ER
+			 * allocation once.
+			 */
+			reg->alloc = 1;
+		}
+
 		/* lock in msr value */
 		era->config = reg->config;
 		era->reg = reg->reg;
@@ -1182,17 +1215,17 @@ again:
 		/* one more user */
 		atomic_inc(&era->ref);
 
-		/* no need to reallocate during incremental event scheduling */
-		reg->alloc = 1;
-
 		/*
 		 * need to call x86_get_event_constraint()
 		 * to check if associated event has constraints
 		 */
 		c = NULL;
-	} else if (intel_try_alt_er(event, orig_idx)) {
-		raw_spin_unlock_irqrestore(&era->lock, flags);
-		goto again;
+	} else {
+		idx = intel_alt_er(idx);
+		if (idx != reg->idx) {
+			raw_spin_unlock_irqrestore(&era->lock, flags);
+			goto again;
+		}
 	}
 	raw_spin_unlock_irqrestore(&era->lock, flags);
 
@@ -1206,11 +1239,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
 	struct er_account *era;
 
 	/*
-	 * only put constraint if extra reg was actually
-	 * allocated. Also takes care of event which do
-	 * not use an extra shared reg
+	 * Only put constraint if extra reg was actually allocated. Also takes
+	 * care of event which do not use an extra shared reg.
+	 *
+	 * Also, if this is a fake cpuc we shouldn't touch any event state
+	 * (reg->alloc) and we don't care about leaving inconsistent cpuc state
+	 * either since it'll be thrown out.
 	 */
-	if (!reg->alloc)
+	if (!reg->alloc || cpuc->is_fake)
 		return;
 
 	era = &cpuc->shared_regs->regs[reg->idx];
@@ -1302,15 +1338,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 	intel_put_shared_regs_event_constraints(cpuc, event);
 }
 
-static int intel_pmu_hw_config(struct perf_event *event)
+static void intel_pebs_aliases_core2(struct perf_event *event)
 {
-	int ret = x86_pmu_hw_config(event);
-
-	if (ret)
-		return ret;
-
-	if (event->attr.precise_ip &&
-	    (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
 		/*
 		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
 		 * (0x003c) so that we can use it with PEBS.
@@ -1331,10 +1361,48 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		 */
 		u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
 
+		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+		event->hw.config = alt_config;
+	}
+}
+
+static void intel_pebs_aliases_snb(struct perf_event *event)
+{
+	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+		/*
+		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+		 * (0x003c) so that we can use it with PEBS.
+		 *
+		 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+		 * PEBS capable. However we can use UOPS_RETIRED.ALL
+		 * (0x01c2), which is a PEBS capable event, to get the same
+		 * count.
+		 *
+		 * UOPS_RETIRED.ALL counts the number of cycles that retires
+		 * CNTMASK micro-ops. By setting CNTMASK to a value (16)
+		 * larger than the maximum number of micro-ops that can be
+		 * retired per cycle (4) and then inverting the condition, we
+		 * count all cycles that retire 16 or less micro-ops, which
+		 * is every cycle.
+		 *
+		 * Thereby we gain a PEBS capable cycle counter.
+		 */
+		u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
 
 		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
 		event->hw.config = alt_config;
 	}
+}
+
+static int intel_pmu_hw_config(struct perf_event *event)
+{
+	int ret = x86_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+
+	if (event->attr.precise_ip && x86_pmu.pebs_aliases)
+		x86_pmu.pebs_aliases(event);
 
 	if (intel_pmu_needs_lbr_smpl(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
@@ -1609,6 +1677,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.max_period		= (1ULL << 31) - 1,
 	.get_event_constraints	= intel_get_event_constraints,
 	.put_event_constraints	= intel_put_event_constraints,
+	.pebs_aliases		= intel_pebs_aliases_core2,
 
 	.format_attrs		= intel_arch3_formats_attr,
 
@@ -1842,8 +1911,9 @@ __init int intel_pmu_init(void)
 		break;
 
 	case 42: /* SandyBridge */
-		x86_add_quirk(intel_sandybridge_quirk);
 	case 45: /* SandyBridge, "Romely-EP" */
+		x86_add_quirk(intel_sandybridge_quirk);
+	case 58: /* IvyBridge */
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
@@ -1851,6 +1921,7 @@ __init int intel_pmu_init(void)
 
 		x86_pmu.event_constraints = intel_snb_event_constraints;
 		x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
 		x86_pmu.extra_regs = intel_snb_extra_regs;
 		/* all extra regs are per-cpu when HT is on */
 		x86_pmu.er_flags |= ERF_HAS_RSP_1;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 5a3edc27f6e..35e2192df9f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -400,14 +400,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
-	INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
-	INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
-	INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
+	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 086eb58c6e8..f1b42b3a186 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -120,11 +120,6 @@ bool kvm_check_and_clear_guest_paused(void)
 	bool ret = false;
 	struct pvclock_vcpu_time_info *src;
 
-	/*
-	 * per_cpu() is safe here because this function is only called from
-	 * timer functions where preemption is already disabled.
-	 */
-	WARN_ON(!in_atomic());
 	src = &__get_cpu_var(hv_clock);
 	if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
 		__this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED);
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index e31bf8d5c4d..149b8d9c6ad 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -42,7 +42,7 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs)
 static void __init init_nmi_testsuite(void)
 {
 	/* trap all the unknown NMIs we may generate */
-	register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
+	register_nmi_handler_initonly(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
 }
 
 static void __init cleanup_nmi_testsuite(void)
@@ -64,7 +64,7 @@ static void __init test_nmi_ipi(struct cpumask *mask)
 {
 	unsigned long timeout;
 
-	if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback,
+	if (register_nmi_handler_initonly(NMI_LOCAL, test_nmi_ipi_callback,
 				 NMI_FLAG_FIRST, "nmi_selftest")) {
 		nmi_fail = FAILURE;
 		return;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 62c9457ccd2..c0f420f76cd 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -100,7 +100,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 				  struct dma_attrs *attrs)
 {
 	unsigned long dma_mask;
-	struct page *page = NULL;
+	struct page *page;
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	dma_addr_t addr;
 
@@ -108,6 +108,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 
 	flag |= __GFP_ZERO;
 again:
+	page = NULL;
 	if (!(flag & GFP_ATOMIC))
 		page = dma_alloc_from_contiguous(dev, count, get_order(size));
 	if (!page)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index ab3f0626071..10ae9be07b4 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -643,9 +643,11 @@ void native_machine_shutdown(void)
 	set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id));
 
 	/*
-	 * O.K Now that I'm on the appropriate processor,
-	 * stop all of the others.
+	 * O.K Now that I'm on the appropriate processor, stop all of the
+	 * others. Also disable the local irq to not receive the per-cpu
+	 * timer interrupt which may trigger scheduler's load balance.
 	 */
+	local_irq_disable();
 	stop_other_cpus();
 #endif
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 456d64806c8..54e938de713 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -351,9 +351,12 @@ static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 
 static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 {
-	if (c->phys_proc_id == o->phys_proc_id)
-		return topology_sane(c, o, "mc");
+	if (c->phys_proc_id == o->phys_proc_id) {
+		if (cpu_has(c, X86_FEATURE_AMD_DCM))
+			return true;
 
+		return topology_sane(c, o, "mc");
+	}
 	return false;
 }
 
@@ -384,6 +387,15 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 		if ((i == cpu) || (has_mc && match_llc(c, o)))
 			link_mask(llc_shared, cpu, i);
 
+	}
+
+	/*
+	 * This needs a separate iteration over the cpus because we rely on all
+	 * cpu_sibling_mask links to be set-up.
+	 */
+	for_each_cpu(i, cpu_sibling_setup_mask) {
+		o = &cpu_data(i);
+
 		if ((i == cpu) || (has_mc && match_mc(c, o))) {
 			link_mask(core, cpu, i);
 