8 files changed, 162 insertions, 29 deletions
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 486935143e02..de7353c0ce9c 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -498,8 +498,8 @@ static void *__init_or_module text_poke_early(void *addr, const void *opcode,
        unsigned long flags;
        local_irq_save(flags);
        memcpy(addr, opcode, len);
-        local_irq_restore(flags);
        sync_core();
+        local_irq_restore(flags);
        /* Could also do a CLFLUSH here to speed up CPU recovery; but
           that causes hangs on some VIA CPUs. */
        return addr;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 83b217c7225f..22a47c82f3c0 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -253,6 +253,64 @@ static int __cpuinit nearby_node(int apicid)
 #endif
 /*
+ * Fixup core topology information for AMD multi-node processors.
+ *
+ * When the processor reports itself as multi-node, this sets
+ * X86_FEATURE_AMD_DCM on @c, rewrites the per-cpu cpu_llc_id of
+ * smp_processor_id() to (phys_proc_id * 2 + internal node id) and folds
+ * c->cpu_core_id into the core range of one internal node.
+ * The body is compiled only with CONFIG_PCI; without it the function
+ * does nothing.
+ *
+ * Assumption 1: Number of cores in each internal node is the same.
+ * Assumption 2: Mixed systems with both single-node and dual-node
+ *               processors are not supported.
+ */
+#ifdef CONFIG_X86_HT
+static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_PCI
+        u32 t, cpn;
+        u8 n, n_id;
+        int cpu = smp_processor_id();
+        /*
+         * fixup topology information only once for a core: the feature
+         * bit tested here is set further down on the first pass, so a
+         * repeated call with the same cpuinfo returns immediately
+         */
+        if (cpu_has(c, X86_FEATURE_AMD_DCM))
+                return;
+        /*
+         * check for multi-node processor on boot cpu: bit 29 of the
+         * value read at 0xe8 must be set, otherwise nothing is changed
+         * NOTE(review): the arguments are presumably bus, device,
+         * function, offset -- confirm against read_pci_config()
+         */
+        t = read_pci_config(0, 24, 3, 0xe8);
+        if (!(t & (1 << 29)))
+                return;
+        set_cpu_cap(c, X86_FEATURE_AMD_DCM);
+        /*
+         * cores per node: each internal node has half the number of cores
+         * NOTE(review): cpn is 0 when x86_max_cores < 2, and cpn is the
+         * divisor of the modulo at the end of this function
+         */
+        cpn = c->x86_max_cores >> 1;
+        /*
+         * even-numbered NB_id of this dual-node processor; added to 24
+         * in the second read_pci_config() call below
+         */
+        n = c->phys_proc_id << 1;
+        /*
+         * determine internal node id and assign cores fifty-fifty to
+         * each node of the dual-node processor: n is reused to hold bits
+         * 31:30 of the value just read; when that field is 0 the core ids
+         * below cpn get n_id 0 and the rest n_id 1, for any other value
+         * the assignment is swapped
+         */
+        t = read_pci_config(0, 24 + n, 3, 0xe8);
+        n = (t>>30) & 0x3;
+        if (n == 0) {
+                if (c->cpu_core_id < cpn)
+                        n_id = 0;
+                else
+                        n_id = 1;
+        } else {
+                if (c->cpu_core_id < cpn)
+                        n_id = 1;
+                else
+                        n_id = 0;
+        }
+        /*
+         * compute entire NodeID, use llc_shared_map to store sibling info
+         * (the value written here is the per-cpu cpu_llc_id)
+         */
+        per_cpu(cpu_llc_id, cpu) = (c->phys_proc_id << 1) + n_id;
+        /* fixup core id to be in range from 0 to cpn - 1 */
+        c->cpu_core_id = c->cpu_core_id % cpn;
+#endif
+}
+#endif
+/*
 * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
 * Assumes number of cores is a power of two.
 */
@@ -269,6 +327,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
        c->phys_proc_id = c->initial_apicid >> bits;
        /* use socket ID also for last level cache */
        per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
+        /* fixup topology information on multi-node processors */
+        if ((c->x86 == 0x10) && (c->x86_model == 9))
+                amd_fixup_dcm(c);
 #endif
 }
@@ -277,9 +338,10 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
        int cpu = smp_processor_id();
        int node;
-        unsigned apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
+        unsigned apicid = c->apicid;
+        node = per_cpu(cpu_llc_id, cpu);
-        node = c->phys_proc_id;
        if (apicid_to_node[apicid] != NUMA_NO_NODE)
                node = apicid_to_node[apicid];
        if (!node_online(node)) {
@@ -406,12 +468,24 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
                /*
                 * Some BIOSes incorrectly force this feature, but only K8
                 * revision D (model = 0x14) and later actually support it.
+                 * (AMD Erratum #110, docId: 25759).
                 */
-                if (c->x86_model < 0x14)
+                if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) {
+                        u64 val;
                        clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
+                        if (!rdmsrl_amd_safe(0xc001100d, &val)) {
+                                val &= ~(1ULL << 32);
+                                wrmsrl_amd_safe(0xc001100d, val);
+                        }
+                }
        }
        if (c->x86 == 0x10 || c->x86 == 0x11)
                set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+        /* get apicid instead of initial apic id from cpuid */
+        c->apicid = hard_smp_processor_id();
 #else
        /*
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 306bf0dca061..804c40e2bc3e 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -241,7 +241,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
        case 0:
                if (!l1->val)
                        return;
-                assoc = l1->assoc;
+                assoc = assocs[l1->assoc];
                line_size = l1->line_size;
                lines_per_tag = l1->lines_per_tag;
                size_in_kb = l1->size_in_kb;
@@ -249,7 +249,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
        case 2:
                if (!l2.val)
                        return;
-                assoc = l2.assoc;
+                assoc = assocs[l2.assoc];
                line_size = l2.line_size;
                lines_per_tag = l2.lines_per_tag;
                /* cpu_data has errata corrections for K7 applied */
@@ -258,10 +258,14 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
        case 3:
                if (!l3.val)
                        return;
-                assoc = l3.assoc;
+                assoc = assocs[l3.assoc];
                line_size = l3.line_size;
                lines_per_tag = l3.lines_per_tag;
                size_in_kb = l3.size_encoded * 512;
+                if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
+                        size_in_kb = size_in_kb >> 1;
+                        assoc = assoc >> 1;
+                }
                break;
        default:
                return;
@@ -270,18 +274,14 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
        eax->split.is_self_initializing = 1;
        eax->split.type = types[leaf];
        eax->split.level = levels[leaf];
-        if (leaf == 3)
+        eax->split.num_threads_sharing = 0;
-                eax->split.num_threads_sharing =
-                        current_cpu_data.x86_max_cores - 1;
-        else
-                eax->split.num_threads_sharing = 0;
        eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
-        if (assoc == 0xf)
+        if (assoc == 0xffff)
                eax->split.is_fully_associative = 1;
        ebx->split.coherency_line_size = line_size - 1;
-        ebx->split.ways_of_associativity = assocs[assoc] - 1;
+        ebx->split.ways_of_associativity = assoc - 1;
        ebx->split.physical_line_partition = lines_per_tag - 1;
        ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
                (ebx->split.ways_of_associativity + 1) - 1;
@@ -523,6 +523,18 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
        int index_msb, i;
        struct cpuinfo_x86 *c = &cpu_data(cpu);
+        if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
+                struct cpuinfo_x86 *d;
+                for_each_online_cpu(i) {
+                        if (!per_cpu(cpuid4_info, i))
+                                continue;
+                        d = &cpu_data(i);
+                        this_leaf = CPUID4_INFO_IDX(i, index);
+                        cpumask_copy(to_cpumask(this_leaf->shared_cpu_map),
+                                     d->llc_shared_map);
+                }
+                return;
+        }
        this_leaf = CPUID4_INFO_IDX(cpu, index);
        num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index ddae21620bda..1fecba404fd8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -489,12 +489,14 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
        int i, err = 0;
        struct threshold_bank *b = NULL;
        char name[32];
+        struct cpuinfo_x86 *c = &cpu_data(cpu);
        sprintf(name, "threshold_bank%i", bank);
 #ifdef CONFIG_SMP
        if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) {   /* symlink */
-                i = cpumask_first(cpu_core_mask(cpu));
+                i = cpumask_first(c->llc_shared_map);
                /* first core not up yet */
                if (cpu_data(i).cpu_core_id)
@@ -514,7 +516,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
                if (err)
                        goto out;
-                cpumask_copy(b->cpus, cpu_core_mask(cpu));
+                cpumask_copy(b->cpus, c->llc_shared_map);
                per_cpu(threshold_banks, cpu)[bank] = b;
                goto out;
@@ -539,7 +541,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 #ifndef CONFIG_SMP
        cpumask_setall(b->cpus);
 #else
-        cpumask_copy(b->cpus, cpu_core_mask(cpu));
+        cpumask_copy(b->cpus, c->llc_shared_map);
 #endif
        per_cpu(threshold_banks, cpu)[bank] = b;
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 1e904346bbf4..62ac8cb6ba27 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -116,11 +116,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
 #endif
        seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size);
-#ifdef CONFIG_X86_64
        seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
        seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
                   c->x86_phys_bits, c->x86_virt_bits);
-#endif
        seq_printf(m, "power management:");
        for (i = 0; i < 32; i++) {
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 98fd6cd4e3a4..7dd950094178 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -1,6 +1,7 @@
 /* ----------------------------------------------------------------------- *
 *
 *   Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author: H. Peter Anvin
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
@@ -80,11 +81,8 @@ static ssize_t msr_read(struct file *file, char __user *buf,
        for (; count; count -= 8) {
                err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]);
-                if (err) {
+                if (err)
-                        if (err == -EFAULT) /* Fix idiotic error code */
-                                err = -EIO;
                        break;
-                }
                if (copy_to_user(tmp, &data, 8)) {
                        err = -EFAULT;
                        break;
@@ -115,11 +113,8 @@ static ssize_t msr_write(struct file *file, const char __user *buf,
                        break;
                }
                err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]);
-                if (err) {
+                if (err)
-                        if (err == -EFAULT) /* Fix idiotic error code */
-                                err = -EIO;
                        break;
-                }
                tmp += 2;
                bytes += 8;
        }
@@ -127,6 +122,54 @@ static ssize_t msr_write(struct file *file, const char __user *buf,
        return bytes ? bytes : err;
 }
+static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
+{       /*
+         * ioctl handler installed in msr_fops (.unlocked_ioctl and
+         * .compat_ioctl).  For X86_IOC_RDMSR_REGS and X86_IOC_WRMSR_REGS
+         * it copies eight u32 values from the user pointer passed in arg,
+         * hands them to rdmsr_safe_regs_on_cpu() / wrmsr_safe_regs_on_cpu()
+         * for the CPU given by the device minor, and copies the array
+         * back to user space.
+         *
+         * Returns 0 on success, -EBADF if the file lacks the required
+         * FMODE_READ / FMODE_WRITE, -EFAULT if a user copy fails, the
+         * helper's nonzero result if the helper fails, and -ENOTTY for
+         * any other ioc value.
+         */
+        u32 __user *uregs = (u32 __user *)arg;
+        u32 regs[8];            /* kernel-side copy of the user array */
+        int cpu = iminor(file->f_path.dentry->d_inode); /* minor is the CPU */
+        int err;                /* assigned on every path through the switch */
+        switch (ioc) {
+        case X86_IOC_RDMSR_REGS:
+                if (!(file->f_mode & FMODE_READ)) {     /* needs FMODE_READ */
+                        err = -EBADF;
+                        break;
+                }
+                if (copy_from_user(&regs, uregs, sizeof regs)) {
+                        err = -EFAULT;
+                        break;
+                }
+                err = rdmsr_safe_regs_on_cpu(cpu, regs);
+                if (err)        /* helper failed: nothing is copied back */
+                        break;
+                if (copy_to_user(uregs, &regs, sizeof regs))
+                        err = -EFAULT;
+                break;
+        case X86_IOC_WRMSR_REGS:
+                if (!(file->f_mode & FMODE_WRITE)) {    /* needs FMODE_WRITE */
+                        err = -EBADF;
+                        break;
+                }
+                if (copy_from_user(&regs, uregs, sizeof regs)) {
+                        err = -EFAULT;
+                        break;
+                }
+                err = wrmsr_safe_regs_on_cpu(cpu, regs);
+                if (err)        /* helper failed: nothing is copied back */
+                        break;
+                /* the array is copied back for the write request as well */
+                if (copy_to_user(uregs, &regs, sizeof regs))
+                        err = -EFAULT;
+                break;
+        default:        /* any request other than the two above */
+                err = -ENOTTY;
+                break;
+        }
+        return err;
+}
 static int msr_open(struct inode *inode, struct file *file)
 {
        unsigned int cpu = iminor(file->f_path.dentry->d_inode);
@@ -157,6 +200,8 @@ static const struct file_operations msr_fops = {
        .read = msr_read,
        .write = msr_write,
        .open = msr_open,
+        .unlocked_ioctl = msr_ioctl,
+        .compat_ioctl = msr_ioctl,
 };
 static int __cpuinit msr_device_create(int cpu)
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 70ec9b951d76..f5b0b4a01fb2 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -362,8 +362,9 @@ struct pv_cpu_ops pv_cpu_ops = {
 #endif
        .wbinvd = native_wbinvd,
        .read_msr = native_read_msr_safe,
-        .read_msr_amd = native_read_msr_amd_safe,
+        .rdmsr_regs = native_rdmsr_safe_regs,
        .write_msr = native_write_msr_safe,
+        .wrmsr_regs = native_wrmsr_safe_regs,
        .read_tsc = native_read_tsc,
        .read_pmc = native_read_pmc,
        .read_tscp = native_read_tscp,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2fecda69ee64..c36cc1452cdc 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -434,7 +434,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
         * For perf, we return last level cache shared map.
         * And for power savings, we return cpu_core_map
         */
-        if (sched_mc_power_savings || sched_smt_power_savings)
+        if ((sched_mc_power_savings || sched_smt_power_savings) &&
+            !(cpu_has(c, X86_FEATURE_AMD_DCM)))
                return cpu_core_mask(cpu);
        else
                return c->llc_shared_map;